# Note

Since the task was to achieve the highest possible ROC-AUC score on Kaggle, I didn't spend as much time as I usually do on the notebook's appearance. (One of the Mentors said that we don't get the grade/percentage for the notebook, just for the kaggle score.)

I achieved a score of 0.946, which I was happy with.

I tried several methods to deal with the NaN values, but none of them worked. I can't fill them, because that would just ruin the meaning of the NaN values. I chose a model that can work with them, and I introduced new features to make it perform better.

In [None]:
# Install CatBoost into the notebook runtime (the recorded run below resolved catboost 1.1.1).
!pip install catboost

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting catboost
  Downloading catboost-1.1.1-cp38-none-manylinux1_x86_64.whl (76.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.6/76.6 MB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.1.1


In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from random import choice, randint

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, precision_score, recall_score, roc_curve, auc
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import graphviz
import pydotplus

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

sns.set(style="darkgrid")

In [None]:
# Requires pyarrow; uncomment the next line to install it if it is missing.
# !pip install pyarrow

import pyarrow.parquet as pq

# Parquet lookup table; the recorded output of the next cell shows `id` and `webpage` columns.
file_path = 'data/id_map.parquet'

# Read the file with pyarrow and convert the result to a pandas DataFrame.
id_map = pq.read_pandas(file_path).to_pandas()

In [None]:
id_map

Unnamed: 0,id,webpage
0,326127,www.abmecatronique.com
1,182113,groups.live.com
2,551820,majeureliguefootball.wordpress.com
3,401995,cdt46.media.tourinsoft.eu
4,105504,www.hdwallpapers.eu
...,...,...
48366,386746,i1-js-14-3-01-11074-747051290-i.init.cedexis-r...
48367,530474,i1-js-14-3-01-12434-548464295-i.init.cedexis-r...
48368,142359,embed.api.tv
48369,520516,n-tennis.fr


# Input

In [None]:
# Labelled sessions (the recorded head() output includes a `target` column).
train_df = pd.read_csv('data/train.csv')
# Sessions to predict (same columns, without `target`).
test_df = pd.read_csv('data/test.csv')

# General information

In [None]:
train_df.head()

Unnamed: 0,session_id,webpage1,time1,webpage2,time2,webpage3,time3,webpage4,time4,webpage5,time5,webpage6,time6,webpage7,time7,webpage8,time8,webpage9,time9,webpage10,time10,target
0,0,9486,2019-02-20 05:57:45,,,,,,,,,,,,,,,,,,,0
1,1,11722,2019-02-22 07:14:50,12385.0,2019-02-22 07:14:50,50163.0,2019-02-22 07:14:51,12385.0,2019-02-22 07:14:51,12398.0,2019-02-22 07:14:51,50150.0,2019-02-22 07:14:51,50163.0,2019-02-22 07:14:52,50150.0,2019-02-22 07:14:52,19860.0,2019-02-22 07:15:15,19886.0,2019-02-22 07:15:16,0
2,2,192149,2018-12-16 12:35:17,659.0,2018-12-16 12:35:18,192136.0,2018-12-16 12:35:19,192149.0,2018-12-16 12:35:19,633.0,2018-12-16 12:35:19,659.0,2018-12-16 12:35:19,192136.0,2018-12-16 12:35:20,192136.0,2018-12-16 12:35:21,192136.0,2018-12-16 12:35:22,192136.0,2018-12-16 12:35:24,0
3,3,10591,2019-02-13 12:40:35,451.0,2019-02-13 12:40:35,77580.0,2019-02-13 12:40:35,227821.0,2019-02-13 12:40:35,633.0,2019-02-13 12:41:05,425.0,2019-02-13 12:42:14,10591.0,2019-02-13 12:42:14,227834.0,2019-02-13 12:42:15,227834.0,2019-02-13 12:42:16,227834.0,2019-02-13 12:42:17,0
4,4,438,2018-04-12 06:22:26,425.0,2018-04-12 06:22:26,529.0,2018-04-12 06:22:28,65685.0,2018-04-12 06:22:29,187638.0,2018-04-12 06:22:29,451.0,2018-04-12 06:22:29,425.0,2018-04-12 06:22:29,65685.0,2018-04-12 06:22:31,187625.0,2018-04-12 06:22:31,187625.0,2018-04-12 06:22:32,0


In [None]:
test_df.head()

Unnamed: 0,session_id,webpage1,time1,webpage2,time2,webpage3,time3,webpage4,time4,webpage5,time5,webpage6,time6,webpage7,time7,webpage8,time8,webpage9,time9,webpage10,time10
0,0,10318,2019-03-28 06:47:12,10318.0,2019-03-28 06:47:42,10318.0,2019-03-28 06:48:12,10318.0,2019-03-28 06:48:42,10318.0,2019-03-28 06:49:12,10318.0,2019-03-28 06:49:42,10318.0,2019-03-28 06:50:12,10318.0,2019-03-28 06:50:42,10318.0,2019-03-28 06:51:12,10318.0,2019-03-28 06:51:42
1,1,438,2019-02-28 06:48:05,2453.0,2019-02-28 06:50:22,2427.0,2019-02-28 06:50:22,2466.0,2019-02-28 06:50:23,2453.0,2019-02-28 06:50:23,2466.0,2019-02-28 06:50:59,2427.0,2019-02-28 06:50:59,2453.0,2019-02-28 06:50:59,2453.0,2019-02-28 06:52:06,2466.0,2019-02-28 06:52:11
2,2,7562,2019-03-18 11:13:31,425.0,2019-03-18 11:13:39,7562.0,2019-03-18 11:18:02,425.0,2019-03-18 11:18:43,425.0,2019-03-18 11:24:57,,,,,,,,,,
3,3,8836,2019-03-17 12:18:08,12372.0,2019-03-17 12:18:35,12398.0,2019-03-17 12:18:35,12385.0,2019-03-17 12:18:35,12385.0,2019-03-17 12:18:36,12398.0,2019-03-17 12:18:36,12372.0,2019-03-17 12:18:36,451.0,2019-03-17 12:18:52,425.0,2019-03-17 12:18:52,438.0,2019-03-17 12:18:53
4,4,451,2019-03-31 09:46:43,9863.0,2019-03-31 09:46:44,451.0,2019-03-31 09:46:46,9889.0,2019-03-31 09:46:46,451.0,2019-03-31 09:47:45,425.0,2019-03-31 09:47:46,9928.0,2019-03-31 09:49:43,867.0,2019-03-31 10:05:12,880.0,2019-03-31 10:05:12,867.0,2019-03-31 10:05:13


In [None]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 93338 entries, 0 to 93337
Data columns (total 21 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   session_id  93338 non-null  int64  
 1   webpage1    93338 non-null  int64  
 2   time1       93338 non-null  object 
 3   webpage2    91859 non-null  float64
 4   time2       91859 non-null  object 
 5   webpage3    90606 non-null  float64
 6   time3       90606 non-null  object 
 7   webpage4    89674 non-null  float64
 8   time4       89674 non-null  object 
 9   webpage5    88744 non-null  float64
 10  time5       88744 non-null  object 
 11  webpage6    87929 non-null  float64
 12  time6       87929 non-null  object 
 13  webpage7    87143 non-null  float64
 14  time7       87143 non-null  object 
 15  webpage8    86409 non-null  float64
 16  time8       86409 non-null  object 
 17  webpage9    85685 non-null  float64
 18  time9       85685 non-null  object 
 19  webpage10   84959 non-nul

In [None]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159969 entries, 0 to 159968
Data columns (total 22 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   session_id  159969 non-null  int64  
 1   webpage1    159969 non-null  int64  
 2   time1       159969 non-null  object 
 3   webpage2    157990 non-null  float64
 4   time2       157990 non-null  object 
 5   webpage3    156066 non-null  float64
 6   time3       156066 non-null  object 
 7   webpage4    154402 non-null  float64
 8   time4       154402 non-null  object 
 9   webpage5    152841 non-null  float64
 10  time5       152841 non-null  object 
 11  webpage6    151324 non-null  float64
 12  time6       151324 non-null  object 
 13  webpage7    149912 non-null  float64
 14  time7       149912 non-null  object 
 15  webpage8    148574 non-null  float64
 16  time8       148574 non-null  object 
 17  webpage9    147160 non-null  float64
 18  time9       147160 non-null  object 
 19  we

In [None]:
train_df.describe()

Unnamed: 0,session_id,webpage1,webpage2,webpage3,webpage4,webpage5,webpage6,webpage7,webpage8,webpage9,webpage10,target
count,159969.0,159969.0,157990.0,156066.0,154402.0,152841.0,151324.0,149912.0,148574.0,147160.0,145856.0,159969.0
mean,79984.0,44362.540573,44593.501722,45025.879231,45448.173081,45663.769584,45820.414706,46410.833809,46813.332925,46971.420053,47558.884736,0.009846
std,46179.216943,97167.414738,97682.859444,97995.435761,98488.571326,98504.193709,98600.68429,99314.76832,99651.347793,99794.332873,100488.244994,0.098736
min,0.0,165.0,165.0,165.0,165.0,165.0,165.0,165.0,165.0,165.0,165.0,0.0
25%,39992.0,828.0,828.0,828.0,828.0,828.0,828.0,828.0,828.0,828.0,828.0,0.0
50%,79984.0,8160.0,8160.0,8173.0,8238.0,8693.0,8810.0,8940.0,8953.0,8953.0,8953.0,0.0
75%,119976.0,30845.0,30663.0,31508.0,31716.0,31716.0,31963.0,32704.0,33809.0,33835.0,34108.0,0.0
max,159968.0,540965.0,540952.0,540939.0,540939.0,540939.0,540952.0,540952.0,540965.0,540965.0,540965.0,1.0


In [None]:
train_df['target'].value_counts()

0    158394
1      1575
Name: target, dtype: int64

The dataset is highly unbalanced.

In [None]:
train_df[train_df['target'] == 1].head()

Unnamed: 0,session_id,webpage1,time1,webpage2,time2,webpage3,time3,webpage4,time4,webpage5,...,time6,webpage7,time7,webpage8,time8,webpage9,time9,webpage10,time10,target
97,97,70313,2018-11-22 09:18:49,70287.0,2018-11-22 09:18:49,438.0,2018-11-22 09:18:50,70300.0,2018-11-22 09:18:50,70378.0,...,2018-11-22 09:18:50,438.0,2018-11-22 09:18:51,607.0,2018-11-22 09:18:54,581.0,2018-11-22 09:18:54,4546.0,2018-11-22 09:18:54,1
255,255,633,2019-02-13 08:41:49,3662.0,2019-02-13 08:43:14,568.0,2019-02-13 08:43:17,581.0,2019-02-13 08:43:18,607.0,...,2019-02-13 08:43:23,529.0,2019-02-13 08:43:25,581.0,2019-02-13 08:43:48,164251.0,2019-02-13 08:43:48,9304.0,2019-02-13 08:43:48,1
382,382,659,2019-02-17 12:37:03,451.0,2019-02-17 12:37:06,8953.0,2019-02-17 12:37:08,438.0,2019-02-17 12:37:12,659.0,...,2019-02-17 12:37:12,425.0,2019-02-17 12:37:12,8966.0,2019-02-17 12:37:13,438.0,2019-02-17 12:37:15,8966.0,2019-02-17 12:37:17,1
406,406,1153,2018-09-12 12:16:55,11579.0,2018-09-12 12:16:56,1192.0,2018-09-12 12:16:58,11579.0,2018-09-12 12:16:58,1192.0,...,2018-09-12 12:17:06,1153.0,2018-09-12 12:17:10,11579.0,2018-09-12 12:17:13,11579.0,2018-09-12 12:17:14,1192.0,2018-09-12 12:17:14,1
441,441,13893,2018-09-12 12:40:49,1153.0,2018-09-12 12:40:50,1192.0,2018-09-12 12:40:50,1205.0,2018-09-12 12:40:50,1140.0,...,2018-09-12 12:40:53,1205.0,2018-09-12 12:40:54,1192.0,2018-09-12 12:40:55,1205.0,2018-09-12 12:40:56,1205.0,2018-09-12 12:40:57,1


In [None]:
test_df.head()

Unnamed: 0,session_id,webpage1,time1,webpage2,time2,webpage3,time3,webpage4,time4,webpage5,...,webpage6,time6,webpage7,time7,webpage8,time8,webpage9,time9,webpage10,time10
0,0,10318,2019-03-28 06:47:12,10318.0,2019-03-28 06:47:42,10318.0,2019-03-28 06:48:12,10318.0,2019-03-28 06:48:42,10318.0,...,10318.0,2019-03-28 06:49:42,10318.0,2019-03-28 06:50:12,10318.0,2019-03-28 06:50:42,10318.0,2019-03-28 06:51:12,10318.0,2019-03-28 06:51:42
1,1,438,2019-02-28 06:48:05,2453.0,2019-02-28 06:50:22,2427.0,2019-02-28 06:50:22,2466.0,2019-02-28 06:50:23,2453.0,...,2466.0,2019-02-28 06:50:59,2427.0,2019-02-28 06:50:59,2453.0,2019-02-28 06:50:59,2453.0,2019-02-28 06:52:06,2466.0,2019-02-28 06:52:11
2,2,7562,2019-03-18 11:13:31,425.0,2019-03-18 11:13:39,7562.0,2019-03-18 11:18:02,425.0,2019-03-18 11:18:43,425.0,...,,,,,,,,,,
3,3,8836,2019-03-17 12:18:08,12372.0,2019-03-17 12:18:35,12398.0,2019-03-17 12:18:35,12385.0,2019-03-17 12:18:35,12385.0,...,12398.0,2019-03-17 12:18:36,12372.0,2019-03-17 12:18:36,451.0,2019-03-17 12:18:52,425.0,2019-03-17 12:18:52,438.0,2019-03-17 12:18:53
4,4,451,2019-03-31 09:46:43,9863.0,2019-03-31 09:46:44,451.0,2019-03-31 09:46:46,9889.0,2019-03-31 09:46:46,451.0,...,425.0,2019-03-31 09:47:46,9928.0,2019-03-31 09:49:43,867.0,2019-03-31 10:05:12,880.0,2019-03-31 10:05:12,867.0,2019-03-31 10:05:13


In [None]:
pd.set_option('display.max_columns', None)

# New features

Create new features. This will make the model better.

In [None]:
# Replace the raw time1..time10 timestamps of train_df with calendar parts and
# the gap (in seconds) to the previous click.
raw_time_cols = ["time" + str(i) for i in range(1, 11)]

# Only raw columns that are still present are processed, so re-running this cell
# after the timestamps were dropped is a no-op instead of a KeyError.
present_time_cols = [col for col in raw_time_cols if col in train_df.columns]

for col_name in present_time_cols:
  # errors='coerce' turns unparseable values into NaT instead of raising.
  train_df[col_name] = pd.to_datetime(train_df[col_name], errors = 'coerce')

for i, col_name in enumerate(raw_time_cols, start = 1):
  if col_name not in present_time_cols:
    continue

  # Keep this creation order (parts first, then the gap): it determines the
  # column order of the resulting frame.
  for part in ('hour', 'minute', 'second', 'day_of_week', 'month', 'year'):
    train_df[col_name + '_' + part] = getattr(train_df[col_name].dt, part)

  # time1 has no predecessor ('time0' is never present), so it gets no gap column.
  previous_col_name = 'time' + str(i - 1)
  if previous_col_name in present_time_cols:
    train_df[col_name + ' - ' + previous_col_name + '_seconds'] = (train_df[col_name] - train_df[previous_col_name]).dt.total_seconds()

# The raw timestamps are no longer needed once the derived columns exist.
train_df = train_df.drop(columns = present_time_cols)

In [None]:
train_df.head()

Unnamed: 0,session_id,webpage1,webpage2,webpage3,webpage4,webpage5,webpage6,webpage7,webpage8,webpage9,webpage10,target,time1_hour,time1_minute,time1_second,time1_day_of_week,time1_month,time1_year,time2_hour,time2_minute,time2_second,time2_day_of_week,time2_month,time2_year,time2 - time1_seconds,time3_hour,time3_minute,time3_second,time3_day_of_week,time3_month,time3_year,time3 - time2_seconds,time4_hour,time4_minute,time4_second,time4_day_of_week,time4_month,time4_year,time4 - time3_seconds,time5_hour,time5_minute,time5_second,time5_day_of_week,time5_month,time5_year,time5 - time4_seconds,time6_hour,time6_minute,time6_second,time6_day_of_week,time6_month,time6_year,time6 - time5_seconds,time7_hour,time7_minute,time7_second,time7_day_of_week,time7_month,time7_year,time7 - time6_seconds,time8_hour,time8_minute,time8_second,time8_day_of_week,time8_month,time8_year,time8 - time7_seconds,time9_hour,time9_minute,time9_second,time9_day_of_week,time9_month,time9_year,time9 - time8_seconds,time10_hour,time10_minute,time10_second,time10_day_of_week,time10_month,time10_year,time10 - time9_seconds
0,0,9486,,,,,,,,,,0,5,57,45,2,2,2019,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,11722,12385.0,50163.0,12385.0,12398.0,50150.0,50163.0,50150.0,19860.0,19886.0,0,7,14,50,4,2,2019,7.0,14.0,50.0,4.0,2.0,2019.0,0.0,7.0,14.0,51.0,4.0,2.0,2019.0,1.0,7.0,14.0,51.0,4.0,2.0,2019.0,0.0,7.0,14.0,51.0,4.0,2.0,2019.0,0.0,7.0,14.0,51.0,4.0,2.0,2019.0,0.0,7.0,14.0,52.0,4.0,2.0,2019.0,1.0,7.0,14.0,52.0,4.0,2.0,2019.0,0.0,7.0,15.0,15.0,4.0,2.0,2019.0,23.0,7.0,15.0,16.0,4.0,2.0,2019.0,1.0
2,2,192149,659.0,192136.0,192149.0,633.0,659.0,192136.0,192136.0,192136.0,192136.0,0,12,35,17,6,12,2018,12.0,35.0,18.0,6.0,12.0,2018.0,1.0,12.0,35.0,19.0,6.0,12.0,2018.0,1.0,12.0,35.0,19.0,6.0,12.0,2018.0,0.0,12.0,35.0,19.0,6.0,12.0,2018.0,0.0,12.0,35.0,19.0,6.0,12.0,2018.0,0.0,12.0,35.0,20.0,6.0,12.0,2018.0,1.0,12.0,35.0,21.0,6.0,12.0,2018.0,1.0,12.0,35.0,22.0,6.0,12.0,2018.0,1.0,12.0,35.0,24.0,6.0,12.0,2018.0,2.0
3,3,10591,451.0,77580.0,227821.0,633.0,425.0,10591.0,227834.0,227834.0,227834.0,0,12,40,35,2,2,2019,12.0,40.0,35.0,2.0,2.0,2019.0,0.0,12.0,40.0,35.0,2.0,2.0,2019.0,0.0,12.0,40.0,35.0,2.0,2.0,2019.0,0.0,12.0,41.0,5.0,2.0,2.0,2019.0,30.0,12.0,42.0,14.0,2.0,2.0,2019.0,69.0,12.0,42.0,14.0,2.0,2.0,2019.0,0.0,12.0,42.0,15.0,2.0,2.0,2019.0,1.0,12.0,42.0,16.0,2.0,2.0,2019.0,1.0,12.0,42.0,17.0,2.0,2.0,2019.0,1.0
4,4,438,425.0,529.0,65685.0,187638.0,451.0,425.0,65685.0,187625.0,187625.0,0,6,22,26,3,4,2018,6.0,22.0,26.0,3.0,4.0,2018.0,0.0,6.0,22.0,28.0,3.0,4.0,2018.0,2.0,6.0,22.0,29.0,3.0,4.0,2018.0,1.0,6.0,22.0,29.0,3.0,4.0,2018.0,0.0,6.0,22.0,29.0,3.0,4.0,2018.0,0.0,6.0,22.0,29.0,3.0,4.0,2018.0,0.0,6.0,22.0,31.0,3.0,4.0,2018.0,2.0,6.0,22.0,31.0,3.0,4.0,2018.0,0.0,6.0,22.0,32.0,3.0,4.0,2018.0,1.0


In [None]:
# Replace the raw time1..time10 timestamps of test_df with calendar parts and
# the gap (in seconds) to the previous click.
raw_time_cols = ["time" + str(i) for i in range(1, 11)]

# Only raw columns that are still present are processed, so re-running this cell
# after the timestamps were dropped is a no-op instead of a KeyError.
present_time_cols = [col for col in raw_time_cols if col in test_df.columns]

for col_name in present_time_cols:
  # errors='coerce' turns unparseable values into NaT instead of raising.
  test_df[col_name] = pd.to_datetime(test_df[col_name], errors = 'coerce')

for i, col_name in enumerate(raw_time_cols, start = 1):
  if col_name not in present_time_cols:
    continue

  # Keep this creation order (parts first, then the gap): it determines the
  # column order of the resulting frame.
  for part in ('hour', 'minute', 'second', 'day_of_week', 'month', 'year'):
    test_df[col_name + '_' + part] = getattr(test_df[col_name].dt, part)

  # time1 has no predecessor ('time0' is never present), so it gets no gap column.
  previous_col_name = 'time' + str(i - 1)
  if previous_col_name in present_time_cols:
    test_df[col_name + ' - ' + previous_col_name + '_seconds'] = (test_df[col_name] - test_df[previous_col_name]).dt.total_seconds()

# The raw timestamps are no longer needed once the derived columns exist.
test_df = test_df.drop(columns = present_time_cols)

In [None]:
test_df.head()

Unnamed: 0,session_id,webpage1,webpage2,webpage3,webpage4,webpage5,webpage6,webpage7,webpage8,webpage9,webpage10,time1_hour,time1_minute,time1_second,time1_day_of_week,time1_month,time1_year,time2_hour,time2_minute,time2_second,time2_day_of_week,time2_month,time2_year,time2 - time1_seconds,time3_hour,time3_minute,time3_second,time3_day_of_week,time3_month,time3_year,time3 - time2_seconds,time4_hour,time4_minute,time4_second,time4_day_of_week,time4_month,time4_year,time4 - time3_seconds,time5_hour,time5_minute,time5_second,time5_day_of_week,time5_month,time5_year,time5 - time4_seconds,time6_hour,time6_minute,time6_second,time6_day_of_week,time6_month,time6_year,time6 - time5_seconds,time7_hour,time7_minute,time7_second,time7_day_of_week,time7_month,time7_year,time7 - time6_seconds,time8_hour,time8_minute,time8_second,time8_day_of_week,time8_month,time8_year,time8 - time7_seconds,time9_hour,time9_minute,time9_second,time9_day_of_week,time9_month,time9_year,time9 - time8_seconds,time10_hour,time10_minute,time10_second,time10_day_of_week,time10_month,time10_year,time10 - time9_seconds
0,0,10318,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,6,47,12,3,3,2019,6.0,47.0,42.0,3.0,3.0,2019.0,30.0,6.0,48.0,12.0,3.0,3.0,2019.0,30.0,6.0,48.0,42.0,3.0,3.0,2019.0,30.0,6.0,49.0,12.0,3.0,3.0,2019.0,30.0,6.0,49.0,42.0,3.0,3.0,2019.0,30.0,6.0,50.0,12.0,3.0,3.0,2019.0,30.0,6.0,50.0,42.0,3.0,3.0,2019.0,30.0,6.0,51.0,12.0,3.0,3.0,2019.0,30.0,6.0,51.0,42.0,3.0,3.0,2019.0,30.0
1,1,438,2453.0,2427.0,2466.0,2453.0,2466.0,2427.0,2453.0,2453.0,2466.0,6,48,5,3,2,2019,6.0,50.0,22.0,3.0,2.0,2019.0,137.0,6.0,50.0,22.0,3.0,2.0,2019.0,0.0,6.0,50.0,23.0,3.0,2.0,2019.0,1.0,6.0,50.0,23.0,3.0,2.0,2019.0,0.0,6.0,50.0,59.0,3.0,2.0,2019.0,36.0,6.0,50.0,59.0,3.0,2.0,2019.0,0.0,6.0,50.0,59.0,3.0,2.0,2019.0,0.0,6.0,52.0,6.0,3.0,2.0,2019.0,67.0,6.0,52.0,11.0,3.0,2.0,2019.0,5.0
2,2,7562,425.0,7562.0,425.0,425.0,,,,,,11,13,31,0,3,2019,11.0,13.0,39.0,0.0,3.0,2019.0,8.0,11.0,18.0,2.0,0.0,3.0,2019.0,263.0,11.0,18.0,43.0,0.0,3.0,2019.0,41.0,11.0,24.0,57.0,0.0,3.0,2019.0,374.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,3,8836,12372.0,12398.0,12385.0,12385.0,12398.0,12372.0,451.0,425.0,438.0,12,18,8,6,3,2019,12.0,18.0,35.0,6.0,3.0,2019.0,27.0,12.0,18.0,35.0,6.0,3.0,2019.0,0.0,12.0,18.0,35.0,6.0,3.0,2019.0,0.0,12.0,18.0,36.0,6.0,3.0,2019.0,1.0,12.0,18.0,36.0,6.0,3.0,2019.0,0.0,12.0,18.0,36.0,6.0,3.0,2019.0,0.0,12.0,18.0,52.0,6.0,3.0,2019.0,16.0,12.0,18.0,52.0,6.0,3.0,2019.0,0.0,12.0,18.0,53.0,6.0,3.0,2019.0,1.0
4,4,451,9863.0,451.0,9889.0,451.0,425.0,9928.0,867.0,880.0,867.0,9,46,43,6,3,2019,9.0,46.0,44.0,6.0,3.0,2019.0,1.0,9.0,46.0,46.0,6.0,3.0,2019.0,2.0,9.0,46.0,46.0,6.0,3.0,2019.0,0.0,9.0,47.0,45.0,6.0,3.0,2019.0,59.0,9.0,47.0,46.0,6.0,3.0,2019.0,1.0,9.0,49.0,43.0,6.0,3.0,2019.0,117.0,10.0,5.0,12.0,6.0,3.0,2019.0,929.0,10.0,5.0,12.0,6.0,3.0,2019.0,0.0,10.0,5.0,13.0,6.0,3.0,2019.0,1.0


In [None]:
train_df.isna().sum()

session_id                    0
webpage1                      0
webpage2                   1979
webpage3                   3903
webpage4                   5567
                          ...  
time10_second             14113
time10_day_of_week        14113
time10_month              14113
time10_year               14113
time10 - time9_seconds    14113
Length: 81, dtype: int64

# Split

In [None]:
# Hold out 20% of the labelled sessions for validation; random_state fixes the shuffle.
# NOTE(review): the split is not stratified although only about 1% of the rows are
# positive (1575 of 159969) - consider passing stratify=train_df['target'].
X_train, X_test, y_train, y_test = train_test_split(train_df.drop('target', axis = 1), train_df['target'], test_size = 0.2, random_state = 42)

In [None]:
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(127975, 80) (127975,)
(31994, 80) (31994,)


In [None]:
def evaluate_model(classifier, train_set, train_target, test_set, test_target, metrics=None):
    """Fit ``classifier`` and score its predictions on a hold-out set.

    Parameters
    ----------
    classifier : estimator exposing ``fit`` and ``predict_proba``
    train_set, train_target : data the classifier is fitted on
    test_set, test_target : data the fitted classifier is scored on
    metrics : list of callables, optional
        Forwarded to ``get_model_metrics``. Defaults to ``[roc_auc_score]``
        when omitted.

    Returns
    -------
    The result of ``get_model_metrics`` for this classifier.
    """
    # Resolve the default at call time instead of sharing one mutable list.
    if metrics is None:
        metrics = [roc_auc_score]
    prediction = train_model(classifier, train_set, train_target, test_set)
    # Forward the caller's metrics; previously [roc_auc_score] was always passed
    # and the ``metrics`` argument was silently ignored.
    results = get_model_metrics(classifier.__class__.__name__, test_target, prediction, metrics=metrics)
    return results


def train_model(classifier, train_set, train_target, test_set):
    """Fit ``classifier`` on the training data, then return ``classifier.predict_proba(test_set)``."""
    classifier.fit(train_set, train_target)
    probabilities = classifier.predict_proba(test_set)
    return probabilities


def get_model_metrics(classifier_name, test_target, prediction, metrics=None):
    """Compute, print and collect metric scores for one classifier.

    Parameters
    ----------
    classifier_name : label used in the printed line and stored under ``'classifier'``
    test_target : true labels passed as the first argument of every metric
    prediction : 2-D array indexed as ``prediction[:, 1]``; that column is
        passed as the second argument of every metric
    metrics : list of callables, optional
        Defaults to ``[roc_auc_score]`` when omitted.

    Returns
    -------
    dict mapping each metric's ``__name__`` to its score, plus a
    ``'classifier'`` entry holding ``classifier_name``.
    """
    # Resolve the default at call time instead of sharing one mutable list.
    if metrics is None:
        metrics = [roc_auc_score]
    metrics_result = dict()
    for metric in metrics:
        score = metric(test_target, prediction[:, 1])
        print("{} for model {} = {:.3f}".format(metric.__name__, classifier_name, score))
        metrics_result[metric.__name__] = score
    metrics_result['classifier'] = classifier_name
    return metrics_result


In [None]:
def get_curves_tree(param_dict: dict, random_state=42) -> None:
    """Plot train vs. hold-out ROC-AUC of a decision tree per hyper-parameter value.

    For every ``name -> values`` entry of ``param_dict`` a
    ``DecisionTreeClassifier`` is fitted once per value (only that one
    hyper-parameter is set, plus ``random_state``) and one figure is drawn.

    Relies on the notebook-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test``; nothing is returned.
    """
    for key, values in param_dict.items():
        values = list(values)
        # Start fresh score lists for each hyper-parameter; sharing them across
        # keys made their length differ from len(values) from the second key on.
        train_results = []
        test_results = []
        for item in values:
            dt = DecisionTreeClassifier(**{key: item}, random_state=random_state)
            dt.fit(X_train, y_train)
            train_pred = dt.predict_proba(X_train)
            train_results.append(roc_auc_score(y_train, train_pred[:, 1]))
            y_pred = dt.predict_proba(X_test)
            test_results.append(roc_auc_score(y_test, y_pred[:, 1]))

        plt.figure(figsize=(10, 4))
        # x/y are passed by keyword: newer seaborn releases reject them positionally.
        sns.lineplot(x=values, y=train_results, label='Train AUC')
        sns.lineplot(x=values, y=test_results, label='Test AUC')
        plt.ylabel('AUC score')
        plt.xlabel(key)
        plt.show()

In [None]:
results_by_model = pd.DataFrame()

In [None]:
# Every column name of test_df as a plain Python list (shown as the cell output).
CATEGORICAL_FEATURES = list(test_df.columns)
CATEGORICAL_FEATURES

['session_id',
 'webpage1',
 'webpage2',
 'webpage3',
 'webpage4',
 'webpage5',
 'webpage6',
 'webpage7',
 'webpage8',
 'webpage9',
 'webpage10',
 'time1_hour',
 'time1_minute',
 'time1_second',
 'time1_day_of_week',
 'time1_month',
 'time1_year',
 'time2_hour',
 'time2_minute',
 'time2_second',
 'time2_day_of_week',
 'time2_month',
 'time2_year',
 'time2 - time1_seconds',
 'time3_hour',
 'time3_minute',
 'time3_second',
 'time3_day_of_week',
 'time3_month',
 'time3_year',
 'time3 - time2_seconds',
 'time4_hour',
 'time4_minute',
 'time4_second',
 'time4_day_of_week',
 'time4_month',
 'time4_year',
 'time4 - time3_seconds',
 'time5_hour',
 'time5_minute',
 'time5_second',
 'time5_day_of_week',
 'time5_month',
 'time5_year',
 'time5 - time4_seconds',
 'time6_hour',
 'time6_minute',
 'time6_second',
 'time6_day_of_week',
 'time6_month',
 'time6_year',
 'time6 - time5_seconds',
 'time7_hour',
 'time7_minute',
 'time7_second',
 'time7_day_of_week',
 'time7_month',
 'time7_year',
 'time7 - 

In [None]:
def process_categorical_only(dataframe, 
                             cat_features=CATEGORICAL_FEATURES):
    """Return a new frame in which ``cat_features`` are replaced by their ``pd.get_dummies`` output.

    The input frame is left untouched. Columns not listed in ``cat_features``
    come first in the result, followed by the encoded columns.

    NOTE(review): ``get_dummies`` is called without ``columns=``, so presumably
    only object/category-typed columns are actually expanded - confirm.
    """
    encoded_part = pd.get_dummies(dataframe[cat_features])
    remaining_part = dataframe.drop(columns=cat_features)
    return pd.concat((remaining_part, encoded_part), axis=1)

In [None]:
# Baseline: XGBoost with default hyper-parameters, scored on the hold-out split.
xg_clf = XGBClassifier(objective ='binary:logistic', random_state=42)
result_dict = evaluate_model(xg_clf, X_train, y_train, X_test, y_test)
# DataFrame.append was removed in pandas 2.0; pd.concat adds the row on every version.
results_by_model = pd.concat([results_by_model, pd.DataFrame([result_dict])], ignore_index=True)

roc_auc_score for model XGBClassifier = 0.973


In [None]:
y_pred_test_df = xg_clf.predict_proba(test_df)

In [None]:
pd.reset_option('display.max_columns')

In [None]:
temp_df = pd.DataFrame(y_pred_test_df)
temp_df[temp_df[1] > 0.3]

Unnamed: 0,0,1
506,0.31258,0.68742
10813,0.675207,0.324793
12122,0.413673,0.586327
17748,0.307364,0.692636
17884,0.444817,0.555183
22389,0.667164,0.332836
22709,0.672649,0.327351
31912,0.300315,0.699685
37846,0.678519,0.321481
40679,0.353277,0.646723


There are not enough predictions for class `1`.

In [None]:
# Split the full labelled frame into the label vector and the feature matrix.
train_df_y = train_df['target']
train_df_features = train_df.drop(columns = ['target'])

In [None]:
print(train_df_features.shape, train_df_y.shape)

(159969, 60) (159969,)


# LGBM Classifier

In [None]:
# LightGBM with many small trees (1000 estimators, 7 leaves each), scored on the hold-out split.
lgb_clf = LGBMClassifier(n_estimators=1000,
                         num_leaves=7,
                         random_state=42)

result_dict = evaluate_model(lgb_clf, X_train, y_train, X_test, y_test)
# DataFrame.append was removed in pandas 2.0; pd.concat adds the row on every version.
results_by_model = pd.concat([results_by_model, pd.DataFrame([result_dict])], ignore_index=True)

roc_auc_score for model LGBMClassifier = 0.991


In [None]:
y_pred_test_df = lgb_clf.predict_proba(test_df)

In [None]:
# Wrap the predict_proba output in a DataFrame; it is indexed below by column labels 0 and 1.
temp_df = pd.DataFrame(y_pred_test_df)
# NOTE: the result of this filter is discarded - only the last expression of a cell is displayed.
temp_df[temp_df[1] > 0.5]

# final_df is another name for the same object as temp_df (no copy is made here).
final_df = temp_df
final_df

Unnamed: 0,0,1
0,0.999996,4.406875e-06
1,0.999984,1.621532e-05
2,0.999932,6.789836e-05
3,0.991173,8.827068e-03
4,0.999999,6.107458e-07
...,...,...
93333,0.999874,1.256923e-04
93334,1.000000,3.021262e-07
93335,1.000000,3.407381e-08
93336,0.999999,9.724723e-07


In [None]:
temp_df[temp_df[1] >0.5]

Unnamed: 0,0,1
96,3.530594e-08,1.000000
108,2.637151e-01,0.736285
335,5.880915e-06,0.999994
506,1.395302e-01,0.860470
557,1.133798e-06,0.999999
...,...,...
91313,9.032456e-07,0.999999
92008,1.635026e-05,0.999984
92628,1.227628e-05,0.999988
93017,1.458028e-03,0.998542


In [None]:
final_df = final_df.drop(columns = [0])

In [None]:
final_df.reset_index(inplace = True)

In [None]:
# 'index' is the column created by the earlier reset_index() call, i.e. the row number of each prediction.
# NOTE(review): using it as session_id assumes test_df rows are ordered so that
# row number == session_id - confirm before submitting.
final_df = final_df.rename(columns = {'index': 'session_id', 1: 'target'})
final_df

Unnamed: 0,session_id,target
0,0,4.406875e-06
1,1,1.621532e-05
2,2,6.789836e-05
3,3,8.827068e-03
4,4,6.107458e-07
...,...,...
93333,93333,1.256923e-04
93334,93334,3.021262e-07
93335,93335,3.407381e-08
93336,93336,9.724723e-07


In [None]:
final_df.to_csv('output_with_year_1.csv', index = False, header = True, sep = ',', columns = ['session_id', 'target'])

Train with the full train dataset

In [None]:
# Split every labelled row into the label vector and the feature matrix.
train_df_target = train_df['target']

train_df_features = train_df.drop(columns = ['target'])

In [None]:
train_df_features.head()

Unnamed: 0,session_id,webpage1,webpage2,webpage3,webpage4,webpage5,webpage6,webpage7,webpage8,webpage9,webpage10,time1_hour,time1_minute,time1_second,time1_day_of_week,time1_month,time1_year,time2_hour,time2_minute,time2_second,time2_day_of_week,time2_month,time2_year,time2 - time1_seconds,time3_hour,time3_minute,time3_second,time3_day_of_week,time3_month,time3_year,time3 - time2_seconds,time4_hour,time4_minute,time4_second,time4_day_of_week,time4_month,time4_year,time4 - time3_seconds,time5_hour,time5_minute,time5_second,time5_day_of_week,time5_month,time5_year,time5 - time4_seconds,time6_hour,time6_minute,time6_second,time6_day_of_week,time6_month,time6_year,time6 - time5_seconds,time7_hour,time7_minute,time7_second,time7_day_of_week,time7_month,time7_year,time7 - time6_seconds,time8_hour,time8_minute,time8_second,time8_day_of_week,time8_month,time8_year,time8 - time7_seconds,time9_hour,time9_minute,time9_second,time9_day_of_week,time9_month,time9_year,time9 - time8_seconds,time10_hour,time10_minute,time10_second,time10_day_of_week,time10_month,time10_year,time10 - time9_seconds
0,0,9486,,,,,,,,,,5,57,45,2,2,2019,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,1,11722,12385.0,50163.0,12385.0,12398.0,50150.0,50163.0,50150.0,19860.0,19886.0,7,14,50,4,2,2019,7.0,14.0,50.0,4.0,2.0,2019.0,0.0,7.0,14.0,51.0,4.0,2.0,2019.0,1.0,7.0,14.0,51.0,4.0,2.0,2019.0,0.0,7.0,14.0,51.0,4.0,2.0,2019.0,0.0,7.0,14.0,51.0,4.0,2.0,2019.0,0.0,7.0,14.0,52.0,4.0,2.0,2019.0,1.0,7.0,14.0,52.0,4.0,2.0,2019.0,0.0,7.0,15.0,15.0,4.0,2.0,2019.0,23.0,7.0,15.0,16.0,4.0,2.0,2019.0,1.0
2,2,192149,659.0,192136.0,192149.0,633.0,659.0,192136.0,192136.0,192136.0,192136.0,12,35,17,6,12,2018,12.0,35.0,18.0,6.0,12.0,2018.0,1.0,12.0,35.0,19.0,6.0,12.0,2018.0,1.0,12.0,35.0,19.0,6.0,12.0,2018.0,0.0,12.0,35.0,19.0,6.0,12.0,2018.0,0.0,12.0,35.0,19.0,6.0,12.0,2018.0,0.0,12.0,35.0,20.0,6.0,12.0,2018.0,1.0,12.0,35.0,21.0,6.0,12.0,2018.0,1.0,12.0,35.0,22.0,6.0,12.0,2018.0,1.0,12.0,35.0,24.0,6.0,12.0,2018.0,2.0
3,3,10591,451.0,77580.0,227821.0,633.0,425.0,10591.0,227834.0,227834.0,227834.0,12,40,35,2,2,2019,12.0,40.0,35.0,2.0,2.0,2019.0,0.0,12.0,40.0,35.0,2.0,2.0,2019.0,0.0,12.0,40.0,35.0,2.0,2.0,2019.0,0.0,12.0,41.0,5.0,2.0,2.0,2019.0,30.0,12.0,42.0,14.0,2.0,2.0,2019.0,69.0,12.0,42.0,14.0,2.0,2.0,2019.0,0.0,12.0,42.0,15.0,2.0,2.0,2019.0,1.0,12.0,42.0,16.0,2.0,2.0,2019.0,1.0,12.0,42.0,17.0,2.0,2.0,2019.0,1.0
4,4,438,425.0,529.0,65685.0,187638.0,451.0,425.0,65685.0,187625.0,187625.0,6,22,26,3,4,2018,6.0,22.0,26.0,3.0,4.0,2018.0,0.0,6.0,22.0,28.0,3.0,4.0,2018.0,2.0,6.0,22.0,29.0,3.0,4.0,2018.0,1.0,6.0,22.0,29.0,3.0,4.0,2018.0,0.0,6.0,22.0,29.0,3.0,4.0,2018.0,0.0,6.0,22.0,29.0,3.0,4.0,2018.0,0.0,6.0,22.0,31.0,3.0,4.0,2018.0,2.0,6.0,22.0,31.0,3.0,4.0,2018.0,0.0,6.0,22.0,32.0,3.0,4.0,2018.0,1.0


In [None]:
train_df_target

0         0
1         0
2         0
3         0
4         0
         ..
159964    0
159965    0
159966    0
159967    0
159968    0
Name: target, Length: 159969, dtype: int64

In [None]:
# Same LightGBM configuration as in the validation run, now fitted on every labelled row
# (train_df_features / train_df_target), so there is no hold-out score for this model.
lgb_clf = LGBMClassifier(n_estimators=1000, 
                         num_leaves=7
                         ,random_state=42)

lgb_clf.fit(train_df_features, train_df_target)

LGBMClassifier(n_estimators=1000, num_leaves=7, random_state=42)

In [None]:
y_test_pred = lgb_clf.predict_proba(test_df)

In [None]:
y_test_pred = pd.DataFrame(y_test_pred)

In [None]:
y_test_pred[y_test_pred[1] > 0.5]

Unnamed: 0,0,1
108,0.156504,0.843496
389,0.309457,0.690543
415,0.407048,0.592952
506,0.000463,0.999537
1101,0.056955,0.943045
...,...,...
92283,0.000000,1.000000
92725,0.068229,0.931771
93040,0.000000,1.000000
93163,0.045431,0.954569


In [None]:
# Keep only the probability column (label 1), expose the row number as a column,
# and give both columns their submission names.
y_test_pred = (
    y_test_pred
    .drop(columns = [0])
    .reset_index()
    .rename(columns = {'index': 'session_id', 1: 'target'})
)
y_test_pred

Unnamed: 0,session_id,target
0,0,0.000000
1,1,0.000000
2,2,0.000000
3,3,0.058034
4,4,0.000039
...,...,...
93333,93333,0.000450
93334,93334,0.000000
93335,93335,0.000000
93336,93336,0.000008


In [None]:
y_test_pred.to_csv('output_full_train.csv', index = False, header = True, sep = ',', columns = ['session_id', 'target'])

I tried to turn the date values into numbers, but the score decreased a bit.

In [None]:
# time_columns = [col for col in train_df.columns if 'time' in col]
# for col in time_columns:
#     train_df[col] = pd.to_datetime(train_df[col]).astype(int) / 10**9

  train_df[col] = pd.to_datetime(train_df[col]).astype(int) / 10**9


In [None]:
# time_columns = [col for col in test_df.columns if 'time' in col]
# for col in time_columns:
#     test_df[col] = pd.to_datetime(test_df[col]).astype(int) / 10**9

  test_df[col] = pd.to_datetime(test_df[col]).astype(int) / 10**9


In [None]:
# train_df.head()

Unnamed: 0,session_id,webpage1,webpage2,webpage3,webpage4,webpage5,webpage6,webpage7,webpage8,webpage9,webpage10,target,time1_hour,time1_minute,time1_month,time1_day_of_week,time2_hour,time2_minute,time2_month,time2_day_of_week,time2 - time1_seconds,time3_hour,time3_minute,time3_month,time3_day_of_week,time3 - time2_seconds,time4_hour,time4_minute,time4_month,time4_day_of_week,time4 - time3_seconds,time5_hour,time5_minute,time5_month,time5_day_of_week,time5 - time4_seconds,time6_hour,time6_minute,time6_month,time6_day_of_week,time6 - time5_seconds,time7_hour,time7_minute,time7_month,time7_day_of_week,time7 - time6_seconds,time8_hour,time8_minute,time8_month,time8_day_of_week,time8 - time7_seconds,time9_hour,time9_minute,time9_month,time9_day_of_week,time9 - time8_seconds,time10_hour,time10_minute,time10_month,time10_day_of_week,time10 - time9_seconds
0,0,9486,,,,,,,,,,0,0.0,0.0,1e-09,3e-09,2.3e-08,5.9e-08,1.2e-08,2e-09,-1e-08,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0,2.3e-08,5.9e-08,1.2e-08,2e-09,0.0
1,1,11722,12385.0,50163.0,12385.0,12398.0,50150.0,50163.0,50150.0,19860.0,19886.0,0,0.0,0.0,1e-09,3e-09,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0
2,2,192149,659.0,192136.0,192149.0,633.0,659.0,192136.0,192136.0,192136.0,192136.0,0,0.0,0.0,1e-09,3e-09,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0
3,3,10591,451.0,77580.0,227821.0,633.0,425.0,10591.0,227834.0,227834.0,227834.0,0,0.0,0.0,1e-09,3e-09,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0
4,4,438,425.0,529.0,65685.0,187638.0,451.0,425.0,65685.0,187625.0,187625.0,0,0.0,0.0,1e-09,3e-09,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0,0.0,0.0,1e-09,3e-09,0.0


In [None]:
# Preview of the test frame (call disabled).
# NOTE(review): the table stored under this cell appears to come from an earlier run
# in which the 'time*' feature columns had been divided by 10**9 (values ~1e-09,
# and -9223372000.0 where entries were missing); it may not reflect the current
# test_df -- TODO re-run or clear the output.
# test_df.head()

Unnamed: 0,session_id,webpage1,webpage2,webpage3,webpage4,webpage5,webpage6,webpage7,webpage8,webpage9,webpage10,time1_hour,time1_minute,time1_month,time1_day_of_week,time2_hour,time2_minute,time2_month,time2_day_of_week,time2 - time1_seconds,time3_hour,time3_minute,time3_month,time3_day_of_week,time3 - time2_seconds,time4_hour,time4_minute,time4_month,time4_day_of_week,time4 - time3_seconds,time5_hour,time5_minute,time5_month,time5_day_of_week,time5 - time4_seconds,time6_hour,time6_minute,time6_month,time6_day_of_week,time6 - time5_seconds,time7_hour,time7_minute,time7_month,time7_day_of_week,time7 - time6_seconds,time8_hour,time8_minute,time8_month,time8_day_of_week,time8 - time7_seconds,time9_hour,time9_minute,time9_month,time9_day_of_week,time9 - time8_seconds,time10_hour,time10_minute,time10_month,time10_day_of_week,time10 - time9_seconds
0,0,10318,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,10318.0,6e-09,4.7e-08,3e-09,3e-09,6e-09,4.7e-08,3e-09,3e-09,3e-08,6e-09,4.8e-08,3e-09,3e-09,3e-08,6e-09,4.8e-08,3e-09,3e-09,3e-08,6e-09,4.9e-08,3e-09,3e-09,3e-08,6e-09,4.9e-08,3e-09,3e-09,3e-08,6e-09,5e-08,3e-09,3e-09,3e-08,6e-09,5e-08,3e-09,3e-09,3e-08,6e-09,5.1e-08,3e-09,3e-09,3e-08,6e-09,5.1e-08,3e-09,3e-09,3e-08
1,1,438,2453.0,2427.0,2466.0,2453.0,2466.0,2427.0,2453.0,2453.0,2466.0,6e-09,4.8e-08,2e-09,3e-09,6e-09,5e-08,2e-09,3e-09,1.37e-07,6e-09,5e-08,2e-09,3e-09,0.0,6e-09,5e-08,2e-09,3e-09,1e-09,6e-09,5e-08,2e-09,3e-09,0.0,6e-09,5e-08,2e-09,3e-09,3.6e-08,6e-09,5e-08,2e-09,3e-09,0.0,6e-09,5e-08,2e-09,3e-09,0.0,6e-09,5.2e-08,2e-09,3e-09,6.7e-08,6e-09,5.2e-08,2e-09,3e-09,5e-09
2,2,7562,425.0,7562.0,425.0,425.0,,,,,,1.1e-08,1.3e-08,3e-09,0.0,1.1e-08,1.3e-08,3e-09,0.0,8e-09,1.1e-08,1.8e-08,3e-09,0.0,2.63e-07,1.1e-08,1.8e-08,3e-09,0.0,4.1e-08,1.1e-08,2.4e-08,3e-09,0.0,3.74e-07,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0,-9223372000.0
3,3,8836,12372.0,12398.0,12385.0,12385.0,12398.0,12372.0,451.0,425.0,438.0,1.2e-08,1.8e-08,3e-09,6e-09,1.2e-08,1.8e-08,3e-09,6e-09,2.7e-08,1.2e-08,1.8e-08,3e-09,6e-09,0.0,1.2e-08,1.8e-08,3e-09,6e-09,0.0,1.2e-08,1.8e-08,3e-09,6e-09,1e-09,1.2e-08,1.8e-08,3e-09,6e-09,0.0,1.2e-08,1.8e-08,3e-09,6e-09,0.0,1.2e-08,1.8e-08,3e-09,6e-09,1.6e-08,1.2e-08,1.8e-08,3e-09,6e-09,0.0,1.2e-08,1.8e-08,3e-09,6e-09,1e-09
4,4,451,9863.0,451.0,9889.0,451.0,425.0,9928.0,867.0,880.0,867.0,9e-09,4.6e-08,3e-09,6e-09,9e-09,4.6e-08,3e-09,6e-09,1e-09,9e-09,4.6e-08,3e-09,6e-09,2e-09,9e-09,4.6e-08,3e-09,6e-09,0.0,9e-09,4.7e-08,3e-09,6e-09,5.9e-08,9e-09,4.7e-08,3e-09,6e-09,1e-09,9e-09,4.9e-08,3e-09,6e-09,1.17e-07,1e-08,5e-09,3e-09,6e-09,9.29e-07,1e-08,5e-09,3e-09,6e-09,0.0,1e-08,5e-09,3e-09,6e-09,1e-09
