In [2]:
import pandas as pd
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pickle

from sklearn.preprocessing import MinMaxScaler

import re

pd.set_option('display.max_columns', 999)
pd.set_option('display.max_rows', 500)

In [3]:
path = './Datasets/'

In [4]:
log_df = pd.read_parquet(path + "logs.parquet")

In [5]:
log_df.shape

(4483537, 9)

In [5]:
# Feature transformations

# Create categorical variables for url requests

log_df = pd.get_dummies(log_df, columns = ['url'])

# Create url request only dataframe

url_features = [col for col in log_df.columns if "url" in col]

req_df = log_df[url_features]

In [6]:
req_df.head(10)

Unnamed: 0,url_/2/2-2-2-3-2/197,url_/2/2-2-2-4-4/72,url_/2/2-2-3-2/126,url_/2/2-2-4/199,url_/2/2-2/6,url_/2/2-2/95,url_/2/2-3-2-2-3/104,url_/2/2-3/104,url_/2/2/119,url_/2/2/130,url_/2/2/247,url_/2/2/57,url_/2/3-2-3-4-2/94,url_/2/3-2-3/93,url_/2/3-2-4-4/40,url_/2/3-3-2-3/126,url_/2/3-3-2/245,url_/2/3-3-3-3/125,url_/2/3-3-3-3/84,url_/2/3-3-3/181,url_/2/3-3/87,url_/2/3-3/91,url_/2/3-4-2-3/152,url_/2/3-4-3/155,url_/2/3-4-4-4-3/154,url_/2/3-4/105,url_/2/3-4/232,url_/2/3/188,url_/2/3/6,url_/2/4-2-2-3-2/204,url_/2/4-2-4-2/65,url_/2/4-2-4/235,url_/2/4-3-2-2-2/105,url_/2/4-3-2-2-2/85,url_/2/4-3-4-2-4/12,url_/2/4-3-4/76,url_/2/4-3/65,url_/2/4-4-2-3-4/103,url_/2/4-4-2-3-4/222,url_/2/4-4-2-4/104,url_/2/4-4-3/147,url_/2/4-4-4-2-3/49,url_/2/4/23,url_/3/2-2-3/49,url_/3/2-3-2-2/204,url_/3/2-3-2-2/212,url_/3/2-3-2-3-3/93,url_/3/2-3-3-2/107,url_/3/2-3-4-4/11,url_/3/2-3-4/201,url_/3/2-4-3-3/13,url_/3/2/158,url_/3/3-2-2-2-2/197,url_/3/3-2-2-2/137,url_/3/3-2-2/77,url_/3/3-2/82,url_/3/3-3-2-2/115,url_/3/3-3-3-3-3/101,url_/3/3-3-3-4-4/14,url_/3/3-3-3-4/9,url_/3/3-3/110,url_/3/3-4-4-4-4/215,url_/3/3-4/115,url_/3/3-4/230,url_/3/3-4/37,url_/3/3/139,url_/3/3/14,url_/3/3/180,url_/3/3/47,url_/3/4-2-2-4-4/26,url_/3/4-2-2/38,url_/3/4-2-2/96,url_/3/4-2-3-4/56,url_/3/4-2/223,url_/3/4-3-3/93,url_/3/4-3/42,url_/3/4-4-2-4-4/215,url_/3/4-4-3-4/66,url_/3/4-4-3/163,url_/3/4-4-3/60,url_/3/4-4-4-4-2/37,url_/3/4-4-4-4/217,url_/3/4-4-4/139,url_/3/4-4/192,url_/3/4-4/195,url_/3/4/3,url_/3/4/71,url_/4/2-2-4-4/145,url_/4/2-3-3-4/64,url_/4/2-3/65,url_/4/2-4-2-2/75,url_/4/2-4-3/155,url_/4/2-4/147,url_/4/2-4/79,url_/4/2/110,url_/4/2/228,url_/4/2/229,url_/4/2/88,url_/4/3-2-4/33,url_/4/3-3-3-3-3/181,url_/4/3-3-3-3-4/16,url_/4/3-4-3-4/122,url_/4/3-4-3/96,url_/4/3-4-4-3/216,url_/4/3-4-4-4-3/183,url_/4/3-4/154,url_/4/3/169,url_/4/3/171,url_/4/3/188,url_/4/3/65,url_/4/4-2-4-3-2/45,url_/4/4-3-2-4-4/139,url_/4/4-3-3-4/229,url_/4/4-3-3/238,url_/4/4-3-4-4/149,url_/4/4-3-4/142,url_/4/4-3/224,url_/4/4-3/76,url_/4/4-4-2/41,url_/4/4-4-3-4/16,url_/4/4-4-4/102,url_/4/4-4/214,url_/4/4-4/69,url_/4/4/106,url_/4/4/130,url_/4/4/223,url_/4/4/231,url_/4/4/71,url_/4/4/79
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [7]:
# Create sequence prediction outcome variable -> next 250 requests

for url in url_features:
     req_df["CS_" + url] = req_df[url].rolling(250).sum()

cumsum_cols = [col for col in req_df.columns if "CS_" in col]
        
# Delete NaN rows

req_df.dropna(inplace=True)

# Shift output variable 250 places down

req_df[cumsum_cols] = req_df[cumsum_cols].shift(-250)

# Delete NaN rows again

req_df.dropna(inplace=True)

# Scale CS cols between 0 and 1

req_df[cumsum_cols] = MinMaxScaler().fit_transform(req_df[cumsum_cols])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [8]:
req_df.head(10)

Unnamed: 0,url_/2/2-2-2-3-2/197,url_/2/2-2-2-4-4/72,url_/2/2-2-3-2/126,url_/2/2-2-4/199,url_/2/2-2/6,url_/2/2-2/95,url_/2/2-3-2-2-3/104,url_/2/2-3/104,url_/2/2/119,url_/2/2/130,url_/2/2/247,url_/2/2/57,url_/2/3-2-3-4-2/94,url_/2/3-2-3/93,url_/2/3-2-4-4/40,url_/2/3-3-2-3/126,url_/2/3-3-2/245,url_/2/3-3-3-3/125,url_/2/3-3-3-3/84,url_/2/3-3-3/181,url_/2/3-3/87,url_/2/3-3/91,url_/2/3-4-2-3/152,url_/2/3-4-3/155,url_/2/3-4-4-4-3/154,url_/2/3-4/105,url_/2/3-4/232,url_/2/3/188,url_/2/3/6,url_/2/4-2-2-3-2/204,url_/2/4-2-4-2/65,url_/2/4-2-4/235,url_/2/4-3-2-2-2/105,url_/2/4-3-2-2-2/85,url_/2/4-3-4-2-4/12,url_/2/4-3-4/76,url_/2/4-3/65,url_/2/4-4-2-3-4/103,url_/2/4-4-2-3-4/222,url_/2/4-4-2-4/104,url_/2/4-4-3/147,url_/2/4-4-4-2-3/49,url_/2/4/23,url_/3/2-2-3/49,url_/3/2-3-2-2/204,url_/3/2-3-2-2/212,url_/3/2-3-2-3-3/93,url_/3/2-3-3-2/107,url_/3/2-3-4-4/11,url_/3/2-3-4/201,url_/3/2-4-3-3/13,url_/3/2/158,url_/3/3-2-2-2-2/197,url_/3/3-2-2-2/137,url_/3/3-2-2/77,url_/3/3-2/82,url_/3/3-3-2-2/115,url_/3/3-3-3-3-3/101,url_/3/3-3-3-4-4/14,url_/3/3-3-3-4/9,url_/3/3-3/110,url_/3/3-4-4-4-4/215,url_/3/3-4/115,url_/3/3-4/230,url_/3/3-4/37,url_/3/3/139,url_/3/3/14,url_/3/3/180,url_/3/3/47,url_/3/4-2-2-4-4/26,url_/3/4-2-2/38,url_/3/4-2-2/96,url_/3/4-2-3-4/56,url_/3/4-2/223,url_/3/4-3-3/93,url_/3/4-3/42,url_/3/4-4-2-4-4/215,url_/3/4-4-3-4/66,url_/3/4-4-3/163,url_/3/4-4-3/60,url_/3/4-4-4-4-2/37,url_/3/4-4-4-4/217,url_/3/4-4-4/139,url_/3/4-4/192,url_/3/4-4/195,url_/3/4/3,url_/3/4/71,url_/4/2-2-4-4/145,url_/4/2-3-3-4/64,url_/4/2-3/65,url_/4/2-4-2-2/75,url_/4/2-4-3/155,url_/4/2-4/147,url_/4/2-4/79,url_/4/2/110,url_/4/2/228,url_/4/2/229,url_/4/2/88,url_/4/3-2-4/33,url_/4/3-3-3-3-3/181,url_/4/3-3-3-3-4/16,url_/4/3-4-3-4/122,url_/4/3-4-3/96,url_/4/3-4-4-3/216,url_/4/3-4-4-4-3/183,url_/4/3-4/154,url_/4/3/169,url_/4/3/171,url_/4/3/188,url_/4/3/65,url_/4/4-2-4-3-2/45,url_/4/4-3-2-4-4/139,url_/4/4-3-3-4/229,url_/4/4-3-3/238,url_/4/4-3-4-4/149,url_/4/4-3-4/142,url_/4/4-3/224,url_/4/4-3/76,url_/4/4-4-2/41,url_/4/4-4-3-4/16,url_/4/4-4-4/102,url_/4/4-4/214,url_/4/4-4/69,url_/4/4/106,url_/4/4/130,url_/4/4/223,url_/4/4/231,url_/4/4/71,url_/4/4/79,CS_url_/2/2-2-2-3-2/197,CS_url_/2/2-2-2-4-4/72,CS_url_/2/2-2-3-2/126,CS_url_/2/2-2-4/199,CS_url_/2/2-2/6,CS_url_/2/2-2/95,CS_url_/2/2-3-2-2-3/104,CS_url_/2/2-3/104,CS_url_/2/2/119,CS_url_/2/2/130,CS_url_/2/2/247,CS_url_/2/2/57,CS_url_/2/3-2-3-4-2/94,CS_url_/2/3-2-3/93,CS_url_/2/3-2-4-4/40,CS_url_/2/3-3-2-3/126,CS_url_/2/3-3-2/245,CS_url_/2/3-3-3-3/125,CS_url_/2/3-3-3-3/84,CS_url_/2/3-3-3/181,CS_url_/2/3-3/87,CS_url_/2/3-3/91,CS_url_/2/3-4-2-3/152,CS_url_/2/3-4-3/155,CS_url_/2/3-4-4-4-3/154,CS_url_/2/3-4/105,CS_url_/2/3-4/232,CS_url_/2/3/188,CS_url_/2/3/6,CS_url_/2/4-2-2-3-2/204,CS_url_/2/4-2-4-2/65,CS_url_/2/4-2-4/235,CS_url_/2/4-3-2-2-2/105,CS_url_/2/4-3-2-2-2/85,CS_url_/2/4-3-4-2-4/12,CS_url_/2/4-3-4/76,CS_url_/2/4-3/65,CS_url_/2/4-4-2-3-4/103,CS_url_/2/4-4-2-3-4/222,CS_url_/2/4-4-2-4/104,CS_url_/2/4-4-3/147,CS_url_/2/4-4-4-2-3/49,CS_url_/2/4/23,CS_url_/3/2-2-3/49,CS_url_/3/2-3-2-2/204,CS_url_/3/2-3-2-2/212,CS_url_/3/2-3-2-3-3/93,CS_url_/3/2-3-3-2/107,CS_url_/3/2-3-4-4/11,CS_url_/3/2-3-4/201,CS_url_/3/2-4-3-3/13,CS_url_/3/2/158,CS_url_/3/3-2-2-2-2/197,CS_url_/3/3-2-2-2/137,CS_url_/3/3-2-2/77,CS_url_/3/3-2/82,CS_url_/3/3-3-2-2/115,CS_url_/3/3-3-3-3-3/101,CS_url_/3/3-3-3-4-4/14,CS_url_/3/3-3-3-4/9,CS_url_/3/3-3/110,CS_url_/3/3-4-4-4-4/215,CS_url_/3/3-4/115,CS_url_/3/3-4/230,CS_url_/3/3-4/37,CS_url_/3/3/139,CS_url_/3/3/14,CS_url_/3/3/180,CS_url_/3/3/47,CS_url_/3/4-2-2-4-4/26,CS_url_/3/4-2-2/38,CS_url_/3/4-2-2/96,CS_url_/3/4-2-3-4/56,CS_url_/3/4-2/223,CS_url_/3/4-3-3/93,CS_url_/3/4-3/42,CS_url_/3/4-4-2-4-4/215,CS_url_/3/4-4-3-4/66,CS_url_/3/4-4-3/163,CS_url_/3/4-4-3/60,CS_url_/3/4-4-4-4-2/37,CS_url_/3/4-4-4-4/217,CS_url_/3/4-4-4/139,CS_url_/3/4-4/192,CS_url_/3/4-4/195,CS_url_/3/4/3,CS_url_/3/4/71,CS_url_/4/2-2-4-4/145,CS_url_/4/2-3-3-4/64,CS_url_/4/2-3/65,CS_url_/4/2-4-2-2/75,CS_url_/4/2-4-3/155,CS_url_/4/2-4/147,CS_url_/4/2-4/79,CS_url_/4/2/110,CS_url_/4/2/228,CS_url_/4/2/229,CS_url_/4/2/88,CS_url_/4/3-2-4/33,CS_url_/4/3-3-3-3-3/181,CS_url_/4/3-3-3-3-4/16,CS_url_/4/3-4-3-4/122,CS_url_/4/3-4-3/96,CS_url_/4/3-4-4-3/216,CS_url_/4/3-4-4-4-3/183,CS_url_/4/3-4/154,CS_url_/4/3/169,CS_url_/4/3/171,CS_url_/4/3/188,CS_url_/4/3/65,CS_url_/4/4-2-4-3-2/45,CS_url_/4/4-3-2-4-4/139,CS_url_/4/4-3-3-4/229,CS_url_/4/4-3-3/238,CS_url_/4/4-3-4-4/149,CS_url_/4/4-3-4/142,CS_url_/4/4-3/224,CS_url_/4/4-3/76,CS_url_/4/4-4-2/41,CS_url_/4/4-4-3-4/16,CS_url_/4/4-4-4/102,CS_url_/4/4-4/214,CS_url_/4/4-4/69,CS_url_/4/4/106,CS_url_/4/4/130,CS_url_/4/4/223,CS_url_/4/4/231,CS_url_/4/4/71,CS_url_/4/4/79
249,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.333333,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.263158,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.0,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.206897,0.217391,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.380952,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.045455,0.047619,0.176471,0.052632,0.117647,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.1875,0.133333,0.0625,0.095238,0.138889,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.117647,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.2,0.0,0.133333,0.055556,0.176471,0.0,0.0,0.111111
250,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.333333,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.263158,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.0,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.206897,0.217391,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.380952,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.117647,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.1875,0.133333,0.0625,0.095238,0.138889,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.117647,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.2,0.0,0.133333,0.055556,0.117647,0.0,0.0,0.111111
251,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.333333,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.0,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.206897,0.217391,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.380952,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.117647,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.138889,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.117647,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.2,0.0,0.133333,0.055556,0.117647,0.0,0.0,0.111111
252,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.333333,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.033333,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.206897,0.217391,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.380952,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.117647,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.111111,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.117647,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.2,0.0,0.133333,0.055556,0.117647,0.0,0.0,0.111111
253,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.333333,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.033333,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.206897,0.217391,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.428571,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.117647,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.111111,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.117647,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.2,0.0,0.066667,0.055556,0.117647,0.0,0.0,0.111111
254,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.375,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.033333,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.206897,0.217391,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.428571,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.058824,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.111111,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.117647,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.2,0.0,0.066667,0.055556,0.117647,0.0,0.0,0.111111
255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.375,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.033333,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.172414,0.217391,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.428571,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.058824,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.111111,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.176471,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.2,0.0,0.066667,0.055556,0.117647,0.0,0.0,0.111111
256,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.0,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.375,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.090909,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.033333,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.172414,0.26087,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.428571,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.058824,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.111111,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.176471,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.133333,0.0,0.066667,0.055556,0.117647,0.0,0.0,0.111111
257,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.055556,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.375,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.045455,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.033333,0.0,0.117647,0.055556,0.125,0.4,0.117647,0.2,0.090909,0.172414,0.26087,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.428571,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.058824,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.111111,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.176471,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.133333,0.0,0.066667,0.055556,0.117647,0.0,0.0,0.111111
258,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.086957,0.1,0.111111,0.15,0.1875,0.055556,0.043478,0.176471,0.055556,0.125,0.142857,0.117647,0.222222,0.105263,0.111111,0.0,0.222222,0.333333,0.043478,0.052632,0.058824,0.222222,0.0,0.0,0.157895,0.058824,0.0625,0.1,0.058824,0.0,0.111111,0.117647,0.045455,0.055556,0.05,0.055556,0.058824,0.055556,0.0,0.066667,0.315789,0.055556,0.347826,0.0,0.157895,0.0,0.034483,0.045455,0.0,0.0,0.033333,0.0,0.117647,0.055556,0.125,0.45,0.117647,0.2,0.090909,0.172414,0.26087,0.055556,0.227273,0.117647,0.058824,0.0,0.176471,0.5,0.058824,0.117647,0.428571,0.0,0.052632,0.058824,0.0,0.0,0.058824,0.05,0.090909,0.047619,0.176471,0.052632,0.058824,0.0,0.0,0.1,0.055556,0.176471,0.035714,0.0,0.176471,0.047619,0.133333,0.176471,0.222222,0.125,0.133333,0.0625,0.095238,0.111111,0.047619,0.0,0.055556,0.157895,0.0,0.058824,0.117647,0.133333,0.238095,0.125,0.176471,0.0625,0.0,0.166667,0.0,0.0,0.125,0.0,0.1,0.25,0.125,0.133333,0.0,0.066667,0.055556,0.117647,0.0,0.0,0.111111


In [None]:
# Split 50/50 into train and test sets (keeping distribution)

train_df = req_df.iloc[::2]  # even

test_df = req_df.iloc[1::2]  # odd


In [None]:
# Create subset (every 2th request) and trim

# trim to size

train_df = train_df.iloc[0:2241000]
test_df = test_df.iloc[0:2241000]

In [None]:
train_df.to_parquet(path + 'train.parquet')
test_df.to_parquet(path + 'test.parquet')

with open(path + 'cumsum_cols.pkl', 'wb') as f:
    pickle.dump(cumsum_cols, f)