In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import gc
import os
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# Mount Google Drive into the Colab runtime; the data folders configured
# below (rootdata / destination_data) live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Show up to 150 columns when a wide frame is displayed.
# The fully qualified option name is used on purpose: the bare 'max_columns'
# pattern is ambiguous in recent pandas releases (it also matches
# 'styler.render.max_columns') and makes set_option raise OptionError.
pd.set_option('display.max_columns', 150)

# matplotlib and seaborn defaults for plotting
matplotlib.rcParams['figure.dpi'] = 120  # resolution
matplotlib.rcParams['figure.figsize'] = (8, 6)  # figure size

sns.set_style('darkgrid')

color = sns.color_palette()

# Google Drive folders: the original CSV files and the prepared feature CSV files.
# Both paths end with '/' because file names are appended to them directly.
rootdata = '/content/drive/MyDrive/Thesis Data/original_data/'
destination_data = '/content/drive/MyDrive/Thesis Data/prepared_data/'

# Data Loading

In [4]:
# Load the three prepared feature tables.
# NOTE(review): the two user-related variable names are swapped relative to
# the files they read: `user_level_feature_df` holds the user x product
# table and `user_product_level_feature_df` holds the per-user table.
# The rest of the notebook relies on these names as they are, so they are kept.
product_level_features_df = pd.read_csv(
    os.path.join(destination_data, 'final_product_level_features.csv'))
user_level_feature_df = pd.read_csv(
    os.path.join(destination_data, 'final_user_product_level_features.csv'))
user_product_level_feature_df = pd.read_csv(
    os.path.join(destination_data, 'final_user_level_features.csv'))


In [5]:
# Load the original (raw) CSV tables from the source-data folder.
aisles = pd.read_csv(os.path.join(rootdata, 'aisles.csv'))
departments = pd.read_csv(os.path.join(rootdata, 'departments.csv'))
products = pd.read_csv(os.path.join(rootdata, 'products.csv'))
orders = pd.read_csv(os.path.join(rootdata, 'orders.csv'))
# Note the double underscore in the two order_products file names.
order_products_prior = pd.read_csv(os.path.join(rootdata, 'order_products__prior.csv'))
order_products_train = pd.read_csv(os.path.join(rootdata, 'order_products__train.csv'))

In [6]:
# Method for memory reduction 
def reduce_memory(df, allow_float16=True):
    """Downcast numeric columns of ``df`` to the smallest dtype that fits them.

    Columns whose dtype is int64/int32/int16 are cast to the first of
    int8, int16, int32 whose range contains the column's min and max.
    Columns whose dtype is float64/float32 are cast to the first of
    float16, float32 whose range contains the column's min and max.
    All other columns are left untouched. Memory usage before and after is
    printed.

    The range checks are inclusive, so a value equal to a dtype limit
    (e.g. 127 for int8) still fits, and a column is never cast to a wider
    dtype than the one it already has.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified IN PLACE (columns are reassigned).
    allow_float16 : bool, default True
        When False, float columns are never cast below float32. float16 only
        checks the value range, not precision, so values are rounded to
        half precision; pass False when that rounding is not acceptable.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    start_mem_usg = df.memory_usage().sum() / 1024**2
    print("Memory usage of properties dataframe is :",start_mem_usg," MB")

    int_candidates = (np.int8, np.int16, np.int32)
    float_candidates = (np.float16, np.float32) if allow_float16 else (np.float32,)

    for col in df.columns:
        col_dtype = df[col].dtypes

        if col_dtype in ["int64", "int32", "int16"]:
            candidates, limits_of = int_candidates, np.iinfo
        elif col_dtype in ["float64", "float32"]:
            candidates, limits_of = float_candidates, np.finfo
        else:
            continue

        # min()/max() skip NaN; for an empty or all-NaN column they are NaN,
        # every comparison below is False and the column is left as it is.
        cmin = df[col].min()
        cmax = df[col].max()

        for candidate in candidates:
            limits = limits_of(candidate)
            if cmin >= limits.min and cmax <= limits.max:
                df[col] = df[col].astype(candidate)
                break

    print("")
    print("___MEMORY USAGE AFTER COMPLETION:___")
    mem_usg = df.memory_usage().sum() / 1024**2
    print("Memory usage is: ",mem_usg," MB")
    print("This is ",100*mem_usg/start_mem_usg,"% of the initial size")

    return df

# Understanding the New Feature Datasets

In [7]:
# (rows, columns) of the product-level feature table.
product_level_features_df.shape

(49677, 175)

In [8]:
# (rows, columns) of the frame read from final_user_product_level_features.csv.
user_level_feature_df.shape

(13307953, 10)

In [9]:
# (rows, columns) of the frame read from final_user_level_features.csv.
user_product_level_feature_df.shape

(206209, 20)

In [10]:
# Column dtypes as parsed from the CSV, before reduce_memory is applied further down.
user_product_level_feature_df.dtypes

user_id                                     int64
user_purchased_avg_dow                    float64
user_purchased_std_dow                    float64
user_avg_order_hour_of_day                float64
user_std_order_hour_of_day                float64
user_avg_days_since_prior_order           float64
user_std_days_since_prior_order           float64
total_order_by_user                         int64
total_product_purchased_by_user             int64
total_unique_product_purchased_by_user      int64
total_product_reordered_by_user           float64
product_reorder_propotion_by_user         float64
user_average_order_size                   float64
user_reorder_items_in_order               float64
orders_3                                    int64
orders_2                                    int64
orders_1                                    int64
reorder_3                                 float64
reorder_2                                 float64
reorder_1                                 float64


In [11]:
# Column dtypes as parsed from the CSV, before reduce_memory is applied further down.
user_level_feature_df.dtypes

user_id                             int64
product_id                          int64
total_product_orders_by_user        int64
total_product_reorders_by_user      int64
avg_add_to_cart_by_user           float64
avg_days_since_last_bought        float64
last_ordered_in                     int64
is_reorder_3                      float64
is_reorder_2                      float64
is_reorder_1                      float64
dtype: object

In [12]:
# Column dtypes as parsed from the CSV, before reduce_memory is applied further down.
product_level_features_df.dtypes

product_id                               int64
product_avg_add_to_cart_order          float64
product_orders_total_count               int64
product_reorder_total_count            float64
product_reorder_percentage             float64
                                        ...   
aisle_trash bags liners                  int64
aisle_vitamins supplements               int64
aisle_water seltzer sparkling water      int64
aisle_white wines                        int64
aisle_yogurt                             int64
Length: 175, dtype: object

In [13]:
# Downcast the numeric columns. reduce_memory reassigns the columns on the
# frame it receives and returns that same frame, so the result does not need
# to be assigned back; the returned frame is only displayed here.
reduce_memory(product_level_features_df)

Memory usage of properties dataframe is : 66.3260726928711  MB

___MEMORY USAGE AFTER COMPLETION:___
Memory usage is:  10.422771453857422  MB
This is  15.71444083855381 % of the initial size


Unnamed: 0,product_id,product_avg_add_to_cart_order,product_orders_total_count,product_reorder_total_count,product_reorder_percentage,prod_order_first_time_total_cnt,prod_order_second_time_total_cnt,product_is_organic,product_vs_unique_users,second_time_purchased_percent,aisle_avg_add_to_cart_order,aisle_std_add_to_cart_order,aisle_orders_total_count,aisle_reorder_total_count,aisle_reorder_percentage,aisle_vs_unique_users,department_avg_add_to_cart_order,department_std_add_to_cart_order,department_orders_total_count,department_reorder_total_count,department_reorder_percentage,department_vs_unique_users,department_babies,department_bakery,department_beverages,department_breakfast,department_bulk,department_canned goods,department_dairy eggs,department_deli,department_dry goods pasta,department_frozen,department_household,department_international,department_meat seafood,department_missing,department_other,department_pantry,department_personal care,department_pets,department_produce,department_snacks,aisle_asian foods,aisle_baby accessories,aisle_baby bath body care,aisle_baby food formula,aisle_bakery desserts,aisle_baking ingredients,aisle_baking supplies decor,aisle_beauty,aisle_beers coolers,aisle_body lotions soap,aisle_bread,aisle_breakfast bakery,aisle_breakfast bars pastries,aisle_bulk dried fruits vegetables,aisle_bulk grains rice dried goods,aisle_buns rolls,aisle_butter,aisle_candy chocolate,aisle_canned fruit applesauce,aisle_canned jarred vegetables,aisle_canned meals beans,aisle_canned meat seafood,aisle_cat food care,aisle_cereal,aisle_chips pretzels,aisle_cleaning products,aisle_cocoa drink mixes,aisle_coffee,aisle_cold flu allergy,aisle_condiments,aisle_cookies cakes,aisle_crackers,aisle_cream,...,aisle_frozen meals,aisle_frozen meat seafood,aisle_frozen pizza,aisle_frozen produce,aisle_frozen vegan vegetarian,aisle_fruit vegetable snacks,aisle_grains rice dried goods,aisle_granola,aisle_hair care,aisle_honeys syrups nectars,aisle_hot cereal pancake 
mixes,aisle_hot dogs bacon sausage,aisle_ice cream ice,aisle_ice cream toppings,aisle_indian foods,aisle_instant foods,aisle_juice nectars,aisle_kitchen supplies,aisle_kosher foods,aisle_latino foods,aisle_laundry,aisle_lunch meat,aisle_marinades meat preparation,aisle_meat counter,aisle_milk,aisle_mint gum,aisle_missing,aisle_more household,aisle_muscles joints pain relief,aisle_nuts seeds dried fruit,aisle_oils vinegars,aisle_oral hygiene,aisle_other,aisle_other creams cheeses,aisle_packaged cheese,aisle_packaged meat,aisle_packaged poultry,aisle_packaged produce,aisle_packaged seafood,aisle_packaged vegetables fruits,aisle_paper goods,aisle_pasta sauce,aisle_pickled goods olives,aisle_plates bowls cups flatware,aisle_popcorn jerky,aisle_poultry counter,aisle_prepared meals,aisle_prepared soups salads,aisle_preserved dips spreads,aisle_protein meal replacements,aisle_red wines,aisle_refrigerated,aisle_refrigerated pudding desserts,aisle_salad dressing toppings,aisle_seafood counter,aisle_shave needs,aisle_skin care,aisle_soap,aisle_soft drinks,aisle_soup broth bouillon,aisle_soy lactosefree,aisle_specialty cheeses,aisle_specialty wines champagnes,aisle_spices seasonings,aisle_spirits,aisle_spreads,aisle_tea,aisle_tofu meat alternatives,aisle_tortillas flat bread,aisle_trail mix snack mix,aisle_trash bags liners,aisle_vitamins supplements,aisle_water seltzer sparkling water,aisle_white wines,aisle_yogurt
0,1,5.800781,1852,1136.0,0.613281,716,276,0,716,0.385498,9.250000,7.843750,234065,128431.0,0.548828,54202,9.187500,7.691406,2887550,1657973.0,0.574219,174219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,9.890625,90,12.0,0.133301,78,8,0,78,0.102539,10.000000,7.996094,212092,32321.0,0.152344,76402,9.593750,7.875000,1875577,650301.0,0.346680,172755,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,3,6.414062,277,203.0,0.732910,74,36,0,74,0.486572,8.523438,7.816406,249341,131556.0,0.527832,53197,6.976562,6.710938,2690129,1757892.0,0.653320,172795,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
3,4,9.507812,329,147.0,0.446777,182,64,0,182,0.351562,9.210938,7.437500,390299,217262.0,0.556641,58749,9.000000,7.394531,2236432,1211890.0,0.541992,163233,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,5,6.464844,15,9.0,0.600098,6,4,0,6,0.666504,10.296875,8.187500,62510,17542.0,0.280518,32312,9.593750,7.875000,1875577,650301.0,0.346680,172755,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49672,49684,4.332031,9,1.0,0.111084,8,1,0,8,0.125000,4.851562,5.566406,28102,16084.0,0.572266,5695,5.429688,5.777344,153696,87595.0,0.569824,15798,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0
49673,49685,9.570312,49,6.0,0.122437,43,6,0,43,0.139526,9.296875,7.578125,99369,53875.0,0.541992,23508,9.000000,7.394531,2236432,1211890.0,0.541992,163233,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
49674,49686,7.500000,120,84.0,0.700195,36,16,0,36,0.444336,7.515625,6.488281,584834,391937.0,0.670410,103565,8.085938,6.906250,1176787,739188.0,0.627930,140612,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
49675,49687,7.539062,13,6.0,0.461426,7,4,0,7,0.571289,7.511719,6.761719,63421,39377.0,0.621094,7908,7.718750,7.011719,97724,58760.0,0.601074,14986,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [14]:
# Downcast the numeric columns. reduce_memory reassigns the columns on the
# frame it receives and returns that same frame, so the result does not need
# to be assigned back; the returned frame is only displayed here.
reduce_memory(user_level_feature_df)

Memory usage of properties dataframe is : 1015.3163604736328  MB

___MEMORY USAGE AFTER COMPLETION:___
Memory usage is:  266.5206346511841  MB
This is  26.25000886687726 % of the initial size


Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1
0,1,196,10,9,1.400391,18.296875,10,1.0,1.0,1.0
1,1,10258,9,8,3.333984,19.562500,10,1.0,1.0,1.0
2,1,10326,1,0,5.000000,28.000000,5,0.0,0.0,0.0
3,1,12427,10,9,3.300781,18.296875,10,1.0,1.0,1.0
4,1,13032,3,2,6.332031,21.671875,10,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
13307948,206209,43961,3,2,8.000000,23.328125,12,1.0,0.0,0.0
13307949,206209,44325,1,0,8.000000,9.000000,7,0.0,0.0,0.0
13307950,206209,48370,1,0,8.000000,30.000000,11,0.0,0.0,0.0
13307951,206209,48697,1,0,6.000000,9.000000,7,0.0,0.0,0.0


In [15]:
# Downcast the numeric columns. reduce_memory reassigns the columns on the
# frame it receives and returns that same frame, so the result does not need
# to be assigned back; the returned frame is only displayed here.
reduce_memory(user_product_level_feature_df)

Memory usage of properties dataframe is : 31.465118408203125  MB

___MEMORY USAGE AFTER COMPLETION:___
Memory usage is:  7.473058700561523  MB
This is  23.750295815232835 % of the initial size


Unnamed: 0,user_id,user_purchased_avg_dow,user_purchased_std_dow,user_avg_order_hour_of_day,user_std_order_hour_of_day,user_avg_days_since_prior_order,user_std_days_since_prior_order,total_order_by_user,total_product_purchased_by_user,total_unique_product_purchased_by_user,total_product_reordered_by_user,product_reorder_propotion_by_user,user_average_order_size,user_reorder_items_in_order,orders_3,orders_2,orders_1,reorder_3,reorder_2,reorder_1
0,1,2.644531,1.255859,10.539062,3.500000,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.000000,0.666504
1,2,2.005859,0.971191,10.437500,1.649414,15.367188,9.093750,14,195,102,93.0,0.476807,13.929688,0.447998,19,9,16,0.579102,0.000000,0.625000
2,3,1.011719,1.246094,16.359375,1.455078,10.976562,4.800781,12,88,33,55.0,0.625000,7.332031,0.658691,6,5,6,0.833496,1.000000,1.000000
3,4,4.722656,0.826660,13.109375,1.745117,13.500000,8.312500,5,18,17,1.0,0.055542,3.599609,0.028564,7,2,3,0.142822,0.000000,0.000000
4,5,1.622070,1.277344,15.726562,2.589844,12.273438,4.968750,4,37,23,14.0,0.378418,9.250000,0.377686,9,5,12,0.444336,0.399902,0.666504
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
206204,206205,3.718750,1.084961,13.625000,1.791992,13.406250,9.804688,3,32,24,8.0,0.250000,10.664062,0.369141,17,8,7,0.000000,0.250000,0.856934
206205,206206,2.312500,1.929688,16.796875,2.283203,4.085938,3.447266,67,285,150,135.0,0.473633,4.253906,0.570801,8,3,3,0.500000,0.333252,1.000000
206206,206207,2.896484,2.052734,13.132812,4.500000,14.031250,10.945312,16,223,92,131.0,0.587402,13.937500,0.637207,8,22,13,1.000000,0.681641,1.000000
206207,206208,2.759766,1.734375,13.968750,3.802734,7.433594,3.986328,49,677,198,479.0,0.707520,13.812500,0.700195,23,8,17,1.000000,0.875000,0.823730


# Final Dataset creation

In [16]:
# Attach the order-level columns from `orders` to every row of
# order_products_train. The inner join on order_id keeps only orders that
# appear in both tables.
train_orders = pd.merge(orders, order_products_train, how='inner', on='order_id')
train_orders.head()

Unnamed: 0,order_id,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order,product_id,add_to_cart_order,reordered
0,1187899,1,train,11,4,8,14.0,196,1,1
1,1187899,1,train,11,4,8,14.0,25133,2,1
2,1187899,1,train,11,4,8,14.0,38928,3,1
3,1187899,1,train,11,4,8,14.0,26405,4,1
4,1187899,1,train,11,4,8,14.0,39657,5,1


In [17]:
# Drop the columns that are not used when building the final dataset.
# Reassigning (instead of inplace=True) together with errors='ignore' makes
# the cell safe to re-run: a second execution no longer raises KeyError
# because the columns are already gone.
train_orders = train_orders.drop(
    columns=['eval_set', 'add_to_cart_order', 'order_id'], errors='ignore')

In [18]:
# Distinct user ids present in the train orders; preview the first ten.
train_users = train_orders['user_id'].unique()
train_users[:10]

array([ 1,  2,  5,  7,  8,  9, 10, 13, 14, 17])

In [19]:
# Size of the user x product feature frame before restricting it to train users.
user_level_feature_df.shape

(13307953, 10)

In [20]:

# Keep only the feature rows whose user_id appears in train_users.
is_train_user = user_level_feature_df['user_id'].isin(train_users)
df = user_level_feature_df.loc[is_train_user]
df.head()

Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1
0,1,196,10,9,1.400391,18.296875,10,1.0,1.0,1.0
1,1,10258,9,8,3.333984,19.5625,10,1.0,1.0,1.0
2,1,10326,1,0,5.0,28.0,5,0.0,0.0,0.0
3,1,12427,10,9,3.300781,18.296875,10,1.0,1.0,1.0
4,1,13032,3,2,6.332031,21.671875,10,1.0,0.0,0.0


In [21]:
# Outer join of the user x product features with the train order rows.
#  - pairs present only in `df` get NaN in the order-level columns and in
#    `reordered`;
#  - pairs present only in `train_orders` get NaN in the feature columns.
# The order-level values carried by the train_orders rows are what the
# per-user imputation in the next cell averages over.
df = pd.merge(df, train_orders, how='outer', on=['user_id', 'product_id'])
df.head()

Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1,order_number,order_dow,order_hour_of_day,days_since_prior_order,reordered
0,1,196,10.0,9.0,1.400391,18.296875,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0
1,1,10258,9.0,8.0,3.333984,19.5625,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0
2,1,10326,1.0,0.0,5.0,28.0,5.0,0.0,0.0,0.0,,,,,
3,1,12427,10.0,9.0,3.300781,18.296875,10.0,1.0,1.0,1.0,,,,,
4,1,13032,3.0,2.0,6.332031,21.671875,10.0,1.0,0.0,0.0,11.0,4.0,8.0,14.0,1.0


In [22]:
# Null value imputation
# Rows without a matching train-order row have no order-level values after
# the outer merge; fill them with the mean of that column over the same
# user's rows.
# The result is assigned back to `df` instead of calling
# `df.<col>.fillna(..., inplace=True)`: that chained in-place call operates on
# a temporary Series and does not update `df` under pandas Copy-on-Write.
order_level_cols = ['order_number', 'order_dow', 'order_hour_of_day',
                    'days_since_prior_order']
user_means = df.groupby('user_id')[order_level_cols].transform('mean')
df[order_level_cols] = df[order_level_cols].fillna(user_means)

In [23]:
# Distribution of the non-null `reordered` values (value_counts skips NaN by default).
df.reordered.value_counts()

1.0    828824
0.0    555793
Name: reordered, dtype: int64

In [24]:
# Number of rows with no `reordered` value, i.e. rows without a matching train_orders row in the outer merge.
df.reordered.isnull().sum()

7645837

In [25]:

# Drop the rows whose `reordered` value is exactly 0. NaN compares as
# "not equal to 0", so rows with a missing `reordered` are kept on purpose.
# NOTE: this must run before the NaNs in `reordered` are filled with 0 in a
# later cell; re-running it afterwards would also drop those filled rows.
not_zero = df['reordered'].ne(0)
df = df[not_zero]

In [26]:

# (rows, columns) after removing the rows with reordered == 0.
df.shape

(8474661, 15)

In [27]:
# Null value imputation for `reordered`: rows with no train-order match get 0.
# The filled column is assigned back through `assign` rather than
# `df.reordered.fillna(0, inplace=True)`: `df` is a filtered frame here, and
# the chained in-place call triggers SettingWithCopy/chained-assignment
# warnings and does not update `df` under pandas Copy-on-Write.
df = df.assign(reordered=df['reordered'].fillna(0))

# Remaining null count per column.
df.isnull().sum()

user_id                           0
product_id                        0
total_product_orders_by_user      0
total_product_reorders_by_user    0
avg_add_to_cart_by_user           0
avg_days_since_last_bought        0
last_ordered_in                   0
is_reorder_3                      0
is_reorder_2                      0
is_reorder_1                      0
order_number                      0
order_dow                         0
order_hour_of_day                 0
days_since_prior_order            0
reordered                         0
dtype: int64

In [28]:
# Preview the first rows after imputation.
df.head()

Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1,order_number,order_dow,order_hour_of_day,days_since_prior_order,reordered
0,1,196,10.0,9.0,1.400391,18.296875,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0
1,1,10258,9.0,8.0,3.333984,19.5625,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0
2,1,10326,1.0,0.0,5.0,28.0,5.0,0.0,0.0,0.0,11.0,4.0,8.0,14.0,0.0
3,1,12427,10.0,9.0,3.300781,18.296875,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,0.0
4,1,13032,3.0,2.0,6.332031,21.671875,10.0,1.0,0.0,0.0,11.0,4.0,8.0,14.0,1.0


In [29]:

# Preview the product-level features that are merged on product_id below.
product_level_features_df.head()

Unnamed: 0,product_id,product_avg_add_to_cart_order,product_orders_total_count,product_reorder_total_count,product_reorder_percentage,prod_order_first_time_total_cnt,prod_order_second_time_total_cnt,product_is_organic,product_vs_unique_users,second_time_purchased_percent,aisle_avg_add_to_cart_order,aisle_std_add_to_cart_order,aisle_orders_total_count,aisle_reorder_total_count,aisle_reorder_percentage,aisle_vs_unique_users,department_avg_add_to_cart_order,department_std_add_to_cart_order,department_orders_total_count,department_reorder_total_count,department_reorder_percentage,department_vs_unique_users,department_babies,department_bakery,department_beverages,department_breakfast,department_bulk,department_canned goods,department_dairy eggs,department_deli,department_dry goods pasta,department_frozen,department_household,department_international,department_meat seafood,department_missing,department_other,department_pantry,department_personal care,department_pets,department_produce,department_snacks,aisle_asian foods,aisle_baby accessories,aisle_baby bath body care,aisle_baby food formula,aisle_bakery desserts,aisle_baking ingredients,aisle_baking supplies decor,aisle_beauty,aisle_beers coolers,aisle_body lotions soap,aisle_bread,aisle_breakfast bakery,aisle_breakfast bars pastries,aisle_bulk dried fruits vegetables,aisle_bulk grains rice dried goods,aisle_buns rolls,aisle_butter,aisle_candy chocolate,aisle_canned fruit applesauce,aisle_canned jarred vegetables,aisle_canned meals beans,aisle_canned meat seafood,aisle_cat food care,aisle_cereal,aisle_chips pretzels,aisle_cleaning products,aisle_cocoa drink mixes,aisle_coffee,aisle_cold flu allergy,aisle_condiments,aisle_cookies cakes,aisle_crackers,aisle_cream,...,aisle_frozen meals,aisle_frozen meat seafood,aisle_frozen pizza,aisle_frozen produce,aisle_frozen vegan vegetarian,aisle_fruit vegetable snacks,aisle_grains rice dried goods,aisle_granola,aisle_hair care,aisle_honeys syrups nectars,aisle_hot cereal pancake 
mixes,aisle_hot dogs bacon sausage,aisle_ice cream ice,aisle_ice cream toppings,aisle_indian foods,aisle_instant foods,aisle_juice nectars,aisle_kitchen supplies,aisle_kosher foods,aisle_latino foods,aisle_laundry,aisle_lunch meat,aisle_marinades meat preparation,aisle_meat counter,aisle_milk,aisle_mint gum,aisle_missing,aisle_more household,aisle_muscles joints pain relief,aisle_nuts seeds dried fruit,aisle_oils vinegars,aisle_oral hygiene,aisle_other,aisle_other creams cheeses,aisle_packaged cheese,aisle_packaged meat,aisle_packaged poultry,aisle_packaged produce,aisle_packaged seafood,aisle_packaged vegetables fruits,aisle_paper goods,aisle_pasta sauce,aisle_pickled goods olives,aisle_plates bowls cups flatware,aisle_popcorn jerky,aisle_poultry counter,aisle_prepared meals,aisle_prepared soups salads,aisle_preserved dips spreads,aisle_protein meal replacements,aisle_red wines,aisle_refrigerated,aisle_refrigerated pudding desserts,aisle_salad dressing toppings,aisle_seafood counter,aisle_shave needs,aisle_skin care,aisle_soap,aisle_soft drinks,aisle_soup broth bouillon,aisle_soy lactosefree,aisle_specialty cheeses,aisle_specialty wines champagnes,aisle_spices seasonings,aisle_spirits,aisle_spreads,aisle_tea,aisle_tofu meat alternatives,aisle_tortillas flat bread,aisle_trail mix snack mix,aisle_trash bags liners,aisle_vitamins supplements,aisle_water seltzer sparkling water,aisle_white wines,aisle_yogurt
0,1,5.800781,1852,1136.0,0.613281,716,276,0,716,0.385498,9.25,7.84375,234065,128431.0,0.548828,54202,9.1875,7.691406,2887550,1657973.0,0.574219,174219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,2,9.890625,90,12.0,0.133301,78,8,0,78,0.102539,10.0,7.996094,212092,32321.0,0.152344,76402,9.59375,7.875,1875577,650301.0,0.34668,172755,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,3,6.414062,277,203.0,0.73291,74,36,0,74,0.486572,8.523438,7.816406,249341,131556.0,0.527832,53197,6.976562,6.710938,2690129,1757892.0,0.65332,172795,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
3,4,9.507812,329,147.0,0.446777,182,64,0,182,0.351562,9.210938,7.4375,390299,217262.0,0.556641,58749,9.0,7.394531,2236432,1211890.0,0.541992,163233,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,5,6.464844,15,9.0,0.600098,6,4,0,6,0.666504,10.296875,8.1875,62510,17542.0,0.280518,32312,9.59375,7.875,1875577,650301.0,0.34668,172755,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [30]:
# Preview the user x product feature frame that `df` was built from.
user_level_feature_df.head()


Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1
0,1,196,10,9,1.400391,18.296875,10,1.0,1.0,1.0
1,1,10258,9,8,3.333984,19.5625,10,1.0,1.0,1.0
2,1,10326,1,0,5.0,28.0,5,0.0,0.0,0.0
3,1,12427,10,9,3.300781,18.296875,10,1.0,1.0,1.0
4,1,13032,3,2,6.332031,21.671875,10,1.0,0.0,0.0


In [31]:
# Preview the per-user features that are merged on user_id below.
user_product_level_feature_df.head()

Unnamed: 0,user_id,user_purchased_avg_dow,user_purchased_std_dow,user_avg_order_hour_of_day,user_std_order_hour_of_day,user_avg_days_since_prior_order,user_std_days_since_prior_order,total_order_by_user,total_product_purchased_by_user,total_unique_product_purchased_by_user,total_product_reordered_by_user,product_reorder_propotion_by_user,user_average_order_size,user_reorder_items_in_order,orders_3,orders_2,orders_1,reorder_3,reorder_2,reorder_1
0,1,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504
1,2,2.005859,0.971191,10.4375,1.649414,15.367188,9.09375,14,195,102,93.0,0.476807,13.929688,0.447998,19,9,16,0.579102,0.0,0.625
2,3,1.011719,1.246094,16.359375,1.455078,10.976562,4.800781,12,88,33,55.0,0.625,7.332031,0.658691,6,5,6,0.833496,1.0,1.0
3,4,4.722656,0.82666,13.109375,1.745117,13.5,8.3125,5,18,17,1.0,0.055542,3.599609,0.028564,7,2,3,0.142822,0.0,0.0
4,5,1.62207,1.277344,15.726562,2.589844,12.273438,4.96875,4,37,23,14.0,0.378418,9.25,0.377686,9,5,12,0.444336,0.399902,0.666504


In [32]:
# Enrich every row with product-level features (joined on product_id) and
# then with per-user features (joined on user_id). Left joins keep every row
# of `df`, whether or not a match exists in the feature tables.
df = (
    df
    .merge(product_level_features_df, how='left', on='product_id')
    .merge(user_product_level_feature_df, how='left', on='user_id')
)
df.head()

Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1,order_number,order_dow,order_hour_of_day,days_since_prior_order,reordered,product_avg_add_to_cart_order,product_orders_total_count,product_reorder_total_count,product_reorder_percentage,prod_order_first_time_total_cnt,prod_order_second_time_total_cnt,product_is_organic,product_vs_unique_users,second_time_purchased_percent,aisle_avg_add_to_cart_order,aisle_std_add_to_cart_order,aisle_orders_total_count,aisle_reorder_total_count,aisle_reorder_percentage,aisle_vs_unique_users,department_avg_add_to_cart_order,department_std_add_to_cart_order,department_orders_total_count,department_reorder_total_count,department_reorder_percentage,department_vs_unique_users,department_babies,department_bakery,department_beverages,department_breakfast,department_bulk,department_canned goods,department_dairy eggs,department_deli,department_dry goods pasta,department_frozen,department_household,department_international,department_meat seafood,department_missing,department_other,department_pantry,department_personal care,department_pets,department_produce,department_snacks,aisle_asian foods,aisle_baby accessories,aisle_baby bath body care,aisle_baby food formula,aisle_bakery desserts,aisle_baking ingredients,aisle_baking supplies decor,aisle_beauty,aisle_beers coolers,aisle_body lotions soap,aisle_bread,aisle_breakfast bakery,aisle_breakfast bars pastries,aisle_bulk dried fruits vegetables,aisle_bulk grains rice dried goods,aisle_buns rolls,aisle_butter,aisle_candy chocolate,aisle_canned fruit applesauce,...,aisle_latino foods,aisle_laundry,aisle_lunch meat,aisle_marinades meat preparation,aisle_meat counter,aisle_milk,aisle_mint gum,aisle_missing,aisle_more household,aisle_muscles joints pain relief,aisle_nuts seeds dried fruit,aisle_oils vinegars,aisle_oral hygiene,aisle_other,aisle_other creams 
cheeses,aisle_packaged cheese,aisle_packaged meat,aisle_packaged poultry,aisle_packaged produce,aisle_packaged seafood,aisle_packaged vegetables fruits,aisle_paper goods,aisle_pasta sauce,aisle_pickled goods olives,aisle_plates bowls cups flatware,aisle_popcorn jerky,aisle_poultry counter,aisle_prepared meals,aisle_prepared soups salads,aisle_preserved dips spreads,aisle_protein meal replacements,aisle_red wines,aisle_refrigerated,aisle_refrigerated pudding desserts,aisle_salad dressing toppings,aisle_seafood counter,aisle_shave needs,aisle_skin care,aisle_soap,aisle_soft drinks,aisle_soup broth bouillon,aisle_soy lactosefree,aisle_specialty cheeses,aisle_specialty wines champagnes,aisle_spices seasonings,aisle_spirits,aisle_spreads,aisle_tea,aisle_tofu meat alternatives,aisle_tortillas flat bread,aisle_trail mix snack mix,aisle_trash bags liners,aisle_vitamins supplements,aisle_water seltzer sparkling water,aisle_white wines,aisle_yogurt,user_purchased_avg_dow,user_purchased_std_dow,user_avg_order_hour_of_day,user_std_order_hour_of_day,user_avg_days_since_prior_order,user_std_days_since_prior_order,total_order_by_user,total_product_purchased_by_user,total_unique_product_purchased_by_user,total_product_reordered_by_user,product_reorder_propotion_by_user,user_average_order_size,user_reorder_items_in_order,orders_3,orders_2,orders_1,reorder_3,reorder_2,reorder_1
0,1,196,10.0,9.0,1.400391,18.296875,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0,3.722656,35791,27791.0,0.776367,8000,4660,0,8000,0.58252,6.453125,6.449219,357537,228406.0,0.638672,63506,6.976562,6.710938,2690129,1757892.0,0.65332,172795,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504
1,1,10258,9.0,8.0,3.333984,19.5625,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0,4.277344,1946,1389.0,0.713867,557,308,0,557,0.552734,9.320312,7.90625,306487,159119.0,0.519043,76177,9.1875,7.691406,2887550,1657973.0,0.574219,174219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504
2,1,10326,1.0,0.0,5.0,28.0,5.0,0.0,0.0,0.0,11.0,4.0,8.0,14.0,0.0,4.191406,5526,3603.0,0.651855,1923,1003,0,1923,0.521484,7.144531,6.28125,3642188,2615469.0,0.718262,177141,8.023438,6.660156,9479291,6160710.0,0.649902,193237,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504
3,1,12427,10.0,9.0,3.300781,18.296875,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,0.0,4.761719,6476,4797.0,0.740723,1679,889,0,1679,0.529297,8.460938,7.375,163524,96804.0,0.591797,44854,9.1875,7.691406,2887550,1657973.0,0.574219,174219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504
4,1,13032,3.0,2.0,6.332031,21.671875,10.0,1.0,0.0,0.0,11.0,4.0,8.0,14.0,1.0,5.621094,3751,2465.0,0.657227,1286,617,0,1286,0.479736,9.046875,7.566406,377586,215822.0,0.571777,77080,9.125,7.601562,709569,398013.0,0.561035,114169,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504


In [33]:
# Feature engineering: order_diff = order_number - last_ordered_in.
# Presumably the number of orders elapsed since the user last ordered the product — TODO confirm.
df['order_diff'] = df['order_number'] - df['last_ordered_in']

In [34]:
# Sanity check: (rows, columns) of the feature table after adding order_diff.
df.shape

(8474661, 209)

In [35]:
# Count missing values per column, sorted descending so any column with gaps appears first.
df.isna().sum().sort_values(ascending=False)

order_diff                        0
aisle_food storage                0
aisle_canned jarred vegetables    0
aisle_canned fruit applesauce     0
aisle_candy chocolate             0
                                 ..
aisle_lunch meat                  0
aisle_laundry                     0
aisle_latino foods                0
aisle_kosher foods                0
user_id                           0
Length: 209, dtype: int64

In [36]:
# Preview the first five rows (head() defaults to n=5) to eyeball the new order_diff column.
df.head()

Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1,order_number,order_dow,order_hour_of_day,days_since_prior_order,reordered,product_avg_add_to_cart_order,product_orders_total_count,product_reorder_total_count,product_reorder_percentage,prod_order_first_time_total_cnt,prod_order_second_time_total_cnt,product_is_organic,product_vs_unique_users,second_time_purchased_percent,aisle_avg_add_to_cart_order,aisle_std_add_to_cart_order,aisle_orders_total_count,aisle_reorder_total_count,aisle_reorder_percentage,aisle_vs_unique_users,department_avg_add_to_cart_order,department_std_add_to_cart_order,department_orders_total_count,department_reorder_total_count,department_reorder_percentage,department_vs_unique_users,department_babies,department_bakery,department_beverages,department_breakfast,department_bulk,department_canned goods,department_dairy eggs,department_deli,department_dry goods pasta,department_frozen,department_household,department_international,department_meat seafood,department_missing,department_other,department_pantry,department_personal care,department_pets,department_produce,department_snacks,aisle_asian foods,aisle_baby accessories,aisle_baby bath body care,aisle_baby food formula,aisle_bakery desserts,aisle_baking ingredients,aisle_baking supplies decor,aisle_beauty,aisle_beers coolers,aisle_body lotions soap,aisle_bread,aisle_breakfast bakery,aisle_breakfast bars pastries,aisle_bulk dried fruits vegetables,aisle_bulk grains rice dried goods,aisle_buns rolls,aisle_butter,aisle_candy chocolate,aisle_canned fruit applesauce,...,aisle_laundry,aisle_lunch meat,aisle_marinades meat preparation,aisle_meat counter,aisle_milk,aisle_mint gum,aisle_missing,aisle_more household,aisle_muscles joints pain relief,aisle_nuts seeds dried fruit,aisle_oils vinegars,aisle_oral hygiene,aisle_other,aisle_other creams 
cheeses,aisle_packaged cheese,aisle_packaged meat,aisle_packaged poultry,aisle_packaged produce,aisle_packaged seafood,aisle_packaged vegetables fruits,aisle_paper goods,aisle_pasta sauce,aisle_pickled goods olives,aisle_plates bowls cups flatware,aisle_popcorn jerky,aisle_poultry counter,aisle_prepared meals,aisle_prepared soups salads,aisle_preserved dips spreads,aisle_protein meal replacements,aisle_red wines,aisle_refrigerated,aisle_refrigerated pudding desserts,aisle_salad dressing toppings,aisle_seafood counter,aisle_shave needs,aisle_skin care,aisle_soap,aisle_soft drinks,aisle_soup broth bouillon,aisle_soy lactosefree,aisle_specialty cheeses,aisle_specialty wines champagnes,aisle_spices seasonings,aisle_spirits,aisle_spreads,aisle_tea,aisle_tofu meat alternatives,aisle_tortillas flat bread,aisle_trail mix snack mix,aisle_trash bags liners,aisle_vitamins supplements,aisle_water seltzer sparkling water,aisle_white wines,aisle_yogurt,user_purchased_avg_dow,user_purchased_std_dow,user_avg_order_hour_of_day,user_std_order_hour_of_day,user_avg_days_since_prior_order,user_std_days_since_prior_order,total_order_by_user,total_product_purchased_by_user,total_unique_product_purchased_by_user,total_product_reordered_by_user,product_reorder_propotion_by_user,user_average_order_size,user_reorder_items_in_order,orders_3,orders_2,orders_1,reorder_3,reorder_2,reorder_1,order_diff
0,1,196,10.0,9.0,1.400391,18.296875,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0,3.722656,35791,27791.0,0.776367,8000,4660,0,8000,0.58252,6.453125,6.449219,357537,228406.0,0.638672,63506,6.976562,6.710938,2690129,1757892.0,0.65332,172795,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504,1.0
1,1,10258,9.0,8.0,3.333984,19.5625,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,1.0,4.277344,1946,1389.0,0.713867,557,308,0,557,0.552734,9.320312,7.90625,306487,159119.0,0.519043,76177,9.1875,7.691406,2887550,1657973.0,0.574219,174219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504,1.0
2,1,10326,1.0,0.0,5.0,28.0,5.0,0.0,0.0,0.0,11.0,4.0,8.0,14.0,0.0,4.191406,5526,3603.0,0.651855,1923,1003,0,1923,0.521484,7.144531,6.28125,3642188,2615469.0,0.718262,177141,8.023438,6.660156,9479291,6160710.0,0.649902,193237,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504,6.0
3,1,12427,10.0,9.0,3.300781,18.296875,10.0,1.0,1.0,1.0,11.0,4.0,8.0,14.0,0.0,4.761719,6476,4797.0,0.740723,1679,889,0,1679,0.529297,8.460938,7.375,163524,96804.0,0.591797,44854,9.1875,7.691406,2887550,1657973.0,0.574219,174219,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504,1.0
4,1,13032,3.0,2.0,6.332031,21.671875,10.0,1.0,0.0,0.0,11.0,4.0,8.0,14.0,1.0,5.621094,3751,2465.0,0.657227,1286,617,0,1286,0.479736,9.046875,7.566406,377586,215822.0,0.571777,77080,9.125,7.601562,709569,398013.0,0.561035,114169,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2.644531,1.255859,10.539062,3.5,19.140625,9.640625,10,59,18,41.0,0.694824,5.898438,0.706055,6,6,9,0.666504,1.0,0.666504,1.0


In [38]:
# Persist the final feature table under destination_data; index=False keeps the row index out of the file.
# NOTE(review): the head() output above shows df still carries an 'Unnamed: 0' column, which looks like
# an index saved by an earlier to_csv. It is written out here as a regular column — confirm whether it
# should be dropped before saving.
df.to_csv(destination_data+'Final_data.csv',index=False)