# Librerías

In [9]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import os
import seaborn as sns

%matplotlib inline

# Datos

In [10]:
# Load the preprocessed train and test tables from their CSV files.
train, test = [
    pd.read_csv(csv_path)
    for csv_path in ('../data/week2/prepro_train_pw.csv',
                     '../data/week2/prepro_test_pw.csv')
]

In [11]:
# Preview the first rows of the training table.
train.head()

Unnamed: 0,week,product_id,block_id,stock,std_stock,family_id,subfamily_id,size_id,color_id,position,category_id,price,sales
0,0,310130,1726,1383,34.811328,679611953,533441312,7,1,3.0,3,12.95,33
1,0,1178388,592,60,2.160247,732697347,691762817,4,1,19.0,1,49.95,0
2,0,1561460,1625,2373,55.438769,396066037,520569701,5,1,38.0,3,29.95,21
3,0,1874414,1135,1686,20.463906,744793598,811402796,6,1,12.0,6,25.95,24
4,0,2436420,779,245,23.377339,768025921,665805124,5,1,,0,25.95,0


In [12]:
# Preview the first rows of the test table.
test.head()

Unnamed: 0,week,product_id,block_id,stock,std_stock,family_id,subfamily_id,size_id,color_id,position,category_id,price,sales
0,12,151926,1969,2005,27.864312,396066037,335531561,5,1,17.0,2,25.95,-1
1,12,213413,1648,4859,36.709667,552529755,11509337,7,1,59.0,2,19.95,-1
2,12,310130,1726,2745,44.60057,679611953,533441312,6,1,48.0,1,12.95,-1
3,12,455200,1400,337,5.841971,998145072,490222156,3,1,44.0,2,29.95,-1
4,12,571044,1098,1591,42.09633,831347344,750943270,4,2,180.0,2,15.95,-1


In [13]:
# Preview the last rows of the test table.
test.tail()

Unnamed: 0,week,product_id,block_id,stock,std_stock,family_id,subfamily_id,size_id,color_id,position,category_id,price,sales
9002,12,999772605,1912,1730,37.335048,775013441,383177575,1,2,8.0,6,15.95,-1
9003,12,999794342,1015,5,0.95119,775013441,957377364,1,1,,0,29.95,-1
9004,12,999816749,296,981,13.600542,759754297,395002630,6,1,42.0,2,5.95,-1
9005,12,999862351,1794,875,5.372046,396066037,943122287,4,2,191.0,2,25.95,-1
9006,12,999936664,2218,1526,6.669469,396066037,739564025,8,1,31.0,1,15.95,-1


In [14]:
# Show every test row that belongs to product 151926.
test.loc[test['product_id'] == 151926]

Unnamed: 0,week,product_id,block_id,stock,std_stock,family_id,subfamily_id,size_id,color_id,position,category_id,price,sales
0,12,151926,1969,2005,27.864312,396066037,335531561,5,1,17.0,2,25.95,-1


#### Frequency encoding

Para las categorías `family_id` y `subfamily_id`

In [15]:
# Remove the 'week' column from both tables.
# NOTE(review): the per-product shift features computed further down depend on row
# order, not on 'week' -- presumably the rows are already sorted by week; confirm.
train, test = (frame.drop('week', axis=1) for frame in (train, test))

In [16]:
# Frequency-encode `family_id` and `subfamily_id` in place: every category value is
# replaced by the fraction of rows of the same table that carry that value.
# NOTE(review): train and test frequencies are computed independently, so one category
# can be mapped to different numbers in each table -- confirm this is intended.
for frame in (train, test):
    n_rows = len(frame)
    for column in ('family_id', 'subfamily_id'):
        share_by_category = frame.groupby(column).size() / n_rows
        frame[column] = frame[column].map(share_by_category)

In [17]:
# Neighbouring-row stock features, built the same way on train and on test.
# `shift` runs inside each product_id group in the current row order:
# lag = previous row(s) of the product, lead = following row(s).
# NOTE(review): the product_id == 151926 lookup above returns a single test row; if every
# test product has only one row, all test lag/lead columns are NaN -- confirm.
for frame in (train, test):
    # One step back / forward.
    frame['stock_lag1'] = frame.groupby(['product_id'])['stock'].shift(1)
    frame['stock_lead1'] = frame.groupby(['product_id'])['stock'].shift(-1)

    # Row-wise statistics over the window (current, lag1, lead1).
    window1 = ['stock', 'stock_lag1', 'stock_lead1']
    for stat in ('std', 'mean', 'min', 'max', 'median'):
        frame['{}_stock_shift1'.format(stat)] = getattr(frame[window1], stat)(axis=1)

    # Two steps back / forward.
    frame['stock_lag2'] = frame.groupby(['product_id'])['stock'].shift(2)
    frame['stock_lead2'] = frame.groupby(['product_id'])['stock'].shift(-2)

    # Row-wise statistics over the wider window (current, lag1, lead1, lag2, lead2).
    # The statistic order below fixes the column order of the resulting table.
    window2 = window1 + ['stock_lag2', 'stock_lead2']
    for stat in ('std', 'min', 'max', 'median', 'mean'):
        frame['{}_stock_shift2'.format(stat)] = getattr(frame[window2], stat)(axis=1)

In [18]:
# Neighbouring-row position features, built the same way on train and on test.
# `shift` runs inside each product_id group in the current row order:
# lag = previous row(s) of the product, lead = following row(s).
# NOTE(review): if every test product has only one row, all test lag/lead columns
# are NaN -- confirm against the data.
for frame in (train, test):
    # One step back / forward.
    frame['pos_lag1'] = frame.groupby(['product_id'])['position'].shift(1)
    frame['pos_lead1'] = frame.groupby(['product_id'])['position'].shift(-1)

    # Row-wise statistics over the window (current, lag1, lead1).
    window1 = ['position', 'pos_lag1', 'pos_lead1']
    for stat in ('std', 'mean', 'min', 'max', 'median'):
        frame['{}_pos_shift1'.format(stat)] = getattr(frame[window1], stat)(axis=1)

    # Two steps back / forward.
    frame['pos_lag2'] = frame.groupby(['product_id'])['position'].shift(2)
    frame['pos_lead2'] = frame.groupby(['product_id'])['position'].shift(-2)

    # Row-wise statistics over the wider window (current, lag1, lead1, lag2, lead2).
    # The statistic order below fixes the column order of the resulting table.
    window2 = window1 + ['pos_lag2', 'pos_lead2']
    for stat in ('std', 'min', 'max', 'median', 'mean'):
        frame['{}_pos_shift2'.format(stat)] = getattr(frame[window2], stat)(axis=1)

In [19]:
# Difference between the current stock and each of its lead/lag neighbours
# (current minus neighbour), for both tables.
for frame in (train, test):
    for neighbour in ('lead1', 'lead2', 'lag1', 'lag2'):
        frame['diff_stock_' + neighbour] = frame['stock'] - frame['stock_' + neighbour]

In [20]:
# Difference between the current position and each of its lead/lag neighbours
# (current minus neighbour), for both tables.
for frame in (train, test):
    for neighbour in ('lead1', 'lead2', 'lag1', 'lag2'):
        frame['diff_pos_' + neighbour] = frame['position'] - frame['pos_' + neighbour]

In [21]:
# Row-wise statistics over the stock level, its lag/lead values and the four
# current-minus-neighbour differences.
# NOTE(review): levels and differences are mixed in one window -- confirm this is intended.
shift3_stock_cols = ['stock', 'stock_lag1', 'stock_lead1',
                     'stock_lag2', 'stock_lead2', 'diff_stock_lead1',
                     'diff_stock_lead2', 'diff_stock_lag1', 'diff_stock_lag2']

for stat in ('std', 'mean', 'min', 'max', 'median'):
    for frame in (train, test):
        frame['{}_stock_shift3'.format(stat)] = getattr(frame[shift3_stock_cols], stat)(axis=1)

In [22]:
# Row-wise statistics over the position, its lag/lead values and the four
# current-minus-neighbour differences.
# NOTE(review): levels and differences are mixed in one window -- confirm this is intended.
shift3_pos_cols = ['position', 'pos_lag1', 'pos_lead1',
                   'pos_lag2', 'pos_lead2', 'diff_pos_lead1',
                   'diff_pos_lead2', 'diff_pos_lag1', 'diff_pos_lag2']

for stat in ('std', 'mean', 'min', 'max', 'median'):
    for frame in (train, test):
        frame['{}_pos_shift3'.format(stat)] = getattr(frame[shift3_pos_cols], stat)(axis=1)

In [23]:
# Interaction feature: size_id multiplied by color_id, for both tables.
for frame in (train, test):
    frame['size_p_color'] = frame['size_id'] * frame['color_id']

In [24]:
# Interaction feature: size_id divided by color_id, for both tables.
# NOTE(review): a color_id of 0 would yield inf/NaN here -- confirm 0 never occurs.
for frame in (train, test):
    frame['size_d_color'] = frame['size_id'] / frame['color_id']

In [25]:
# Position expressed as a fraction of the largest position found in the same table.
# NOTE(review): train and test are each scaled by their own maximum, so the two ratios
# are not on a common scale -- confirm this is intended.
for frame in (train, test):
    frame['ratio_position'] = frame['position'] / frame['position'].max()

#### Ratios

*desactivado*

In [26]:
# def gen_ratios(df, c):
#     for col in c:
#         df['ratio_{}'.format(col)] = df[col] / df[col].max()
        
#     return df

In [27]:
# cols_ratios = ['median_stock_shift3', 'mean_stock_shift3', 'max_stock_shift3', 'min_stock_shift3',
#               'median_stock_shift2', 'mean_stock_shift2', 'max_stock_shift2', 'min_stock_shift2',
#               'median_stock_shift1', 'mean_stock_shift1', 'max_stock_shift1', 'min_stock_shift1',
#                'median_pos_shift3', 'mean_pos_shift3', 'max_pos_shift3', 'min_pos_shift3',
#               'median_pos_shift2', 'mean_pos_shift2', 'max_pos_shift2', 'min_pos_shift2',
#               'median_pos_shift1', 'mean_pos_shift1', 'max_pos_shift1', 'min_pos_shift1']

# train = gen_ratios(train, cols_ratios)
# test = gen_ratios(test, cols_ratios)

In [28]:
# train.head()

In [29]:
# max_std_train1 = train.groupby('product_id').max()['std_stock_shift1']
# max_std_train1.name = 'max_std_stock_shift1'
# max_std_test1 = test.groupby('product_id').max()['std_stock_shift1']
# max_std_test1.name = 'max_std_stock_shift1'

# train = train.join(max_std_train1, on='product_id', how='left')
# test = test.join(max_std_test1, on='product_id', how='left')

# train['ratio_std_shift1'] = train.std_stock_shift1 / train.max_std_stock_shift1
# test['ratio_std_shift1'] = test.std_stock_shift1 / test.max_std_stock_shift1

# train = train.drop('max_std_stock_shift1', axis=1)
# test = test.drop('max_std_stock_shift1', axis=1)

# max_std_train2 = train.groupby('product_id').max()['std_stock_shift2']
# max_std_train2.name = 'max_std_stock_shift2'
# max_std_test2 = test.groupby('product_id').max()['std_stock_shift2']
# max_std_test2.name = 'max_std_stock_shift2'

# train = train.join(max_std_train2, on='product_id', how='left')
# test = test.join(max_std_test2, on='product_id', how='left')

# train['ratio_std_shift2'] = train.std_stock_shift2 / train.max_std_stock_shift2
# test['ratio_std_shift2'] = test.std_stock_shift2 / test.max_std_stock_shift2

# train = train.drop('max_std_stock_shift2', axis=1)
# test = test.drop('max_std_stock_shift2', axis=1)

In [30]:
# train.head()

In [31]:
# train[train.product_id==151926]

#### Frequency encoding por grupos

Útil para series temporales

In [32]:
# N1: for each row, the number of rows in the same table that share its
# (subfamily_id, size_id, color_id) combination (non-null product_id values are counted).
# Counts are computed separately for train and for test.
agrupacion = ['subfamily_id', 'size_id', 'color_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N1'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [33]:
# N2: for each row, the number of rows in the same table that share its
# (subfamily_id, size_id, color_id) combination.
# NOTE(review): these are the same grouping keys as the N1 cell, so N2 duplicates N1 --
# confirm whether a different grouping was intended.
agrupacion = ['subfamily_id', 'size_id', 'color_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N2'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [34]:
# N3: for each row, the number of rows in the same table that share its
# (subfamily_id, family_id) combination. Computed separately for train and test.
agrupacion = ['subfamily_id', 'family_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N3'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [35]:
# N5: for each row, the number of rows in the same table that belong to the same
# product_id. Computed separately for train and test.
agrupacion = ['product_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N5'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [36]:
# N6: for each row, the number of rows in the same table that share its
# (product_id, size_id, color_id) combination. Computed separately for train and test.
agrupacion = ['product_id', 'size_id', 'color_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N6'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [37]:
# N7: for each row, the number of rows in the same table that share its
# (size_id, color_id) combination. Computed separately for train and test.
agrupacion = ['size_id', 'color_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N7'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [38]:
# N8: for each row, the number of non-null family_id values among the rows of the same
# product_id in the same table.
# NOTE(review): this equals N5 whenever family_id has no missing values -- confirm the
# feature is not redundant.
agrupacion = ['product_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['family_id'].count().rename('N8'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [39]:
# N9: for each row, the number of rows in the same table that share its
# (price, family_id, subfamily_id) combination. Computed separately for train and test.
agrupacion = ['price', 'family_id', 'subfamily_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N9'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [40]:
# N10: for each row, the number of rows in the same table that share its
# (price, family_id, subfamily_id, size_id, color_id) combination.
# Computed separately for train and test.
agrupacion = ['price', 'family_id', 'subfamily_id', 'size_id', 'color_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N10'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [41]:
# N12: for each row, the number of rows in the same table that share its
# (block_id, size_id, color_id) combination. Computed separately for train and test.
agrupacion = ['block_id', 'size_id', 'color_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N12'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [42]:
# N13: for each row, the number of rows in the same table that share its
# (family_id, subfamily_id, size_id, color_id) combination.
# Computed separately for train and test.
agrupacion = ['family_id', 'subfamily_id', 'size_id', 'color_id']
train, test = (
    frame.join(frame.groupby(agrupacion)['product_id'].count().rename('N13'),
               on=agrupacion, how='left')
    for frame in (train, test)
)

In [43]:
# n = 'N12'
# train = train.drop(n, axis=1)
# test = test.drop(n, axis=1)

In [44]:
# Preview the training table after the feature columns have been added.
train.head()

Unnamed: 0,product_id,block_id,stock,std_stock,family_id,subfamily_id,size_id,color_id,position,category_id,...,N2,N3,N5,N6,N7,N8,N9,N10,N12,N13
0,310130,1726,1383,34.811328,0.007292,0.000579,7,1,3.0,3,...,47,49,12,11,7643,12,12,11,18,16
1,1178388,592,60,2.160247,0.048966,0.012315,4,1,19.0,1,...,501,932,12,12,13530,12,258,156,12,500
2,1561460,1625,2373,55.438769,0.125164,0.023697,5,1,38.0,3,...,603,2005,12,12,10524,12,1426,475,14,603
3,1874414,1135,1686,20.463906,0.110331,0.015944,6,1,12.0,6,...,554,1194,12,12,6672,12,333,85,12,529
4,2436420,779,245,23.377339,0.02599,0.004432,5,1,,0,...,189,375,12,12,10524,12,33,21,21,189


In [45]:
# Persist the engineered train/test tables as CSV without writing the DataFrame index.
# `index` is documented as a boolean; pass False explicitly instead of relying on
# None being falsy.
train.to_csv('../data/week2/final_train_pw.csv', index=False)
test.to_csv('../data/week2/final_test_pw.csv', index=False)