In [70]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style = 'whitegrid')
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LinearRegression
import scipy.stats as stats
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, VotingRegressor
from scipy.stats import randint as sp_randint, uniform as sp_uniform
import lightgbm as lgb
from xgboost import XGBRFRegressor, XGBRegressor
from catboost import CatBoostRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.decomposition import PCA

In [71]:
pd.set_option('display.max_columns', None)

In [152]:
train = pd.read_csv('train_events.csv')
train.head()

Unnamed: 0,Year,Month,ProductCategory,Sales,Avg_Sales,Event,Federal Holiday
0,2009,1,WomenClothing,1755.0,1215.466667,0.0,3.0
1,2009,1,MenClothing,524.0,1215.466667,0.0,3.0
2,2009,1,OtherClothing,936.0,1215.466667,0.0,3.0
3,2009,2,WomenClothing,1729.0,1308.433333,1.0,1.0
4,2009,2,MenClothing,496.0,1308.433333,1.0,1.0


In [153]:
test = pd.read_csv('test_events.csv')
test.head()

Unnamed: 0,Year,Month,ProductCategory,Avg_Sales,Event,Federal Holiday
0,2014,1,WomenClothing,1215.466667,0.0,3.0
1,2014,1,MenClothing,1215.466667,0.0,3.0
2,2014,1,OtherClothing,1215.466667,0.0,3.0
3,2014,2,WomenClothing,1308.433333,1.0,1.0
4,2014,2,MenClothing,1308.433333,1.0,1.0


In [154]:
train = pd.get_dummies(train, drop_first = True)
test = pd.get_dummies(test, drop_first = True)
X_train = train.drop('Sales', 1)
y = train['Sales']

In [155]:
X_train.head()

Unnamed: 0,Year,Month,Avg_Sales,Event,Federal Holiday,ProductCategory_OtherClothing,ProductCategory_WomenClothing
0,2009,1,1215.466667,0.0,3.0,0,1
1,2009,1,1215.466667,0.0,3.0,0,0
2,2009,1,1215.466667,0.0,3.0,1,0
3,2009,2,1308.433333,1.0,1.0,0,1
4,2009,2,1308.433333,1.0,1.0,0,0


#### RF

In [144]:
rfr = RandomForestRegressor(random_state = 0)
rfr.fit(X_train, y)
imp = list(zip(rfr.feature_importances_, X_train.columns))
sorted(imp)

[(7.860868018786031e-07, 'Thunderstorm'),
 (3.320796514791274e-05, 'Fog , Rain , Snow'),
 (7.644313438445564e-05, 'Snow'),
 (0.00016808532102975546, 'Fog'),
 (0.00027061177273521466, 'Year'),
 (0.0002845584107518643, 'Visibility_High'),
 (0.0003353436099591242, 'Rain , Snow'),
 (0.00037647740488739836, 'Fog , Snow'),
 (0.0004024559786027238, 'Precip'),
 (0.0004152710114675468, 'Fog , Rain'),
 (0.00045453907686247586, 'Mill_Use'),
 (0.0004812277287255418, 'Wind_High'),
 (0.0004886764373068641, 'Normal'),
 (0.0005208762688843206, 'Humidity_Avg'),
 (0.0005721252981574184, 'Upland_Planted'),
 (0.0005875229073685211, 'Dew_High'),
 (0.0006348928533320882, 'Visibility_Low'),
 (0.0006384989132439447, 'Temp_Avg'),
 (0.0006683246586416982, 'Wind_Low'),
 (0.0006963818058038136, 'Sea_Avg'),
 (0.0007086510965173477, 'Yield_Harvested'),
 (0.0007503995402160682, 'Sea_High'),
 (0.0007565222364006875, 'Federal_Holidays'),
 (0.0009200945291257584, 'Visibility_Avg'),
 (0.0009755559370322316, 'Exports'),


### Reducing Columns

In [145]:
combined = [train, test]

train.columns

for i in combined:
    i['Holidays'] = i['Events'] + i['Federal_Holidays']
    i['GDP'] = (i['Nominal_GDP'] + i['Real_GDP']) / 2
    i['Interest_Rate'] = (i['Rate_CreditCard'] + i['Rate_PersonalLoan']) / 2
    i['Bad_Weather'] = i['Fog'] + i['Fog , Rain'] + i['Fog , Rain , Snow'] + i['Fog , Snow'] + i['Rain'] + i['Rain , Snow'] + i['Snow'] + i['Thunderstorm']

train = train.drop(['Events', 'Federal_Holidays', 'Nominal_GDP', 'Real_GDP', 'Rate_CreditCard', 'Rate_PersonalLoan', 
                'Upland_Planted', 'Upland_Harvested', 'Temp_High', 'Temp_Low', 'Dew_High', 'Dew_Low', 
                'Humidity_High', 'Humidity_Low', 'Sea_High', 'Sea_Low', 'Visibility_High', 'Visibility_Low',
               'Wind_Low', 'Wind_High', 'Fog , Rain', 'Fog , Rain , Snow', 'Fog , Snow', 'Rain', 'Rain , Snow', 'Snow',
               'Thunderstorm', 'Fog', 'Change'], axis = 1)

test = test.drop(['Events', 'Federal_Holidays', 'Nominal_GDP', 'Real_GDP', 'Rate_CreditCard', 'Rate_PersonalLoan', 
                'Upland_Planted', 'Upland_Harvested', 'Temp_High', 'Temp_Low', 'Dew_High', 'Dew_Low', 
                'Humidity_High', 'Humidity_Low', 'Sea_High', 'Sea_Low', 'Visibility_High', 'Visibility_Low',
               'Wind_Low', 'Wind_High', 'Fog , Rain', 'Fog , Rain , Snow', 'Fog , Snow', 'Rain', 'Rain , Snow', 'Snow',
               'Thunderstorm', 'Fog', 'Change'], axis = 1)

In [146]:
train.head()

Unnamed: 0,Year,Month,Sales,CPI,Unemp_Rate,Wages,Cotton_Price,Yield_Harvested,Production,Mill_Use,Exports,Temp_Avg,Dew_Avg,Humidity_Avg,Sea_Avg,Visibility_Avg,Wind_Avg,Precip,Normal,ProductCategory_OtherClothing,ProductCategory_WomenClothing,Holidays,GDP,Interest_Rate,Bad_Weather
0,2009,1,1755.0,233.402,7.8,22.05,0.577,799,12.589,4.17,11.55,-2.096774,-9.903226,57.354839,1015.967742,14.0,12.0,2.445806,21.0,0,1,3.0,14414.403119,11.735,10.0
1,2009,1,524.0,233.402,7.8,22.05,0.577,799,12.589,4.17,11.55,-2.096774,-9.903226,57.354839,1015.967742,14.0,12.0,2.445806,21.0,0,0,3.0,14414.403119,11.735,10.0
2,2009,1,936.0,233.402,7.8,22.05,0.577,799,12.589,4.17,11.55,-2.096774,-9.903226,57.354839,1015.967742,14.0,12.0,2.445806,21.0,1,0,3.0,14414.403119,11.735,10.0
3,2009,2,1729.0,234.663,8.3,22.22,0.5521,799,12.589,3.87,11.1,2.785714,-6.392857,54.0,1017.071429,15.178571,12.214286,0.7825,22.0,0,1,2.0,14377.688519,12.01,6.0
4,2009,2,496.0,234.663,8.3,22.22,0.5521,799,12.589,3.87,11.1,2.785714,-6.392857,54.0,1017.071429,15.178571,12.214286,0.7825,22.0,0,0,2.0,14377.688519,12.01,6.0


In [147]:
test.head()

Unnamed: 0,Year,Month,CPI,Unemp_Rate,Wages,Cotton_Price,Yield_Harvested,Production,Mill_Use,Exports,Temp_Avg,Dew_Avg,Humidity_Avg,Sea_Avg,Visibility_Avg,Wind_Avg,Precip,Normal,ProductCategory_OtherClothing,ProductCategory_WomenClothing,Holidays,GDP,Interest_Rate,Bad_Weather
0,2014,1,259.596,6.6,24.35,0.9096,807,12.551,3.58,9.75,-1.766667,-9.4,59.433333,1019.2,13.866667,10.1,2.000333,20.0,0,1,3.0,16330.790646,11.035,10.0
1,2014,1,259.596,6.6,24.35,0.9096,807,12.551,3.58,9.75,-1.766667,-9.4,59.433333,1019.2,13.866667,10.1,2.000333,20.0,0,0,3.0,16330.790646,11.035,10.0
2,2014,1,259.596,6.6,24.35,0.9096,807,12.551,3.58,9.75,-1.766667,-9.4,59.433333,1019.2,13.866667,10.1,2.000333,20.0,1,0,3.0,16330.790646,11.035,10.0
3,2014,2,259.019,6.7,24.58,0.9405,807,12.551,3.58,9.75,0.285714,-7.821429,58.714286,1016.964286,12.357143,10.25,4.972857,17.0,0,1,2.0,16395.592634,10.96,11.0
4,2014,2,259.019,6.7,24.58,0.9405,807,12.551,3.58,9.75,0.285714,-7.821429,58.714286,1016.964286,12.357143,10.25,4.972857,17.0,0,0,2.0,16395.592634,10.96,11.0


In [148]:
train = pd.get_dummies(train, drop_first = True)
test = pd.get_dummies(test, drop_first = True)
X_train = train.drop('Sales', 1)
y = train['Sales']

In [149]:
X_train.head()

Unnamed: 0,Year,Month,CPI,Unemp_Rate,Wages,Cotton_Price,Yield_Harvested,Production,Mill_Use,Exports,Temp_Avg,Dew_Avg,Humidity_Avg,Sea_Avg,Visibility_Avg,Wind_Avg,Precip,Normal,ProductCategory_OtherClothing,ProductCategory_WomenClothing,Holidays,GDP,Interest_Rate,Bad_Weather
0,2009,1,233.402,7.8,22.05,0.577,799,12.589,4.17,11.55,-2.096774,-9.903226,57.354839,1015.967742,14.0,12.0,2.445806,21.0,0,1,3.0,14414.403119,11.735,10.0
1,2009,1,233.402,7.8,22.05,0.577,799,12.589,4.17,11.55,-2.096774,-9.903226,57.354839,1015.967742,14.0,12.0,2.445806,21.0,0,0,3.0,14414.403119,11.735,10.0
2,2009,1,233.402,7.8,22.05,0.577,799,12.589,4.17,11.55,-2.096774,-9.903226,57.354839,1015.967742,14.0,12.0,2.445806,21.0,1,0,3.0,14414.403119,11.735,10.0
3,2009,2,234.663,8.3,22.22,0.5521,799,12.589,3.87,11.1,2.785714,-6.392857,54.0,1017.071429,15.178571,12.214286,0.7825,22.0,0,1,2.0,14377.688519,12.01,6.0
4,2009,2,234.663,8.3,22.22,0.5521,799,12.589,3.87,11.1,2.785714,-6.392857,54.0,1017.071429,15.178571,12.214286,0.7825,22.0,0,0,2.0,14377.688519,12.01,6.0


#### RF

In [150]:
rfr = RandomForestRegressor(random_state = 0)
rfr.fit(X_train, y)
imp = list(zip(rfr.feature_importances_, X_train.columns))
sorted(imp)

[(0.000420298078873163, 'Year'),
 (0.000661739968356576, 'Normal'),
 (0.0006862082421926048, 'Bad_Weather'),
 (0.000853558297636776, 'Precip'),
 (0.0008794670835396934, 'Mill_Use'),
 (0.0013578483997132987, 'Visibility_Avg'),
 (0.001658738582812249, 'Yield_Harvested'),
 (0.00167633218287758, 'Exports'),
 (0.0019284697472200915, 'Production'),
 (0.002001537363128499, 'Sea_Avg'),
 (0.002020406207376376, 'Wind_Avg'),
 (0.0020508896798121217, 'Interest_Rate'),
 (0.002190466817461625, 'Humidity_Avg'),
 (0.004093842066059423, 'Temp_Avg'),
 (0.004263555842542576, 'Dew_Avg'),
 (0.004633240354927401, 'Unemp_Rate'),
 (0.006491425123601775, 'Wages'),
 (0.008833936754487551, 'Cotton_Price'),
 (0.012008993069442506, 'CPI'),
 (0.01715269912398862, 'GDP'),
 (0.01927697784291461, 'Holidays'),
 (0.02137335533088042, 'ProductCategory_OtherClothing'),
 (0.03276092692081061, 'Month'),
 (0.8507250869193438, 'ProductCategory_WomenClothing')]

#### GBR

In [117]:
gbr = GradientBoostingRegressor(random_state = 0)
gbr.fit(X_train, y)
gbr.score(X_train, y)

0.9987631267566738

##### Tuning GBR

In [108]:
gbr = GradientBoostingRegressor(random_state = 0)

params = {'n_estimators' : sp_randint(50, 200),
          'max_features' : sp_randint(1, 5),
          'max_depth' : sp_randint(2, 20),
          'min_samples_leaf' : sp_randint(1, 10),
          'min_samples_split' : sp_randint(2, 10),
          'learning_rate' : sp_uniform(0.01, 0.5)}

rsearch_gbr = RandomizedSearchCV(gbr, param_distributions = params, n_jobs = -1,
                                 cv = 5, n_iter = 100, random_state = 0)

rsearch_gbr.fit(X_train, y)
print(rsearch_gbr.best_params_)

{'learning_rate': 0.2844067519636624, 'max_depth': 2, 'max_features': 4, 'min_samples_leaf': 4, 'min_samples_split': 9, 'n_estimators': 59}


###### Tuned GBR

In [109]:
gbr = GradientBoostingRegressor(**rsearch_gbr.best_params_, random_state = 0)
gbr.fit(X_train, y)
gbr.score(X_train, y)

0.9783609661980899

#### LGBMR

In [151]:
lgbmr = lgb.LGBMRegressor(random_state = 0)
lgbmr.fit(X_train, y)
lgbmr.score(X_train, y)

0.9858297595801896

##### Tuning LGBMR

In [115]:
lgbmr = lgb.LGBMRegressor(random_state = 0)

params = {'n_estimators' : sp_randint(50, 200),
         'max_depth' : sp_randint(1, 25),
         'learning_rate' : sp_uniform(0.01, 0.5)}

rsearch_lgbm = RandomizedSearchCV(lgbmr, param_distributions = params,
                                 cv = 5, n_iter = 100, n_jobs = -1, random_state = 0)

rsearch_lgbm.fit(X_train, y)
print(rsearch_lgbm.best_params_)

{'learning_rate': 0.27051830310206465, 'max_depth': 2, 'n_estimators': 183}


###### Tuned LGBMR

In [118]:
lgbmr = lgb.LGBMRegressor(**rsearch_lgbm.best_params_, random_state = 0)
lgbmr.fit(X_train, y)
lgbmr.score(X_train, y)

0.9920664311076858

In [133]:
imp = pd.DataFrame(sorted(lgbmr.feature_importances_), index = X_train.columns)
imp.T

Unnamed: 0,Year,Month,CPI,Unemp_Rate,Wages,Cotton_Price,Yield_Harvested,Production,Mill_Use,Exports,Temp_Avg,Dew_Avg,Humidity_Avg,Sea_Avg,Visibility_Avg,Wind_Avg,Precip,Normal,Holidays,GDP,Interest_Rate,Bad_Weather,ProductCategory_OtherClothing,ProductCategory_WomenClothing
0,1,1,2,3,3,4,5,8,8,9,11,11,12,12,13,14,18,18,21,24,31,34,74,133


#### Submission

In [119]:
pred = lgbmr.predict(test)

In [120]:
kaggle = pd.read_csv('Kaggle_Submission_Format.csv')

In [121]:
kaggle['Sales(In ThousandDollars)'] = pred

In [122]:
kaggle.to_csv('Iteration_7_lgbmr_all_reduced.csv', index = False)
kaggle.head()

Unnamed: 0,Year,Sales(In ThousandDollars)
0,1,2846.115314
1,2,524.765774
2,3,1021.004052
3,4,3212.268399
4,5,673.101537


In [178]:
train = pd.read_csv('train_events.csv')
train.head()

Unnamed: 0,Year,Month,ProductCategory,Sales,Avg_Sales,Sales_Lag1,Event,Federal Holiday
0,2009,1,WomenClothing,1755.0,1215.466667,4442.8,0.0,3.0
1,2009,1,MenClothing,524.0,1215.466667,1032.8,0.0,3.0
2,2009,1,OtherClothing,936.0,1215.466667,1477.2,0.0,3.0
3,2009,2,WomenClothing,1729.0,1308.433333,1755.0,1.0,1.0
4,2009,2,MenClothing,496.0,1308.433333,524.0,1.0,1.0


In [179]:
test = pd.read_csv('test_events.csv')
test.head()

Unnamed: 0,Year,Month,ProductCategory,Avg_Sales,Sales_Lag1,Event,Federal Holiday
0,2014,1,WomenClothing,1215.466667,4865.0,0.0,3.0
1,2014,1,MenClothing,1215.466667,1085.0,0.0,3.0
2,2014,1,OtherClothing,1215.466667,1566.0,0.0,3.0
3,2014,2,WomenClothing,1308.433333,2139.4,1.0,1.0
4,2014,2,MenClothing,1308.433333,522.2,1.0,1.0


In [184]:
train = pd.get_dummies(train, drop_first = True)
test = pd.get_dummies(test, drop_first = True)
display(train.head())
display(test.head())

Unnamed: 0,Year,Month,Sales,Avg_Sales,Sales_Lag1,Event,Federal Holiday,ProductCategory_OtherClothing,ProductCategory_WomenClothing
0,2009,1,1755.0,1215.466667,4442.8,0.0,3.0,0,1
1,2009,1,524.0,1215.466667,1032.8,0.0,3.0,0,0
2,2009,1,936.0,1215.466667,1477.2,0.0,3.0,1,0
3,2009,2,1729.0,1308.433333,1755.0,1.0,1.0,0,1
4,2009,2,496.0,1308.433333,524.0,1.0,1.0,0,0


Unnamed: 0,Year,Month,Avg_Sales,Sales_Lag1,Event,Federal Holiday,ProductCategory_OtherClothing,ProductCategory_WomenClothing
0,2014,1,1215.466667,4865.0,0.0,3.0,0,1
1,2014,1,1215.466667,1085.0,0.0,3.0,0,0
2,2014,1,1215.466667,1566.0,0.0,3.0,1,0
3,2014,2,1308.433333,2139.4,1.0,1.0,0,1
4,2014,2,1308.433333,522.2,1.0,1.0,0,0


### Splitting

In [185]:
train_men = train[(train['ProductCategory_OtherClothing'] == 0) & (train['ProductCategory_WomenClothing'] == 0)]
train_women = train[(train['ProductCategory_OtherClothing'] == 0) & (train['ProductCategory_WomenClothing'] == 1)]
train_other = train[(train['ProductCategory_OtherClothing'] == 1) & (train['ProductCategory_WomenClothing'] == 0)]
train_men.shape, train_women.shape, train_other.shape

((60, 9), (60, 9), (60, 9))

In [186]:
test_men = test[(test['ProductCategory_OtherClothing'] == 0) & (test['ProductCategory_WomenClothing'] == 0)]
test_women = test[(test['ProductCategory_OtherClothing'] == 0) & (test['ProductCategory_WomenClothing'] == 1)]
test_other = test[(test['ProductCategory_OtherClothing'] == 1) & (test['ProductCategory_WomenClothing'] == 0)]
test_men.shape, test_women.shape, test_other.shape

((12, 8), (12, 8), (12, 8))

In [187]:
X_train_men = train_men.drop(['Sales'],1)
X_train_women = train_women.drop(['Sales'],1)
X_train_other = train_other.drop(['Sales'],1)
y_men = train_men['Sales']
y_women = train_women['Sales']
y_other = train_other['Sales']

#### LGBMR

In [188]:
lgbmr1 = lgb.LGBMRegressor(random_state = 0)
lgbmr1.fit(X_train_men, y_men)
lgbmr1.score(X_train_men, y_men)

0.726899334626814

In [189]:
lgbmr2 = lgb.LGBMRegressor(random_state = 0)
lgbmr2.fit(X_train_women, y_women)
lgbmr2.score(X_train_women, y_women)

0.7694505721163527

In [190]:
lgbmr3 = lgb.LGBMRegressor(random_state = 0)
lgbmr3.fit(X_train_other, y_other)
lgbmr3.score(X_train_other, y_other)

0.7362196222292795

##### Tuning LGBMR

In [191]:
lgbmr1 = lgb.LGBMRegressor(random_state = 0)

params = {'n_estimators' : sp_randint(50, 200),
         'max_depth' : sp_randint(1, 20),
         'learning_rate' : sp_uniform(0, 1)}

rsearch_lgbmr1 = RandomizedSearchCV(lgbmr1, param_distributions = params,
                                 cv = 5, n_iter = 100, n_jobs = -1, random_state = 0)

rsearch_lgbmr1.fit(X_train_men, y_men)
print(rsearch_lgbmr1.best_params_)

{'learning_rate': 0.4799771723750573, 'max_depth': 18, 'n_estimators': 75}


In [192]:
lgbmr2 = lgb.LGBMRegressor(random_state = 0)

params = {'n_estimators' : sp_randint(50, 200),
         'max_depth' : sp_randint(1, 20),
         'learning_rate' : sp_uniform(0, 1)}

rsearch_lgbmr2 = RandomizedSearchCV(lgbmr2, param_distributions = params,
                                 cv = 5, n_iter = 100, n_jobs = -1, random_state = 0)

rsearch_lgbmr2.fit(X_train_women, y_women)
print(rsearch_lgbmr2.best_params_)

{'learning_rate': 0.276893750817091, 'max_depth': 15, 'n_estimators': 101}


In [193]:
lgbmr3 = lgb.LGBMRegressor(random_state = 0)

params = {'n_estimators' : sp_randint(50, 200),
         'max_depth' : sp_randint(1, 20),
         'learning_rate' : sp_uniform(0, 1)}

rsearch_lgbmr3 = RandomizedSearchCV(lgbmr3, param_distributions = params,
                                 cv = 5, n_iter = 100, n_jobs = -1, random_state = 0)

rsearch_lgbmr3.fit(X_train_other, y_other)
print(rsearch_lgbmr3.best_params_)

{'learning_rate': 0.778345482025909, 'max_depth': 4, 'n_estimators': 145}


###### Tuned LGBMR

In [194]:
lgbmr1 = lgb.LGBMRegressor(**rsearch_lgbmr1.best_params_, random_state = 0)
lgbmr1.fit(X_train_men, y_men)
lgbmr1.score(X_train_men, y_men)

0.7586384413813797

In [195]:
lgbmr2 = lgb.LGBMRegressor(**rsearch_lgbmr2.best_params_, random_state = 0)
lgbmr2.fit(X_train_women, y_women)
lgbmr2.score(X_train_women, y_women)

0.8070719365879317

In [196]:
lgbmr3 = lgb.LGBMRegressor(**rsearch_lgbmr3.best_params_, random_state = 0)
lgbmr3.fit(X_train_other, y_other)
lgbmr3.score(X_train_other, y_other)

0.7871928932064514

#### Predictions

In [197]:
pred_men = lgbmr1.predict(test_men)
pred_women = lgbmr2.predict(test_women)
pred_other = lgbmr3.predict(test_other)

In [198]:
pred_men

array([572.69029167, 545.93528352, 631.40911046, 718.76877937,
       740.36162115, 688.99104179, 660.60802362, 613.8250251 ,
       672.10594805, 693.18020108, 913.8038697 , 913.8038697 ])

In [199]:
pred = list(zip(pred_men, pred_women, pred_other))
pred

[(572.6902916702423, 2596.862962556367, 1030.5985205656232),
 (545.9352835218322, 2607.605107572323, 961.7901114190646),
 (631.409110461562, 3279.883586502269, 1092.9636019031468),
 (718.7687793664112, 3586.909190186896, 1078.790415995631),
 (740.3616211516182, 3844.908298733567, 1104.9359424844279),
 (688.9910417916923, 3299.294971808819, 1100.656808198335),
 (660.6080236157919, 2899.9169270250177, 1002.4090991603541),
 (613.8250251049354, 3105.5684733459602, 1040.5230913100436),
 (672.1059480507931, 3278.0419943475263, 966.8643210787034),
 (693.1802010777033, 3439.003830706809, 1014.6365198306136),
 (913.8038696996423, 3952.307068073521, 1261.546264127536),
 (913.8038696996423, 4289.9596235419995, 1301.109312969107)]

In [200]:
pred[0]

(572.6902916702423, 2596.862962556367, 1030.5985205656232)

In [201]:
pred[1]

(545.9352835218322, 2607.605107572323, 961.7901114190646)

In [202]:
sol = []
for i in range(len(pred)):
    print(pred_women[i], pred_men[i], pred_other[i])
    sol.append(pred_women[i])
    sol.append(pred_men[i])
    sol.append(pred_other[i])

2596.862962556367 572.6902916702423 1030.5985205656232
2607.605107572323 545.9352835218322 961.7901114190646
3279.883586502269 631.409110461562 1092.9636019031468
3586.909190186896 718.7687793664112 1078.790415995631
3844.908298733567 740.3616211516182 1104.9359424844279
3299.294971808819 688.9910417916923 1100.656808198335
2899.9169270250177 660.6080236157919 1002.4090991603541
3105.5684733459602 613.8250251049354 1040.5230913100436
3278.0419943475263 672.1059480507931 966.8643210787034
3439.003830706809 693.1802010777033 1014.6365198306136
3952.307068073521 913.8038696996423 1261.546264127536
4289.9596235419995 913.8038696996423 1301.109312969107


In [203]:
sol = np.array(sol)
sol

array([2596.86296256,  572.69029167, 1030.59852057, 2607.60510757,
        545.93528352,  961.79011142, 3279.8835865 ,  631.40911046,
       1092.9636019 , 3586.90919019,  718.76877937, 1078.790416  ,
       3844.90829873,  740.36162115, 1104.93594248, 3299.29497181,
        688.99104179, 1100.6568082 , 2899.91692703,  660.60802362,
       1002.40909916, 3105.56847335,  613.8250251 , 1040.52309131,
       3278.04199435,  672.10594805,  966.86432108, 3439.00383071,
        693.18020108, 1014.63651983, 3952.30706807,  913.8038697 ,
       1261.54626413, 4289.95962354,  913.8038697 , 1301.10931297])

#### Submission Data 

In [204]:
kaggle = pd.read_csv('Kaggle_Submission_Format.csv')

In [205]:
kaggle['Sales(In ThousandDollars)'] = sol

In [177]:
kaggle.to_csv('Iter6_splits_AvgLag_events_lgbmr.csv', index = False)
kaggle.head()

Unnamed: 0,Year,Sales(In ThousandDollars)
0,1,2504.995195
1,2,570.1514
2,3,1089.04603
3,4,2796.266626
4,5,559.719568
