In [1]:
# Importing basic libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Removing warnings at output
import warnings
warnings.filterwarnings('ignore')

In [3]:
file=pd.read_csv('Segmentdata.csv',parse_dates=['InvoiceDate']) # Parsing 'InvoiceDate' as date format
file['UnitPrice'] = file['UnitPrice'].apply(np.int64) # Changing 'UnitPrice' to 'int' data type
file.head() # Checking 1st 5 rows of the segmented data

Unnamed: 0,InvoiceDate,Invoice Time,CustomerID,InvoiceNo,StockCode,Description,Country,Quantity,UnitPrice,Revenue,Items availability,revenue_buckets,price_buckets,final_revenue
0,2017-12-14,6:00,AVpgMuGwLJeJML43KY_c,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,United Kingdom,6,2,15.3,In Stock,Very Good,Medium,20
1,2017-08-09,5:00,AVpgMuGwLJeJML43KY_c,536365,71053,WHITE METAL LANTERN,United Kingdom,6,3,20.34,In Stock,Excellent,High,30
2,2017-10-10,5:00,AVpgMuGwLJeJML43KY_c,536365,84406B,CREAM CUPID HEARTS COAT HANGER,United Kingdom,8,2,22.0,In Stock,Excellent,High,30
3,2017-08-28,7:00,AVpgMuGwLJeJML43KY_c,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,United Kingdom,6,3,20.34,In Stock,Excellent,High,30
4,2017-10-24,4:00,AVpgMuGwLJeJML43KY_c,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,United Kingdom,6,3,20.34,In Stock,Excellent,High,30


In [4]:
# Calculating unique items and countries in segmented data
print("number of unique items :",file.Description.nunique()) 
print("number of unique country:",file.Country.nunique())

number of unique items : 2439
number of unique country: 5


In [5]:
# Assigning numerical values for 'Country' column
Labels = {'United Kingdom': 1,'Greece': 2,'Malta':3,'Canada':4,'United Arab Emirates':5} 

file.Country = [Labels[item] for item in file.Country] 

In [6]:
# Filtering rows in file with Top10 'Description' values and assigning it to new variable
f1=file.loc[file['Description'].isin(['VINTAGE UNION JACK MEMOBOARD','WOOD BLACK BOARD ANT WHITE FINISH','CREAM HEART CARD HOLDER','REGENCY CAKESTAND 3 TIER',
   'WHITE HANGING HEART T-LIGHT HOLDER','BLACK HEART CARD HOLDER','PARTY BUNTING','ASSORTED COLOUR BIRD ORNAMENT',
   'MEMO BOARD RETROSPOT  DESIGN','EMPIRE DESIGN ROSETTE'])]

# Assigning numerical values for Top10 'Description'
Mask = {'VINTAGE UNION JACK MEMOBOARD':1,'WOOD BLACK BOARD ANT WHITE FINISH':2,'CREAM HEART CARD HOLDER':3,
       'REGENCY CAKESTAND 3 TIER':4,'WHITE HANGING HEART T-LIGHT HOLDER':5,'BLACK HEART CARD HOLDER':6,
       'PARTY BUNTING':7,'ASSORTED COLOUR BIRD ORNAMENT':8,'MEMO BOARD RETROSPOT  DESIGN':9,
       'EMPIRE DESIGN ROSETTE':10}

f1.Description = [Mask[item] for item in f1.Description]

In [7]:
# Assigning numerical values to 'price_buckets'
M1= {'Critical':1,'Low':2,'Medium':3,'High':4,'Very High':5}
f1.price_buckets = [M1[item] for item in f1.price_buckets] 


In [8]:
# Assigning numerical values to 'revenue_buckets'
M2= {'Very Low':1,'Low':2,'Good':3,'Very Good':4,'Excellent':5}
f1.revenue_buckets = [M2[item] for item in f1.revenue_buckets] 

In [9]:
f1.head() # Checking the new Top10 'Description' wise filtered data

Unnamed: 0,InvoiceDate,Invoice Time,CustomerID,InvoiceNo,StockCode,Description,Country,Quantity,UnitPrice,Revenue,Items availability,revenue_buckets,price_buckets,final_revenue
0,2017-12-14,6:00,AVpgMuGwLJeJML43KY_c,536365,85123A,5,1,6,2,15.3,In Stock,4,3,20
11,2018-05-26,16:00,AVpe9FXeLJeJML43zHrq,536373,85123A,5,1,6,2,15.3,In Stock,4,3,20
27,2017-09-28,4:00,AV2Z1Efc-jtxr-f39lm6,536375,85123A,5,1,6,2,15.3,In Stock,4,3,20
46,2018-05-26,16:00,AVpi9AE_LJeJML43qkYJ,536384,82484,2,1,3,6,19.35,In Stock,4,5,20
55,2018-05-26,16:00,AVpiSS1A1cnluZ0-LyAY,536384,22189,3,1,4,3,15.8,In Stock,4,4,20


In [10]:
np.random.seed(12345) # Making random function to generate pseudo random numbers
msk = np.random.rand(len(f1)) < 0.7 # Random splitting for dataset with 7:3 
train = f1[msk] # 70% data
test = f1[~msk] # 30% data
train.shape, test.shape # Displaying their rows and columns count

((396, 14), (170, 14))

In [11]:
# Creating a new columns for 'InvoiceDate' to make easier calculations
train['dayofmonth'] = train.InvoiceDate.dt.day
train['dayofyear'] = train.InvoiceDate.dt.dayofyear
train['dayofweek'] = train.InvoiceDate.dt.dayofweek
train['month'] = train.InvoiceDate.dt.month
train['year'] = train.InvoiceDate.dt.year
train['weekofyear'] = train.InvoiceDate.dt.weekofyear
train.head() # Checking new train columns

Unnamed: 0,InvoiceDate,Invoice Time,CustomerID,InvoiceNo,StockCode,Description,Country,Quantity,UnitPrice,Revenue,Items availability,revenue_buckets,price_buckets,final_revenue,dayofmonth,dayofyear,dayofweek,month,year,weekofyear
11,2018-05-26,16:00,AVpe9FXeLJeJML43zHrq,536373,85123A,5,1,6,2,15.3,In Stock,4,3,20,26,146,5,5,2018,21
27,2017-09-28,4:00,AV2Z1Efc-jtxr-f39lm6,536375,85123A,5,1,6,2,15.3,In Stock,4,3,20,28,271,3,9,2017,39
46,2018-05-26,16:00,AVpi9AE_LJeJML43qkYJ,536384,82484,2,1,3,6,19.35,In Stock,4,5,20,26,146,5,5,2018,21
55,2018-05-26,16:00,AVpiSS1A1cnluZ0-LyAY,536384,22189,3,1,4,3,15.8,In Stock,4,4,20,26,146,5,5,2018,21
68,2017-06-12,3:00,AVpfLsb-ilAPnD_xWtDE,536390,85123A,5,1,64,2,163.2,In Stock,5,3,170,12,163,0,6,2017,24


In [12]:
# Creating a new columns for 'InvoiceDate' to make easier calculations
test['dayofmonth'] = test.InvoiceDate.dt.day
test['dayofyear'] = test.InvoiceDate.dt.dayofyear
test['dayofweek'] = test.InvoiceDate.dt.dayofweek
test['month'] = test.InvoiceDate.dt.month
test['year'] = test.InvoiceDate.dt.year
test['weekofyear'] = test.InvoiceDate.dt.weekofyear
test.head() # Checking new test columns


Unnamed: 0,InvoiceDate,Invoice Time,CustomerID,InvoiceNo,StockCode,Description,Country,Quantity,UnitPrice,Revenue,Items availability,revenue_buckets,price_buckets,final_revenue,dayofmonth,dayofyear,dayofweek,month,year,weekofyear
0,2017-12-14,6:00,AVpgMuGwLJeJML43KY_c,536365,85123A,5,1,6,2,15.3,In Stock,4,3,20,14,348,3,12,2017,50
83,2018-04-24,15:00,AWIm0C3TYSSHbkXwx3S6,536396,85123A,5,1,6,2,15.3,In Stock,4,3,20,24,114,1,4,2018,17
282,2017-04-12,17:00,AV2ZzbZWvKc47QAVpILS,536464,84879,8,1,8,1,13.52,Out Of Stock,4,3,20,12,102,2,4,2017,15
425,2018-05-27,2:00,AVpfI64PilAPnD_xVyc4,536536,84879,8,1,80,1,135.2,In Stock,5,3,140,27,147,6,5,2018,21
480,2018-05-26,16:00,AVpfAXof1cnluZ0-bz3u,536557,82484,2,1,1,6,6.45,In Stock,3,5,10,26,146,5,5,2018,21


In [13]:
# Factorising numerical columns for train (an alternative for get_dummy)
train['Country']=pd.factorize(train['Country'])[0]
train['Description']=pd.factorize(train['Description'])[0]
train['final_revenue']=pd.factorize(train['final_revenue'])[0]
train['price_buckets']=pd.factorize(train['price_buckets'])[0]
train['revenue_buckets']=pd.factorize(train['revenue_buckets'])[0]
train['dayofmonth']=pd.factorize(train['dayofmonth'])[0]
train['dayofyear']=pd.factorize(train['dayofyear'])[0]
train['dayofweek']=pd.factorize(train['dayofweek'])[0]
train['month']=pd.factorize(train['month'])[0]
train['year']=pd.factorize(train['year'])[0]
train['weekofyear']=pd.factorize(train['weekofyear'])[0]
train['UnitPrice']=pd.factorize(train['UnitPrice'])[0]

# Dropping categorical columns
train.drop(['InvoiceDate','Invoice Time','CustomerID','InvoiceNo','StockCode','Items availability','Revenue'],axis=1,inplace=True)
train.shape # Checking rows and columns count

(396, 13)

In [14]:
# Factorising numerical columns for test (an alternative for get_dummy)
test['Country']=pd.factorize(test['Country'])[0]
test['Description']=pd.factorize(test['Description'])[0]
test['final_revenue']=pd.factorize(test['final_revenue'])[0]
test['price_buckets']=pd.factorize(test['price_buckets'])[0]
test['revenue_buckets']=pd.factorize(test['revenue_buckets'])[0]
test['dayofmonth']=pd.factorize(test['dayofmonth'])[0]
test['dayofyear']=pd.factorize(test['dayofyear'])[0]
test['dayofweek']=pd.factorize(test['dayofweek'])[0]
test['month']=pd.factorize(test['month'])[0]
test['year']=pd.factorize(test['year'])[0]
test['weekofyear']=pd.factorize(test['weekofyear'])[0]
test['UnitPrice']=pd.factorize(test['UnitPrice'])[0]

# Dropping categorical columns
test.drop(['InvoiceDate','Invoice Time','CustomerID','InvoiceNo','StockCode','Items availability','Revenue'],axis=1,inplace=True)
test.shape # Checking rows and columns count

(170, 13)

In [15]:
X_train = train.drop('Quantity', axis=1).values # Drop the dependent variable
X_test = test.drop('Quantity', axis=1).values   # Drop the dependent variable
y_train = train['Quantity'].values              # Find the dependent variable
y_test = test['Quantity'].values                # Find the dependent variable

In [16]:
import lightgbm as lgb # Using LightGBM as predictive model 
from sklearn.metrics import mean_squared_error # MSE for accuracy

lgb_train = lgb.Dataset(X_train, y_train) # Sending train date
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train) # Sending test data

params = {'task':'train', 'boosting_type':'gbdt', 'objective':'regression', 
              'metric': {'rmse'}, 'num_leaves': 10, 'learning_rate': 0.05, 
              'feature_fraction': 0.8, 'max_depth': 5, 'verbose': 0, 
              'num_boost_round':20000, 'early_stopping_rounds':5000, 'nthread':-1} # setting model parameters

gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=lgb_eval,
                early_stopping_rounds=5) # sending values to models

# predict
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration) # predict using test
# eval
print('The rmse of prediction is:', mean_squared_error(y_test, y_pred) ** 0.5) # RMSE for accuracy

[1]	valid_0's rmse: 317.821
Training until validation scores don't improve for 5000 rounds
[2]	valid_0's rmse: 317.042
[3]	valid_0's rmse: 316.555
[4]	valid_0's rmse: 316.147
[5]	valid_0's rmse: 315.768
[6]	valid_0's rmse: 315.46
[7]	valid_0's rmse: 315.171
[8]	valid_0's rmse: 314.943
[9]	valid_0's rmse: 314.925
[10]	valid_0's rmse: 314.732
[11]	valid_0's rmse: 314.777
[12]	valid_0's rmse: 314.282
[13]	valid_0's rmse: 314.382
[14]	valid_0's rmse: 314.38
[15]	valid_0's rmse: 314.287
[16]	valid_0's rmse: 314.389
[17]	valid_0's rmse: 313.995
[18]	valid_0's rmse: 314.118
[19]	valid_0's rmse: 313.739
[20]	valid_0's rmse: 313.907
[21]	valid_0's rmse: 313.559
[22]	valid_0's rmse: 313.741
[23]	valid_0's rmse: 313.419
[24]	valid_0's rmse: 313.137
[25]	valid_0's rmse: 313.31
[26]	valid_0's rmse: 313.04
[27]	valid_0's rmse: 313.033
[28]	valid_0's rmse: 312.801
[29]	valid_0's rmse: 312.989
[30]	valid_0's rmse: 312.785
[31]	valid_0's rmse: 312.797
[32]	valid_0's rmse: 312.83
[33]	valid_0's rmse: 31

[313]	valid_0's rmse: 315.377
[314]	valid_0's rmse: 315.322
[315]	valid_0's rmse: 315.292
[316]	valid_0's rmse: 315.29
[317]	valid_0's rmse: 315.296
[318]	valid_0's rmse: 315.316
[319]	valid_0's rmse: 315.349
[320]	valid_0's rmse: 315.342
[321]	valid_0's rmse: 315.335
[322]	valid_0's rmse: 315.33
[323]	valid_0's rmse: 315.389
[324]	valid_0's rmse: 315.41
[325]	valid_0's rmse: 315.406
[326]	valid_0's rmse: 315.327
[327]	valid_0's rmse: 315.371
[328]	valid_0's rmse: 315.318
[329]	valid_0's rmse: 315.349
[330]	valid_0's rmse: 315.406
[331]	valid_0's rmse: 315.406
[332]	valid_0's rmse: 315.472
[333]	valid_0's rmse: 315.488
[334]	valid_0's rmse: 315.523
[335]	valid_0's rmse: 315.522
[336]	valid_0's rmse: 315.521
[337]	valid_0's rmse: 315.596
[338]	valid_0's rmse: 315.597
[339]	valid_0's rmse: 315.575
[340]	valid_0's rmse: 315.573
[341]	valid_0's rmse: 315.573
[342]	valid_0's rmse: 315.572
[343]	valid_0's rmse: 315.637
[344]	valid_0's rmse: 315.629
[345]	valid_0's rmse: 315.607
[346]	valid_0

[591]	valid_0's rmse: 320.385
[592]	valid_0's rmse: 320.383
[593]	valid_0's rmse: 320.412
[594]	valid_0's rmse: 320.417
[595]	valid_0's rmse: 320.479
[596]	valid_0's rmse: 320.476
[597]	valid_0's rmse: 320.535
[598]	valid_0's rmse: 320.584
[599]	valid_0's rmse: 320.597
[600]	valid_0's rmse: 320.558
[601]	valid_0's rmse: 320.552
[602]	valid_0's rmse: 320.561
[603]	valid_0's rmse: 320.588
[604]	valid_0's rmse: 320.599
[605]	valid_0's rmse: 320.572
[606]	valid_0's rmse: 320.574
[607]	valid_0's rmse: 320.561
[608]	valid_0's rmse: 320.602
[609]	valid_0's rmse: 320.569
[610]	valid_0's rmse: 320.607
[611]	valid_0's rmse: 320.631
[612]	valid_0's rmse: 320.605
[613]	valid_0's rmse: 320.596
[614]	valid_0's rmse: 320.62
[615]	valid_0's rmse: 320.59
[616]	valid_0's rmse: 320.56
[617]	valid_0's rmse: 320.529
[618]	valid_0's rmse: 320.496
[619]	valid_0's rmse: 320.515
[620]	valid_0's rmse: 320.516
[621]	valid_0's rmse: 320.515
[622]	valid_0's rmse: 320.56
[623]	valid_0's rmse: 320.535
[624]	valid_0'

[885]	valid_0's rmse: 322.052
[886]	valid_0's rmse: 322.06
[887]	valid_0's rmse: 322.055
[888]	valid_0's rmse: 322.087
[889]	valid_0's rmse: 322.093
[890]	valid_0's rmse: 322.107
[891]	valid_0's rmse: 322.108
[892]	valid_0's rmse: 322.114
[893]	valid_0's rmse: 322.117
[894]	valid_0's rmse: 322.115
[895]	valid_0's rmse: 322.107
[896]	valid_0's rmse: 322.123
[897]	valid_0's rmse: 322.145
[898]	valid_0's rmse: 322.145
[899]	valid_0's rmse: 322.166
[900]	valid_0's rmse: 322.166
[901]	valid_0's rmse: 322.18
[902]	valid_0's rmse: 322.181
[903]	valid_0's rmse: 322.191
[904]	valid_0's rmse: 322.192
[905]	valid_0's rmse: 322.204
[906]	valid_0's rmse: 322.226
[907]	valid_0's rmse: 322.246
[908]	valid_0's rmse: 322.243
[909]	valid_0's rmse: 322.256
[910]	valid_0's rmse: 322.274
[911]	valid_0's rmse: 322.304
[912]	valid_0's rmse: 322.324
[913]	valid_0's rmse: 322.323
[914]	valid_0's rmse: 322.333
[915]	valid_0's rmse: 322.345
[916]	valid_0's rmse: 322.363
[917]	valid_0's rmse: 322.385
[918]	valid_

[1212]	valid_0's rmse: 323.499
[1213]	valid_0's rmse: 323.489
[1214]	valid_0's rmse: 323.496
[1215]	valid_0's rmse: 323.498
[1216]	valid_0's rmse: 323.504
[1217]	valid_0's rmse: 323.509
[1218]	valid_0's rmse: 323.506
[1219]	valid_0's rmse: 323.519
[1220]	valid_0's rmse: 323.521
[1221]	valid_0's rmse: 323.529
[1222]	valid_0's rmse: 323.524
[1223]	valid_0's rmse: 323.52
[1224]	valid_0's rmse: 323.51
[1225]	valid_0's rmse: 323.518
[1226]	valid_0's rmse: 323.517
[1227]	valid_0's rmse: 323.508
[1228]	valid_0's rmse: 323.512
[1229]	valid_0's rmse: 323.51
[1230]	valid_0's rmse: 323.501
[1231]	valid_0's rmse: 323.49
[1232]	valid_0's rmse: 323.49
[1233]	valid_0's rmse: 323.48
[1234]	valid_0's rmse: 323.48
[1235]	valid_0's rmse: 323.487
[1236]	valid_0's rmse: 323.48
[1237]	valid_0's rmse: 323.494
[1238]	valid_0's rmse: 323.498
[1239]	valid_0's rmse: 323.483
[1240]	valid_0's rmse: 323.483
[1241]	valid_0's rmse: 323.48
[1242]	valid_0's rmse: 323.479
[1243]	valid_0's rmse: 323.466
[1244]	valid_0's 

[1548]	valid_0's rmse: 323.736
[1549]	valid_0's rmse: 323.737
[1550]	valid_0's rmse: 323.74
[1551]	valid_0's rmse: 323.742
[1552]	valid_0's rmse: 323.743
[1553]	valid_0's rmse: 323.742
[1554]	valid_0's rmse: 323.74
[1555]	valid_0's rmse: 323.742
[1556]	valid_0's rmse: 323.737
[1557]	valid_0's rmse: 323.739
[1558]	valid_0's rmse: 323.739
[1559]	valid_0's rmse: 323.748
[1560]	valid_0's rmse: 323.755
[1561]	valid_0's rmse: 323.753
[1562]	valid_0's rmse: 323.754
[1563]	valid_0's rmse: 323.749
[1564]	valid_0's rmse: 323.744
[1565]	valid_0's rmse: 323.752
[1566]	valid_0's rmse: 323.744
[1567]	valid_0's rmse: 323.757
[1568]	valid_0's rmse: 323.755
[1569]	valid_0's rmse: 323.752
[1570]	valid_0's rmse: 323.761
[1571]	valid_0's rmse: 323.764
[1572]	valid_0's rmse: 323.766
[1573]	valid_0's rmse: 323.761
[1574]	valid_0's rmse: 323.76
[1575]	valid_0's rmse: 323.766
[1576]	valid_0's rmse: 323.761
[1577]	valid_0's rmse: 323.759
[1578]	valid_0's rmse: 323.763
[1579]	valid_0's rmse: 323.775
[1580]	vali

[1861]	valid_0's rmse: 324.041
[1862]	valid_0's rmse: 324.037
[1863]	valid_0's rmse: 324.032
[1864]	valid_0's rmse: 324.033
[1865]	valid_0's rmse: 324.028
[1866]	valid_0's rmse: 324.026
[1867]	valid_0's rmse: 324.021
[1868]	valid_0's rmse: 324.018
[1869]	valid_0's rmse: 324.018
[1870]	valid_0's rmse: 324.024
[1871]	valid_0's rmse: 324.026
[1872]	valid_0's rmse: 324.025
[1873]	valid_0's rmse: 324.024
[1874]	valid_0's rmse: 324.018
[1875]	valid_0's rmse: 324.014
[1876]	valid_0's rmse: 324.013
[1877]	valid_0's rmse: 324.006
[1878]	valid_0's rmse: 324.008
[1879]	valid_0's rmse: 324.003
[1880]	valid_0's rmse: 323.999
[1881]	valid_0's rmse: 324.002
[1882]	valid_0's rmse: 324.005
[1883]	valid_0's rmse: 324.001
[1884]	valid_0's rmse: 324.003
[1885]	valid_0's rmse: 324.004
[1886]	valid_0's rmse: 323.999
[1887]	valid_0's rmse: 323.998
[1888]	valid_0's rmse: 323.996
[1889]	valid_0's rmse: 323.998
[1890]	valid_0's rmse: 323.997
[1891]	valid_0's rmse: 323.999
[1892]	valid_0's rmse: 324.001
[1893]	v

[2276]	valid_0's rmse: 323.742
[2277]	valid_0's rmse: 323.741
[2278]	valid_0's rmse: 323.74
[2279]	valid_0's rmse: 323.738
[2280]	valid_0's rmse: 323.737
[2281]	valid_0's rmse: 323.741
[2282]	valid_0's rmse: 323.741
[2283]	valid_0's rmse: 323.741
[2284]	valid_0's rmse: 323.742
[2285]	valid_0's rmse: 323.738
[2286]	valid_0's rmse: 323.739
[2287]	valid_0's rmse: 323.739
[2288]	valid_0's rmse: 323.739
[2289]	valid_0's rmse: 323.738
[2290]	valid_0's rmse: 323.734
[2291]	valid_0's rmse: 323.738
[2292]	valid_0's rmse: 323.74
[2293]	valid_0's rmse: 323.741
[2294]	valid_0's rmse: 323.739
[2295]	valid_0's rmse: 323.737
[2296]	valid_0's rmse: 323.736
[2297]	valid_0's rmse: 323.734
[2298]	valid_0's rmse: 323.733
[2299]	valid_0's rmse: 323.731
[2300]	valid_0's rmse: 323.734
[2301]	valid_0's rmse: 323.733
[2302]	valid_0's rmse: 323.734
[2303]	valid_0's rmse: 323.73
[2304]	valid_0's rmse: 323.727
[2305]	valid_0's rmse: 323.733
[2306]	valid_0's rmse: 323.734
[2307]	valid_0's rmse: 323.733
[2308]	vali

[2622]	valid_0's rmse: 323.688
[2623]	valid_0's rmse: 323.692
[2624]	valid_0's rmse: 323.694
[2625]	valid_0's rmse: 323.696
[2626]	valid_0's rmse: 323.695
[2627]	valid_0's rmse: 323.695
[2628]	valid_0's rmse: 323.694
[2629]	valid_0's rmse: 323.694
[2630]	valid_0's rmse: 323.695
[2631]	valid_0's rmse: 323.692
[2632]	valid_0's rmse: 323.695
[2633]	valid_0's rmse: 323.692
[2634]	valid_0's rmse: 323.694
[2635]	valid_0's rmse: 323.695
[2636]	valid_0's rmse: 323.697
[2637]	valid_0's rmse: 323.694
[2638]	valid_0's rmse: 323.693
[2639]	valid_0's rmse: 323.691
[2640]	valid_0's rmse: 323.685
[2641]	valid_0's rmse: 323.686
[2642]	valid_0's rmse: 323.684
[2643]	valid_0's rmse: 323.683
[2644]	valid_0's rmse: 323.682
[2645]	valid_0's rmse: 323.68
[2646]	valid_0's rmse: 323.682
[2647]	valid_0's rmse: 323.68
[2648]	valid_0's rmse: 323.68
[2649]	valid_0's rmse: 323.681
[2650]	valid_0's rmse: 323.681
[2651]	valid_0's rmse: 323.68
[2652]	valid_0's rmse: 323.68
[2653]	valid_0's rmse: 323.682
[2654]	valid_

[2975]	valid_0's rmse: 323.666
[2976]	valid_0's rmse: 323.668
[2977]	valid_0's rmse: 323.67
[2978]	valid_0's rmse: 323.669
[2979]	valid_0's rmse: 323.67
[2980]	valid_0's rmse: 323.668
[2981]	valid_0's rmse: 323.669
[2982]	valid_0's rmse: 323.669
[2983]	valid_0's rmse: 323.669
[2984]	valid_0's rmse: 323.669
[2985]	valid_0's rmse: 323.668
[2986]	valid_0's rmse: 323.668
[2987]	valid_0's rmse: 323.668
[2988]	valid_0's rmse: 323.668
[2989]	valid_0's rmse: 323.666
[2990]	valid_0's rmse: 323.664
[2991]	valid_0's rmse: 323.665
[2992]	valid_0's rmse: 323.666
[2993]	valid_0's rmse: 323.664
[2994]	valid_0's rmse: 323.662
[2995]	valid_0's rmse: 323.663
[2996]	valid_0's rmse: 323.662
[2997]	valid_0's rmse: 323.663
[2998]	valid_0's rmse: 323.661
[2999]	valid_0's rmse: 323.661
[3000]	valid_0's rmse: 323.663
[3001]	valid_0's rmse: 323.662
[3002]	valid_0's rmse: 323.662
[3003]	valid_0's rmse: 323.661
[3004]	valid_0's rmse: 323.659
[3005]	valid_0's rmse: 323.659
[3006]	valid_0's rmse: 323.662
[3007]	val

[3319]	valid_0's rmse: 323.711
[3320]	valid_0's rmse: 323.712
[3321]	valid_0's rmse: 323.711
[3322]	valid_0's rmse: 323.713
[3323]	valid_0's rmse: 323.713
[3324]	valid_0's rmse: 323.714
[3325]	valid_0's rmse: 323.713
[3326]	valid_0's rmse: 323.713
[3327]	valid_0's rmse: 323.713
[3328]	valid_0's rmse: 323.714
[3329]	valid_0's rmse: 323.715
[3330]	valid_0's rmse: 323.717
[3331]	valid_0's rmse: 323.717
[3332]	valid_0's rmse: 323.716
[3333]	valid_0's rmse: 323.716
[3334]	valid_0's rmse: 323.716
[3335]	valid_0's rmse: 323.715
[3336]	valid_0's rmse: 323.715
[3337]	valid_0's rmse: 323.714
[3338]	valid_0's rmse: 323.713
[3339]	valid_0's rmse: 323.713
[3340]	valid_0's rmse: 323.714
[3341]	valid_0's rmse: 323.714
[3342]	valid_0's rmse: 323.716
[3343]	valid_0's rmse: 323.715
[3344]	valid_0's rmse: 323.713
[3345]	valid_0's rmse: 323.713
[3346]	valid_0's rmse: 323.712
[3347]	valid_0's rmse: 323.711
[3348]	valid_0's rmse: 323.711
[3349]	valid_0's rmse: 323.711
[3350]	valid_0's rmse: 323.709
[3351]	v

[3639]	valid_0's rmse: 323.735
[3640]	valid_0's rmse: 323.734
[3641]	valid_0's rmse: 323.736
[3642]	valid_0's rmse: 323.735
[3643]	valid_0's rmse: 323.735
[3644]	valid_0's rmse: 323.736
[3645]	valid_0's rmse: 323.734
[3646]	valid_0's rmse: 323.733
[3647]	valid_0's rmse: 323.733
[3648]	valid_0's rmse: 323.732
[3649]	valid_0's rmse: 323.732
[3650]	valid_0's rmse: 323.732
[3651]	valid_0's rmse: 323.733
[3652]	valid_0's rmse: 323.732
[3653]	valid_0's rmse: 323.732
[3654]	valid_0's rmse: 323.731
[3655]	valid_0's rmse: 323.731
[3656]	valid_0's rmse: 323.732
[3657]	valid_0's rmse: 323.732
[3658]	valid_0's rmse: 323.733
[3659]	valid_0's rmse: 323.734
[3660]	valid_0's rmse: 323.735
[3661]	valid_0's rmse: 323.734
[3662]	valid_0's rmse: 323.734
[3663]	valid_0's rmse: 323.735
[3664]	valid_0's rmse: 323.735
[3665]	valid_0's rmse: 323.734
[3666]	valid_0's rmse: 323.735
[3667]	valid_0's rmse: 323.736
[3668]	valid_0's rmse: 323.737
[3669]	valid_0's rmse: 323.737
[3670]	valid_0's rmse: 323.737
[3671]	v

[3989]	valid_0's rmse: 323.76
[3990]	valid_0's rmse: 323.761
[3991]	valid_0's rmse: 323.762
[3992]	valid_0's rmse: 323.761
[3993]	valid_0's rmse: 323.761
[3994]	valid_0's rmse: 323.761
[3995]	valid_0's rmse: 323.761
[3996]	valid_0's rmse: 323.76
[3997]	valid_0's rmse: 323.763
[3998]	valid_0's rmse: 323.764
[3999]	valid_0's rmse: 323.764
[4000]	valid_0's rmse: 323.765
[4001]	valid_0's rmse: 323.765
[4002]	valid_0's rmse: 323.765
[4003]	valid_0's rmse: 323.765
[4004]	valid_0's rmse: 323.764
[4005]	valid_0's rmse: 323.763
[4006]	valid_0's rmse: 323.763
[4007]	valid_0's rmse: 323.762
[4008]	valid_0's rmse: 323.762
[4009]	valid_0's rmse: 323.762
[4010]	valid_0's rmse: 323.762
[4011]	valid_0's rmse: 323.763
[4012]	valid_0's rmse: 323.764
[4013]	valid_0's rmse: 323.765
[4014]	valid_0's rmse: 323.766
[4015]	valid_0's rmse: 323.766
[4016]	valid_0's rmse: 323.766
[4017]	valid_0's rmse: 323.767
[4018]	valid_0's rmse: 323.766
[4019]	valid_0's rmse: 323.767
[4020]	valid_0's rmse: 323.767
[4021]	val

[4311]	valid_0's rmse: 323.802
[4312]	valid_0's rmse: 323.802
[4313]	valid_0's rmse: 323.802
[4314]	valid_0's rmse: 323.801
[4315]	valid_0's rmse: 323.802
[4316]	valid_0's rmse: 323.801
[4317]	valid_0's rmse: 323.801
[4318]	valid_0's rmse: 323.8
[4319]	valid_0's rmse: 323.8
[4320]	valid_0's rmse: 323.798
[4321]	valid_0's rmse: 323.798
[4322]	valid_0's rmse: 323.799
[4323]	valid_0's rmse: 323.799
[4324]	valid_0's rmse: 323.799
[4325]	valid_0's rmse: 323.8
[4326]	valid_0's rmse: 323.8
[4327]	valid_0's rmse: 323.799
[4328]	valid_0's rmse: 323.8
[4329]	valid_0's rmse: 323.799
[4330]	valid_0's rmse: 323.8
[4331]	valid_0's rmse: 323.8
[4332]	valid_0's rmse: 323.8
[4333]	valid_0's rmse: 323.8
[4334]	valid_0's rmse: 323.799
[4335]	valid_0's rmse: 323.799
[4336]	valid_0's rmse: 323.799
[4337]	valid_0's rmse: 323.799
[4338]	valid_0's rmse: 323.798
[4339]	valid_0's rmse: 323.798
[4340]	valid_0's rmse: 323.798
[4341]	valid_0's rmse: 323.797
[4342]	valid_0's rmse: 323.797
[4343]	valid_0's rmse: 323

[4638]	valid_0's rmse: 323.797
[4639]	valid_0's rmse: 323.797
[4640]	valid_0's rmse: 323.797
[4641]	valid_0's rmse: 323.796
[4642]	valid_0's rmse: 323.795
[4643]	valid_0's rmse: 323.795
[4644]	valid_0's rmse: 323.795
[4645]	valid_0's rmse: 323.795
[4646]	valid_0's rmse: 323.795
[4647]	valid_0's rmse: 323.796
[4648]	valid_0's rmse: 323.796
[4649]	valid_0's rmse: 323.795
[4650]	valid_0's rmse: 323.795
[4651]	valid_0's rmse: 323.795
[4652]	valid_0's rmse: 323.794
[4653]	valid_0's rmse: 323.795
[4654]	valid_0's rmse: 323.795
[4655]	valid_0's rmse: 323.796
[4656]	valid_0's rmse: 323.796
[4657]	valid_0's rmse: 323.795
[4658]	valid_0's rmse: 323.796
[4659]	valid_0's rmse: 323.796
[4660]	valid_0's rmse: 323.796
[4661]	valid_0's rmse: 323.795
[4662]	valid_0's rmse: 323.795
[4663]	valid_0's rmse: 323.796
[4664]	valid_0's rmse: 323.796
[4665]	valid_0's rmse: 323.795
[4666]	valid_0's rmse: 323.796
[4667]	valid_0's rmse: 323.797
[4668]	valid_0's rmse: 323.797
[4669]	valid_0's rmse: 323.797
[4670]	v

[4942]	valid_0's rmse: 323.809
[4943]	valid_0's rmse: 323.809
[4944]	valid_0's rmse: 323.81
[4945]	valid_0's rmse: 323.809
[4946]	valid_0's rmse: 323.809
[4947]	valid_0's rmse: 323.809
[4948]	valid_0's rmse: 323.81
[4949]	valid_0's rmse: 323.81
[4950]	valid_0's rmse: 323.81
[4951]	valid_0's rmse: 323.811
[4952]	valid_0's rmse: 323.811
[4953]	valid_0's rmse: 323.811
[4954]	valid_0's rmse: 323.811
[4955]	valid_0's rmse: 323.812
[4956]	valid_0's rmse: 323.811
[4957]	valid_0's rmse: 323.811
[4958]	valid_0's rmse: 323.811
[4959]	valid_0's rmse: 323.812
[4960]	valid_0's rmse: 323.811
[4961]	valid_0's rmse: 323.811
[4962]	valid_0's rmse: 323.811
[4963]	valid_0's rmse: 323.811
[4964]	valid_0's rmse: 323.811
[4965]	valid_0's rmse: 323.811
[4966]	valid_0's rmse: 323.811
[4967]	valid_0's rmse: 323.811
[4968]	valid_0's rmse: 323.811
[4969]	valid_0's rmse: 323.811
[4970]	valid_0's rmse: 323.811
[4971]	valid_0's rmse: 323.812
[4972]	valid_0's rmse: 323.811
[4973]	valid_0's rmse: 323.811
[4974]	valid

In [17]:
train.describe()

Unnamed: 0,Description,Country,Quantity,UnitPrice,revenue_buckets,price_buckets,final_revenue,dayofmonth,dayofyear,dayofweek,month,year,weekofyear
count,396.0,396.0,396.0,396.0,396.0,396.0,396.0,396.0,396.0,396.0,396.0,396.0,396.0
mean,2.926768,0.103535,32.691919,3.19697,0.964646,1.156566,5.651515,10.565657,36.012626,2.406566,5.073232,0.674242,14.305556
std,2.619729,0.351327,104.610772,2.851887,0.94869,0.783198,8.40312,8.948982,32.375367,2.126114,3.814986,0.544192,12.152411
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,2.0,0.0,0.0,1.0,0.0,2.0,7.0,0.0,1.0,0.0,4.0
50%,3.0,0.0,6.0,3.0,1.0,1.0,3.0,9.0,29.0,2.0,5.0,1.0,12.0
75%,5.0,0.0,12.0,6.0,1.0,2.0,7.0,19.0,59.25,4.0,9.0,1.0,22.0
max,9.0,2.0,1008.0,9.0,4.0,3.0,39.0,30.0,108.0,6.0,11.0,3.0,45.0


In [18]:
test.describe()

Unnamed: 0,Description,Country,Quantity,UnitPrice,revenue_buckets,price_buckets,final_revenue,dayofmonth,dayofyear,dayofweek,month,year,weekofyear
count,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0,170.0
mean,2.323529,0.105882,50.752941,2.135294,1.076471,1.111765,4.529412,10.964706,29.488235,3.1,4.288235,0.441176,13.647059
std,2.404638,0.308596,318.821161,2.425076,1.125303,0.780216,5.520458,8.392653,23.643866,1.876828,2.808352,0.543448,10.794702
min,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,2.0,0.0,0.0,1.0,0.0,4.0,7.25,1.0,2.0,0.0,3.0
50%,1.0,0.0,6.0,1.0,1.0,1.0,2.0,8.0,22.5,4.0,3.0,0.0,11.0
75%,4.0,0.0,12.0,4.0,1.0,2.0,8.0,17.0,50.75,4.0,6.0,1.0,22.0
max,9.0,1.0,3906.0,10.0,4.0,3.0,23.0,29.0,77.0,6.0,11.0,2.0,37.0


In [19]:
# Array:Description,Country,UnitPrice,revenue_buckets,price_buckets,final_revenue,dayofmonth,dayofyear,dayofweek,month,
# year,weekofyear
X_prediction=np.array([[1,1,3,4,5,16,7,8,9,11,2018,30]]) # new values
predictions = gbm.predict(X_prediction) # predictive function
predictions # display 'Quantity'value 

array([121.37791008])