In [313]:
###Importing Libraries
import pandas as pd
import numpy as np
import xgboost

from sklearn.model_selection import cross_val_score,KFold
from sklearn.cross_validation import  train_test_split
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import r2_score
from xgboost import XGBRegressor
from sklearn import grid_search

In [314]:
##Loading Training and Testing Data
train_data=pd.read_csv("train.csv")
test_data=pd.read_csv("test.csv")
train_data

Unnamed: 0,Name,Date of Birth,From,To,Flight Date,Flight Time,Booking Date,Class,Fare
0,Dr. FG61 FS88,1963-10-23,Mumbai,Kolkata,2016-11-04,13:50,2016-10-19,Business,14737.67
1,Dr. FG35 FS74,1981-10-09,Lucknow,Hyderabad,2016-09-19,18:30,2016-07-31,Economy,7621.71
2,Dr. MG45 MS99,1981-07-09,Lucknow,Patna,2016-05-27,10:50,2016-05-24,Economy,5655.81
3,Mr. MG93 MS13,1988-09-08,Mumbai,Lucknow,2016-07-24,14:40,2016-07-16,Economy,7436.94
4,Miss FG28 FS14,1994-11-18,Kolkata,Delhi,2016-12-21,10:20,2016-12-09,Economy,3282.54
5,Miss FG66 FS95,2005-11-08,Chennai,Patna,2016-05-17,14:10,2016-05-07,Business,22058.03
6,Mr. MG71 MS64,1948-09-11,Mumbai,Lucknow,2016-05-25,11:30,2016-05-20,Business,13321.88
7,Dr. MG32 MS6,1943-07-19,Chennai,Mumbai,2016-05-20,18:00,2016-04-26,Economy,5290.83
8,Mr. MG69 MS30,1972-05-15,Delhi,Lucknow,2016-11-27,12:30,2016-11-27,Economy,6402.71
9,Mr. MG14 MS53,1949-10-10,Delhi,Mumbai,2016-04-10,12:00,2016-04-09,Economy,7343.52


In [315]:
###Getting Labels for Class of Travel
train_data.Class = pd.Series( np.where( train_data.Class == 'Economy' , 0 , 1 ) , name = 'Class' )
test_data.Class = pd.Series( np.where( test_data.Class == 'Economy' , 0 , 1 ) , name = 'Class' )


###Mapping Destination and Source Cities to Numbers
mapping = {'Chennai':1  , 'Delhi':2, 'Hyderabad':3, 'Kolkata':4, 'Lucknow':5, 'Mumbai':6, 'Patna':7}
train_data=train_data.replace({'To': mapping, 'From': mapping})
test_data=test_data.replace({'To': mapping, 'From': mapping})

###Converting Flight Date, Booking Date to datetime and getting Age

train_data["Flight Date"] = pd.to_datetime(train_data["Flight Date"])
train_data["Date of Birth"] = pd.to_datetime(train_data["Date of Birth"])
train_data["Age"]=(train_data["Flight Date"]-train_data["Date of Birth"])/365.25
train_data["Age"]=train_data["Age"].dt.days

test_data["Flight Date"] = pd.to_datetime(test_data["Flight Date"])
test_data["Date of Birth"] = pd.to_datetime(test_data["Date of Birth"])
test_data["Age"]=(test_data["Flight Date"]-test_data["Date of Birth"])/365.25
test_data["Age"]=test_data["Age"].dt.days

test_data["Booking Date"] = pd.to_datetime(test_data["Booking Date"])
train_data["Booking Date"] = pd.to_datetime(train_data["Booking Date"])


###Getting the weekday from Flight Date and Booking Date
train_data['Flight Day'] = train_data['Flight Date'].apply(lambda x: x.weekday())
test_data['Flight Day'] = test_data['Flight Date'].apply(lambda x: x.weekday())

train_data['Booking Day'] = train_data['Booking Date'].apply(lambda x: x.weekday())
test_data['Booking Day'] = test_data['Booking Date'].apply(lambda x: x.weekday())

###Dividing the Weekday into Two Categories
#bins = [-1, 3, 7]
#names = [0, 1]

#train_data['WeekDay']=pd.cut(train_data['Flight Day'], bins, labels=names)
#test_data['WeekDay']=pd.cut(test_data['Flight Day'], bins, labels=names)
#train_data['WeekDay']=pd.to_numeric(train_data['WeekDay'])
#test_data['WeekDay']=pd.to_numeric(test_data['WeekDay'])



##Removing Name, DateofBirth after extracting Gender and Two Numbers
### Adding Gender to Data
X_trn = []
for i in train_data['Name']:
    c1 = i.split(' ')[1][0]
    if(c1 is 'M'):
        X_trn.append(1)
    else:
        X_trn.append(0)

X_tst = []
for i in test_data['Name']:
    c1 = i.split(' ')[1][0]
    if(c1 is 'M'):
        X_tst.append(1)
    else:
        X_tst.append(0)


train_data['Gender'] = X_trn
test_data['Gender'] = X_tst


#train_data=train_data.drop(columns=['Name', 'Date of Birth'])
#test_data=test_data.drop(columns=['Name', 'Date of Birth'])


###Getting the month of booking and travel
train_data["Flight Month"] = train_data["Flight Date"].dt.month
train_data["Booking Month"] = train_data["Booking Date"].dt.month

test_data["Flight Month"] = test_data["Flight Date"].dt.month
test_data["Booking Month"] = test_data["Booking Date"].dt.month

###Getting the Weekday of booking and travel
train_data["Flight Week"] = train_data["Flight Date"].dt.week
train_data["Booking Week"] = train_data["Booking Date"].dt.week

test_data["Flight Week"] = test_data["Flight Date"].dt.week
test_data["Booking Week"] = test_data["Booking Date"].dt.week



###Finding the travel time
train_data['Flight Time'] = train_data['Flight Time'].str.extract('(\d\d)',expand=True)
test_data['Flight Time'] = test_data['Flight Time'].str.extract('(\d\d)',expand=True)


train_data['Flight Time']=pd.to_numeric(train_data['Flight Time'])
test_data['Flight Time']=pd.to_numeric(test_data['Flight Time'])



#### Getting the Day diff between the booking of Flight and travel
train_data['Diff']=(train_data['Flight Date']-train_data['Booking Date'])
test_data['Diff']=(test_data['Flight Date']-test_data['Booking Date'])
train_data['Diff']=train_data['Diff'].dt.days
test_data['Diff']=test_data['Diff'].dt.days


###Dividing the Age into Brackets
bins = [0, 5, 18, 35, 65, 100]
names = [0, 1, 2, 3, 4]

train_data['AgeRange'] = pd.cut(train_data['Age'], bins, labels=names)
test_data['AgeRange'] = pd.cut(test_data['Age'], bins, labels=names)

train_data=train_data.drop(columns=['Age'])
test_data=test_data.drop(columns=['Age'])



#####
train_data["AgeRange"]=pd.to_numeric(train_data["AgeRange"])
test_data["AgeRange"]=pd.to_numeric(test_data["AgeRange"])





#### Weekend Feature 
X = []
for i in train_data['Flight Day']:
    if(i==6 or i==5):
        X.append(1)
    else:
        X.append(0)
        
train_data['Weekend'] = X

X = []
for i in test_data['Flight Day']:
    if(i==6 or i==5):
        X.append(1)
    else:
        X.append(0)
        
test_data['Weekend'] = X

### Peak Time Feature 

X = []
for i in train_data['Flight Time']:
    if(i>=18 and i<=23):
        X.append(5)
    elif(i>=12 and i<18):
        X.append(3)
    else:
        X.append(1)
        
train_data['PeakTime'] = X

X = []
for i in test_data['Flight Time']:
    if(i>=18 and i<=23):
        X.append(5)
    elif(i>=12 and i<18):
        X.append(3)
    else:
        X.append(1)

test_data['PeakTime'] = X

### holidays feature 

holidays = ['2016-01-01','2016-03-07','2016-04-15','2016-05-21','2016-08-15','2016-09-05','2016-10-02','2016-10-30','2016-01-14','2016-03-24','2016-04-20','2016-07-06','2016-08-18','2016-09-12','2016-10-11','2016-12-13','2016-01-26','2016-03-25','2016-05-01','2016-07-06','2016-08-25','2016-09-14','2016-10-12','2016-12-25']
df = pd.to_datetime(holidays)
dr = train_data['Flight Date']

X = []
for i in dr:
    if(i in df):
        X.append(1)
    else:
        X.append(0)

train_data['holidays'] = X

dr = test_data['Flight Date']

X = []
for i in dr:
    if(i in df):
        X.append(1)
    else:
        X.append(0)

test_data['holidays'] = X




















In [316]:
train_data

Unnamed: 0,Name,Date of Birth,From,To,Flight Date,Flight Time,Booking Date,Class,Fare,Flight Day,...,FromD,FromM,FromL,ToC,ToK,ToH,ToP,ToD,ToM,ToL
0,Dr. FG61 FS88,1963-10-23,6,4,2016-11-04,13,2016-10-19,1,14737.67,4,...,0,0,0,0,0,0,0,0,0,0
1,Dr. FG35 FS74,1981-10-09,5,3,2016-09-19,18,2016-07-31,0,7621.71,0,...,0,0,0,0,0,0,0,0,0,0
2,Dr. MG45 MS99,1981-07-09,5,7,2016-05-27,10,2016-05-24,0,5655.81,4,...,0,0,0,0,0,0,0,0,0,0
3,Mr. MG93 MS13,1988-09-08,6,5,2016-07-24,14,2016-07-16,0,7436.94,6,...,0,0,0,0,0,0,0,0,0,0
4,Miss FG28 FS14,1994-11-18,4,2,2016-12-21,10,2016-12-09,0,3282.54,2,...,0,0,0,0,0,0,0,0,0,0
5,Miss FG66 FS95,2005-11-08,1,7,2016-05-17,14,2016-05-07,1,22058.03,1,...,0,0,0,0,0,0,0,0,0,0
6,Mr. MG71 MS64,1948-09-11,6,5,2016-05-25,11,2016-05-20,1,13321.88,2,...,0,0,0,0,0,0,0,0,0,0
7,Dr. MG32 MS6,1943-07-19,1,6,2016-05-20,18,2016-04-26,0,5290.83,4,...,0,0,0,0,0,0,0,0,0,0
8,Mr. MG69 MS30,1972-05-15,2,5,2016-11-27,12,2016-11-27,0,6402.71,6,...,0,0,0,0,0,0,0,0,0,0
9,Mr. MG14 MS53,1949-10-10,2,6,2016-04-10,12,2016-04-09,0,7343.52,6,...,0,0,0,0,0,0,0,0,0,0


In [317]:

###Getting the X and Y values
X = train_data[['From', 'To', 'Gender','Flight Week','Class','Diff','AgeRange' ,'PeakTime', 'Weekend','holidays']]
y=train_data[['Fare']]
test_data=test_data[['From', 'To','Gender','Flight Week','Class','Diff','AgeRange','PeakTime', 'Weekend','holidays']]
###Data is prepared
####Train test Split
X_train, X_Val, y_train, Y_Val = train_test_split(X, y)


In [318]:
###Basic Linear Regression
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(X_train, y_train)
print(r2_score(Y_Val, reg.predict(X_Val))) 

0.4914744825107158


In [319]:
####Lasso Regression
from sklearn import linear_model
clf = linear_model.Lasso(alpha=0.5)
clf.fit(X_train,y_train)
print(r2_score(Y_Val, clf.predict(X_Val))) 

0.49150362989395346


In [312]:
####XG Boost
xgb = xgboost.XGBRegressor(n_estimators=750, learning_rate=0.09, gamma=0.05, subsample=0.85,
                           colsample_bytree=1, max_depth=4)

xgb.fit(X,y)
#print(xgb.score(X_Val,Y_Val))
y_test_xgb=xgb.predict(test_data)
prediction = pd.DataFrame(y_test_xgb, columns=['predictions']).to_csv('XGBoostFinal_hyper_2.csv')

In [274]:
#####Grid Search GBM
from sklearn.ensemble import GradientBoostingRegressor 

parameters={'min_impurity_split':[1e-1],'learning_rate':[1e-1],'min_samples_split':[7],'verbose':[2],'max_depth':[7],'min_samples_leaf':[1],'subsample':[1.0],'loss':['ls'],'n_estimators':[100]}
gbm = grid_search.GridSearchCV(GradientBoostingRegressor(),parameters) 
gbm.fit(X_train,y_train)
print(r2_score(Y_Val, gbm.predict(X_Val))) 
#y_test_gbm=gbm.predict(test_data)
#prediction = pd.DataFrame(y_test_gbm, columns=['predictions']).to_csv('GBMFinal.csv')


  y = column_or_1d(y, warn=True)


      Iter       Train Loss   Remaining Time 
         1    38412129.3367            0.73s
         2    33102327.2030            0.73s
         3    28783055.9371            0.73s
         4    25233381.3875            0.72s
         5    22260204.9662            0.77s
         6    19704897.8275            0.77s
         7    17688113.2732            0.76s
         8    16009416.7769            0.74s
         9    14541976.4151            0.73s
        10    13356801.2095            0.71s
        11    12276362.2041            0.70s
        12    11402110.6023            0.69s
        13    10652068.5925            0.68s
        14     9982839.2968            0.67s
        15     9444907.3183            0.65s
        16     8891483.3392            0.64s
        17     8476740.7329            0.64s
        18     8113057.8143            0.63s
        19     7732410.5421            0.62s
        20     7335405.4816            0.61s
        21     7063451.2020            0.60s
        2



        27     5346402.7378            0.57s
        28     5120290.4790            0.57s
        29     4918322.8983            0.57s
        30     4734277.6097            0.56s
        31     4570637.2294            0.55s
        32     4422403.4669            0.54s
        33     4234141.1637            0.54s
        34     4140178.1929            0.53s
        35     4021786.2975            0.52s
        36     3931663.2917            0.51s
        37     3825668.9009            0.50s
        38     3696960.3280            0.49s
        39     3597966.7585            0.48s
        40     3499604.0381            0.47s
        41     3413200.0947            0.46s
        42     3345840.9877            0.45s
        43     3283823.5722            0.45s
        44     3109456.0755            0.44s
        45     3018920.1341            0.43s
        46     2971275.0706            0.42s
        47     2829017.5688            0.42s
        48     2739472.9752            0.41s
        49



        53     2455366.6251            0.37s
        54     2411439.7054            0.36s
        55     2365436.9264            0.36s
        56     2323992.5923            0.35s
        57     2256778.0356            0.34s
        58     2224413.2589            0.33s
        59     2170467.7361            0.33s
        60     2134477.8609            0.32s
        61     2117675.2784            0.31s
        62     2097899.9485            0.30s
        63     2053570.8210            0.29s
        64     2028649.6618            0.28s
        65     1993935.4949            0.28s
        66     1955381.1844            0.27s
        67     1947014.8359            0.26s
        68     1913405.0880            0.25s
        69     1888192.6836            0.24s
        70     1865393.1030            0.24s
        71     1850795.8203            0.23s
        72     1840557.0511            0.22s
        73     1808599.0674            0.21s
        74     1781748.0853            0.20s
        75

  y = column_or_1d(y, warn=True)


        80     1684027.5400            0.16s
        81     1667760.1427            0.15s
        82     1663244.0491            0.14s
        83     1653309.2817            0.13s
        84     1640195.8409            0.12s
        85     1618551.5663            0.12s
        86     1603574.3437            0.11s
        87     1578006.9894            0.10s
        88     1552776.9117            0.09s
        89     1536866.0641            0.08s
        90     1524471.7743            0.08s
        91     1516692.6416            0.07s
        92     1490620.9249            0.06s
        93     1477247.4424            0.05s
        94     1470407.0386            0.05s
        95     1445885.8558            0.04s
        96     1427482.1206            0.03s
        97     1422715.0735            0.02s
        98     1415781.9383            0.02s
        99     1403801.1927            0.01s
       100     1397518.4305            0.00s
      Iter       Train Loss   Remaining Time 
         



         8    16547518.1802            0.69s
         9    15106036.8946            0.67s
        10    13844238.3122            0.66s
        11    12815875.3837            0.64s
        12    11944618.0821            0.63s
        13    11172084.2887            0.61s
        14    10369954.1712            0.60s
        15     9815237.5395            0.59s
        16     9263740.7489            0.57s
        17     8821169.0920            0.56s
        18     8338145.3711            0.55s
        19     7946207.8535            0.54s
        20     7535507.3591            0.53s
        21     7155927.6699            0.52s
        22     6881791.4122            0.51s
        23     6570847.2549            0.50s
        24     6327853.1426            0.49s
        25     6074897.2670            0.49s
        26     5872656.6447            0.48s
        27     5693303.7436            0.47s
        28     5552712.1313            0.46s
        29     5361430.9955            0.45s
        30



        42     3552084.2256            0.36s
        43     3467461.6444            0.36s
        44     3399825.9824            0.35s
        45     3341290.5438            0.35s
        46     3188181.3509            0.34s
        47     3107777.1100            0.33s
        48     3061027.3577            0.33s
        49     3005610.3503            0.32s
        50     2932388.4173            0.31s
        51     2895856.3856            0.31s
        52     2794121.2442            0.30s
        53     2711337.1471            0.30s
        54     2615721.6229            0.29s
        55     2572040.9497            0.29s
        56     2478222.1539            0.28s
        57     2428717.8456            0.28s
        58     2396600.5945            0.27s
        59     2356382.4338            0.27s
        60     2284389.7711            0.26s
        61     2259446.5986            0.26s
        62     2228405.0208            0.25s
        63     2169365.8867            0.25s
        64



        69     1988441.1467            0.21s
        70     1963095.3007            0.21s
        71     1938598.5936            0.20s
        72     1892066.9146            0.19s
        73     1855500.8060            0.19s
        74     1836834.6843            0.18s
        75     1804646.8575            0.17s
        76     1778417.5631            0.17s
        77     1749251.6522            0.16s
        78     1743613.0684            0.15s
        79     1727364.7782            0.15s
        80     1698655.7583            0.14s
        81     1687430.9306            0.13s
        82     1666549.8199            0.12s
        83     1649204.1936            0.12s
        84     1629962.4585            0.11s
        85     1604050.5285            0.10s
        86     1587909.5467            0.10s
        87     1575071.8248            0.09s
        88     1549648.8859            0.08s
        89     1533747.5758            0.08s
        90     1517324.7708            0.07s
        91

  y = column_or_1d(y, warn=True)


       100     1353911.7019            0.00s
      Iter       Train Loss   Remaining Time 
         1    34481063.4152            0.79s
         2    30082795.3603            0.83s
         3    26383940.8966            0.81s
         4    23352048.4781            0.80s
         5    20908937.5818            0.81s
         6    18773959.3421            0.81s
         7    16963297.9655            0.80s
         8    15467142.8161            0.80s
         9    14125926.1761            0.80s
        10    12983123.9884            0.79s
        11    12076496.9621            0.79s
        12    11319050.8927            0.79s
        13    10637395.2246            0.78s
        14    10062301.7237            0.77s
        15     9547762.5322            0.76s
        16     9065738.1203            0.75s
        17     8611575.3157            0.74s
        18     8153083.3689            0.73s
        19     7691421.9528            0.73s
        20     7356554.9372            0.72s




        21     6988989.6883            0.71s
        22     6694335.5858            0.71s
        23     6417027.0985            0.70s
        24     6131988.0691            0.69s
        25     5893417.4687            0.68s
        26     5687556.9881            0.67s
        27     5426859.8184            0.66s
        28     5283102.9329            0.65s
        29     5095804.6357            0.64s
        30     4949646.6222            0.63s
        31     4829126.9809            0.62s
        32     4694877.0187            0.61s
        33     4552710.7416            0.60s
        34     4365594.2248            0.59s
        35     4221705.0252            0.58s
        36     4070671.6588            0.57s
        37     3854766.0573            0.56s
        38     3777728.2657            0.55s
        39     3687368.7129            0.54s
        40     3579947.1849            0.53s
        41     3501555.3226            0.52s
        42     3423664.1048            0.51s
        43



        45     3087662.4007            0.48s
        46     2998446.6996            0.47s
        47     2910435.0410            0.46s
        48     2857078.2954            0.45s
        49     2774145.0373            0.44s
        50     2733790.9646            0.43s
        51     2665259.6998            0.42s
        52     2549000.3632            0.42s
        53     2511372.7789            0.41s
        54     2470274.9210            0.40s
        55     2380178.2921            0.39s
        56     2341454.3780            0.38s
        57     2262268.1048            0.37s
        58     2250753.7564            0.36s
        59     2208728.4722            0.35s
        60     2199141.0105            0.34s
        61     2125115.9656            0.33s
        62     2102622.8382            0.32s
        63     2095511.9371            0.31s
        64     2079423.6743            0.30s
        65     2050348.3958            0.29s
        66     1992318.0557            0.29s
        67



        71     1817539.3969            0.24s
        72     1779546.5608            0.24s
        73     1757656.8882            0.23s
        74     1752563.1123            0.22s
        75     1723918.0393            0.21s
        76     1687885.9302            0.20s
        77     1671920.7562            0.19s
        78     1647339.0294            0.19s
        79     1620393.4246            0.18s
        80     1599135.0278            0.17s
        81     1570733.1859            0.16s
        82     1566965.9756            0.15s
        83     1543150.2973            0.14s
        84     1518118.9108            0.13s
        85     1488238.8494            0.12s
        86     1468868.2802            0.12s
        87     1441958.9259            0.11s
        88     1424729.2889            0.10s
        89     1399558.6029            0.09s
        90     1381446.5203            0.08s
        91     1360387.2996            0.07s
        92     1345062.7912            0.07s
        93

  y = column_or_1d(y, warn=True)


        97     1275545.1911            0.02s
        98     1262786.5347            0.02s
        99     1247368.2012            0.01s
       100     1236804.3634            0.00s
      Iter       Train Loss   Remaining Time 
         1    37449696.7252            1.05s
         2    32573165.4286            1.05s
         3    28559454.2114            1.04s
         4    25185140.2076            1.04s
         5    22410889.3537            1.02s
         6    20081207.7301            1.02s
         7    18151562.6756            1.01s
         8    16560676.9961            1.01s
         9    15198360.9292            1.00s
        10    14109690.5348            0.99s
        11    13201306.6576            0.98s
        12    12390352.2846            0.98s
        13    11477553.0642            0.97s




        14    10910909.9083            0.98s
        15    10324928.7287            0.98s
        16     9872862.4624            0.98s
        17     9485485.3933            0.98s
        18     9101020.8534            0.96s
        19     8772001.6207            0.95s
        20     8407057.9243            0.93s
        21     7946132.7880            0.92s
        22     7694481.8283            0.90s
        23     7347237.9053            0.89s
        24     7013240.0929            0.88s
        25     6801343.7961            0.87s
        26     6572243.4125            0.86s
        27     6368757.3356            0.84s
        28     6163682.7731            0.83s
        29     5997330.7023            0.82s
        30     5847043.8467            0.81s
        31     5662377.7900            0.80s




        32     5522055.4524            0.78s
        33     5340639.9638            0.77s
        34     5129342.4786            0.76s
        35     4967833.0598            0.74s
        36     4850724.8169            0.73s
        37     4777449.9859            0.72s
        38     4624535.6618            0.71s
        39     4563311.4802            0.69s
        40     4278064.5661            0.69s
        41     4227419.9174            0.67s
        42     4163662.7516            0.66s
        43     3939794.3596            0.65s
        44     3755881.9881            0.64s
        45     3698064.7727            0.63s
        46     3599543.5416            0.62s
        47     3550877.2261            0.61s
        48     3479508.3188            0.60s
        49     3415780.9374            0.59s




        50     3373592.2936            0.58s
        51     3280696.7665            0.57s
        52     3233737.4394            0.55s
        53     3156876.6235            0.54s
        54     3113766.5447            0.53s
        55     3078734.5710            0.52s
        56     3043306.1786            0.51s
        57     3011364.4662            0.50s
        58     2942886.8002            0.48s
        59     2889767.5322            0.47s
        60     2861800.4436            0.46s
        61     2833402.4897            0.45s
        62     2804068.1325            0.44s
        63     2786625.6362            0.42s
        64     2711848.0088            0.41s
        65     2679562.1081            0.40s
        66     2610156.5547            0.39s
        67     2557031.1746            0.38s




        68     2531005.8029            0.37s
        69     2506553.0381            0.36s
        70     2459867.2668            0.35s
        71     2419045.7549            0.34s
        72     2409512.8579            0.32s
        73     2376171.7319            0.31s
        74     2364212.1282            0.30s
        75     2330321.5108            0.29s
        76     2315047.9028            0.28s
        77     2297825.3964            0.26s
        78     2262365.1255            0.25s
        79     2238578.2189            0.24s
        80     2221091.4700            0.23s
        81     2202398.1840            0.22s
        82     2181730.4701            0.21s
        83     2170702.8411            0.19s
        84     2161588.8706            0.18s
        85     2134350.4437            0.17s
        86     2104899.4283            0.16s
        87     2085117.0442            0.15s
        88     2065380.1514            0.14s
        89     2041207.2311            0.13s
        90



In [275]:
print(r2_score(Y_Val, gbm.predict(X_Val))) 


0.7914910725925524


In [276]:
from sklearn.neural_network import MLPRegressor
mlp=MLPRegressor(hidden_layer_sizes=(500, 150 ), activation='relu', solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
mlp.fit(X_train, y_train)
print(r2_score(Y_Val, mlp.predict(X_Val))) 

  y = column_or_1d(y, warn=True)


0.5754365879175065


In [278]:
from sklearn.ensemble import RandomForestRegressor
regr = RandomForestRegressor(max_depth=100, random_state=0,n_estimators=500)
regr.fit(X_train, y_train)
print(r2_score(Y_Val, regr.predict(X_Val))) 


  This is separate from the ipykernel package so we can avoid doing imports until


0.729166903537898


In [320]:
param_test1 = {
 'max_depth':range(3,10,2),
 'min_child_weight':range(1,6,2)
}
gsearch1 = GridSearchCV(estimator = XGBRegressor(n_estimators=100, learning_rate=0.1, gamma=0.5, subsample=0.75,
                           colsample_bytree=0.7, max_depth=7), param_grid = param_test1,n_jobs=4,iid=False, cv=5)
gsearch1.fit(X,y)
gsearch1.grid_scores_, gsearch1.best_params_, gsearch1.best_score_



([mean: 0.58655, std: 0.02381, params: {'max_depth': 3, 'min_child_weight': 1},
  mean: 0.58636, std: 0.02385, params: {'max_depth': 3, 'min_child_weight': 3},
  mean: 0.58653, std: 0.02320, params: {'max_depth': 3, 'min_child_weight': 5},
  mean: 0.57737, std: 0.02652, params: {'max_depth': 5, 'min_child_weight': 1},
  mean: 0.57851, std: 0.02861, params: {'max_depth': 5, 'min_child_weight': 3},
  mean: 0.57808, std: 0.02905, params: {'max_depth': 5, 'min_child_weight': 5},
  mean: 0.55272, std: 0.02933, params: {'max_depth': 7, 'min_child_weight': 1},
  mean: 0.55375, std: 0.03081, params: {'max_depth': 7, 'min_child_weight': 3},
  mean: 0.55582, std: 0.03116, params: {'max_depth': 7, 'min_child_weight': 5},
  mean: 0.52586, std: 0.02873, params: {'max_depth': 9, 'min_child_weight': 1},
  mean: 0.53309, std: 0.03324, params: {'max_depth': 9, 'min_child_weight': 3},
  mean: 0.53695, std: 0.03065, params: {'max_depth': 9, 'min_child_weight': 5}],
 {'max_depth': 3, 'min_child_weight': 1