In [3]:
%matplotlib inline
import os
import importlib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
pd.set_option('display.max_columns', None)
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import make_column_transformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

In [4]:
data = pd.read_csv(os.path.join('data', 'train.csv.bz2'))
data.loc[:, 'DateOfDeparture'] = pd.to_datetime(data.loc[:, 'DateOfDeparture'])

In [5]:
import problem
X, y = problem.get_train_data()

In [6]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score

In [62]:
def _nextworkday(date):
    one_day = dt.timedelta(days=1)
    next_day = date + one_day 
    while next_day.weekday() in holidays.WEEKEND or next_day in Holidays_US:
        next_day += one_day 
    return (next_day - date).days

def _lastworkday(date):
    one_day = dt.timedelta(days=1)
    last_day = date - one_day 
    while last_day.weekday() in holidays.WEEKEND or last_day in Holidays_US:
        last_day -= one_day 
    return (date - last_day).days   

def _schoolbreak(date):
    one_day = dt.timedelta(days=1)
    lastschoolday = lastworkday(date)
    nextschoolday = nextworkday(date)
    for i in range(len(school_break['start'])-1):
        if date >= school_break['start'][i] and date <= school_break['end'][i]:
            lastschoolday = (date - school_break['start'][i] + one_day).days
            nextschoolday = (school_break['end'][i] + one_day - date).days   
        elif date == school_break['end'][i] + one_day:
            lastschoolday = (date - school_break['start'][i] + one_day).days
        elif date == school_break['start'][i] - one_day:
            nextschoolday = (school_break['end'][i] + one_day - date).days   
    return lastschoolday, nextschoolday



In [63]:

#merge function直接用

def _merge_external_data(X):
    import datetime as dt
    import holidays

    #get holidays 
    Holidays_US = holidays.US()[dt.date(2011,7, 1):dt.date(2013,6, 5)] + holidays.US()[dt.date(2012,1, 1):dt.date(2012,12, 31)]

    filepath2 = os.path.join(
            os.path.dirname(__file__), 'schoolholiday.csv')

    school_break = pd.read_csv(filepath2)
    school_break.loc[:, 'start'] =pd.to_datetime(school_break.loc[:, 'start']).dt.date
    school_break.loc[:, 'end'] = pd.to_datetime(school_break.loc[:, 'end']).dt.date


    from geopy.distance import geodesic
    filepath = os.path.join(
            os.path.dirname(__file__), 'external_data_mod.csv')

    external = pd.read_csv(filepath)
    external.loc[:,"Date"] = pd.to_datetime(external.loc[:,"Date"])

    #deal with data format
    external['Precipitationmm'].replace('T',0.0, inplace=True)
    external['Precipitationmm'] = external['Precipitationmm'].astype('float')
    for i in range(len(external['POP'])):
        external['POP'][i] = external.loc[:,'POP'][i].replace(',','')
    external['POP'] = external['POP'].astype('int')   
    external.drop(columns=['Events', 'City', 'StateCodes'],inplace=True)

    external.drop(columns =['Year','Region'],inplace=True)

    # define the departure and arrival dataframe
    col_dep = ['d_' + name for name in list(external.columns)]
    col_arr = [w.replace('d_', 'a_') for w in col_dep]

    # adjust the column name for merge
    col_dep = [w.replace('d_AirPort', 'Departure') for w in col_dep]
    col_dep = [w.replace('d_Date', 'DateOfDeparture') for w in col_dep]
    col_arr = [w.replace('a_AirPort', 'Arrival') for w in col_arr]
    col_arr = [w.replace('a_Date', 'DateOfDeparture') for w in col_arr]

    # 
    d_external = external.copy()
    a_external = external.copy()

    # rename the column
    d_external.columns = col_dep
    a_external.columns = col_arr

     # merge with X_encoded
    X_encoded = X.copy()
    X_encoded.loc[:,'DateOfDeparture'] = pd.to_datetime(X_encoded.loc[:,'DateOfDeparture'])
    X_encoded = pd.merge(X_encoded, d_external, how='left', on=['DateOfDeparture', 'Departure'],
                        sort=False)
    X_encoded = pd.merge(X_encoded, a_external, how='left', on=['DateOfDeparture', 'Arrival'],
                        sort=False)
    #

    X_encoded['nextworkday']=0
    X_encoded['lastworkday']=0
    X_encoded['nextschoolday']=0
    X_encoded['lastschoolday']=0

    X_encoded.loc[:,'DateOfDeparturedate'] = pd.to_datetime(X_encoded.loc[:,'DateOfDeparture']).dt.date

    for i in range(len(X_encoded)):
        X_encoded['nextworkday'][i] = _nextworkday(X_encoded.loc[:,'DateOfDeparture'][i])
        X_encoded['lastworkday'][i] = _lastworkday(X_encoded.loc[:,'DateOfDeparture'][i])
        X_encoded['lastschoolday'] = _schoolbreak(X_encoded.loc[:,'DateOfDeparturedate'][i])[0]
        X_encoded['nextschoolday'][i] = _schoolbreak(X_encoded.loc[:,'DateOfDeparturedate'][i])[1]


    # compute geographic distance
    X_encoded["Distance"] = X_encoded.apply(
            lambda x: geodesic((x["d_Latitude"],x["d_longitude"]),(x["a_Latitude"],x["a_longitude"])).km, axis=1)

    # split year, month and etc.
    X_encoded['year'] = X_encoded.loc[:,'DateOfDeparture'].dt.year
    X_encoded['month'] = X_encoded.loc[:,'DateOfDeparture'].dt.month
    X_encoded['day'] = X_encoded.loc[:,'DateOfDeparture'].dt.day
    X_encoded['weekday'] = X_encoded.loc[:,'DateOfDeparture'].dt.weekday
    X_encoded['week'] = X_encoded.loc[:,'DateOfDeparture'].dt.week
    X_encoded['n_days'] = X_encoded.loc[:,'DateOfDeparture'].apply(lambda date: 
                                                                     (date - pd.to_datetime("1970-01-01")).days)

    X_encoded = X_encoded.join(pd.get_dummies(X_encoded['year'], prefix='y'))
    X_encoded = X_encoded.join(pd.get_dummies(X_encoded['month'], prefix='m'))
    X_encoded = X_encoded.join(pd.get_dummies(X_encoded['day'], prefix='d'))
    X_encoded = X_encoded.join(pd.get_dummies(X_encoded['weekday'], prefix='wd'))
    X_encoded = X_encoded.join(pd.get_dummies(X_encoded['week'], prefix='w'))

    # drop the original data
    X_encoded.drop(columns=['Departure','Arrival','DateOfDeparture','DateOfDeparturedate',
                            'd_Unnamed: 0','d_Unnamed: 0.1','a_Unnamed: 0','a_Unnamed: 0.1',
                           'd_coordinates','a_coordinates','d_State','a_State',
                           'd_iso_region','a_iso_region'], inplace=True)


    for column in list(X_encoded.columns[X_train.isnull().sum() > 0]):
        mean_val = X_encoded[column].mean()
        X_encoded[column].fillna(mean_val, inplace=True)
        
    return X_encoded

In [67]:
__file__ = os.path.join('submissions', 'xgboost', 'estimator.py')
X_train = _merge_external_data(X)


FileNotFoundError: [Errno 2] No such file or directory: 'submissions/xgboost/external_data_mod.csv'

In [66]:
X_train

Unnamed: 0,WeeksToDeparture,std_wtd,d_Max TemperatureC,d_Mean TemperatureC,d_Min TemperatureC,d_Dew PointC,d_MeanDew PointC,d_Min DewpointC,d_Max Humidity,d_Mean Humidity,d_Min Humidity,d_Max Sea Level PressurehPa,d_Mean Sea Level PressurehPa,d_Min Sea Level PressurehPa,d_Max VisibilityKm,d_Mean VisibilityKm,d_Min VisibilitykM,d_Max Wind SpeedKm/h,d_Mean Wind SpeedKm/h,d_Max Gust SpeedKm/h,d_Precipitationmm,d_CloudCover,d_WindDirDegrees,d_Total_ops,d_Total Delays,d_Avg_delay_time,d_Total_delay_time,d_Actual Departures,d_Actual Arrivals,d_Departure Cancellations,d_Arrival Cancellations,d_Delayed Arrivals,d_Average Delay Per Delayed Arrival,d_Price,d_oil_price,d_elevation_ft,d_Division,d_Coast,d_Great Lakes,d_StPOP,d_StRBirth,d_StRDeath,d_StRMig,d_GDP,d_POP,d_RPI,d_Latitude,d_longitude,a_Max TemperatureC,a_Mean TemperatureC,a_Min TemperatureC,a_Dew PointC,a_MeanDew PointC,a_Min DewpointC,a_Max Humidity,a_Mean Humidity,a_Min Humidity,a_Max Sea Level PressurehPa,a_Mean Sea Level PressurehPa,a_Min Sea Level PressurehPa,a_Max VisibilityKm,a_Mean VisibilityKm,a_Min VisibilitykM,a_Max Wind SpeedKm/h,a_Mean Wind SpeedKm/h,a_Max Gust SpeedKm/h,a_Precipitationmm,a_CloudCover,a_WindDirDegrees,a_Total_ops,a_Total Delays,a_Avg_delay_time,a_Total_delay_time,a_Actual Departures,a_Actual Arrivals,a_Departure Cancellations,a_Arrival Cancellations,a_Delayed Arrivals,a_Average Delay Per Delayed Arrival,a_Price,a_oil_price,a_elevation_ft,a_Division,a_Coast,a_Great Lakes,a_StPOP,a_StRBirth,a_StRDeath,a_StRMig,a_GDP,a_POP,a_RPI,a_Latitude,a_longitude,nextworkday,lastworkday,nextschoolday,lastschoolday,Distance,year,month,day,weekday,week,n_days,y_2011,y_2012,y_2013,m_1,m_2,m_3,m_4,m_5,m_6,m_7,m_8,m_9,m_10,m_11,m_12,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,d_11,d_12,d_13,d_14,d_15,d_16,d_17,d_18,d_19,d_20,d_21,d_22,d_23,d_24,d_25,d_26,d_27,d_28,d_29,d_30,d_31,wd_0,wd_1,wd_2,wd_3,wd_4,wd_5,wd_6,w_1,w_2,w_3,w_4,w_5,w_6,w_7,w_8,w_9,w_10,w_11,w_12,w_13,w_14,w_15,w_16,w_17,w_18,w_19,w_20,w_21,w_22,w_23,w_24,w_25,w_26,w_27,w_28,w_29,w_30,w_31,w_32,w_33,w_34,w_35,w_36,w_37,w_38,w_39,w_40,w_41,w_42,w_43,w_44,w_45,w_46,w_47,w_48,w_49,w_50,w_51,w_52
0,12.875000,9.812647,35,31,26,20,18,17,62,51,39,1014,1012,1009,16,16,16,37,25,56.0,0.00,1,208,2686.0,2.0,28.0,56.0,933,931,9,13,143,71.16,95.14,95.14,672.0,3.0,0.0,1.0,12873763,12.445823,7.869352,-3.382339,579667,2719141,43033,41.978600,-87.904800,34,29,24,22,21,19,82,63,44,1012,1010,1009,16,16,16,48,29,60.0,0.00,5,161,1909.0,0.0,0.0,0.0,789,786,4,6,84,76.02,95.14,95.14,607.0,7.0,1.0,0.0,26094422,14.612295,6.532359,8.724163,430109,1242115,43672,32.896801,-97.038002,1,1,69,6,1290.346856,2012,6,19,1,25,15510,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,14.285714,9.466734,36,32,27,16,13,10,51,36,21,1011,1009,1005,16,15,11,32,18,50.0,0.00,5,187,1586.0,3.0,23.0,69.0,407,403,0,1,35,46.77,113.84,113.84,2181.0,8.0,0.0,0.0,2755245,12.680698,7.457300,8.145301,86924,593275,35572,36.080101,-115.152000,33,25,16,-2,-6,-8,21,14,7,1011,1008,1005,16,16,16,35,15,42.0,0.00,3,207,1781.0,0.0,0.0,0.0,668,667,1,1,54,56.50,113.84,113.84,5431.0,8.0,0.0,0.0,5191709,12.556818,6.343095,7.660864,167964,635163,46675,39.861698,-104.672997,1,3,1,6,1011.046677,2012,9,10,0,37,15593,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,10.863636,9.035883,6,2,-1,0,-2,-4,92,76,60,1027,1023,1020,16,9,1,27,15,32.0,0.00,7,357,1790.0,0.0,0.0,0.0,660,662,2,2,218,37.93,112.17,112.17,5431.0,8.0,0.0,0.0,5191709,12.556818,6.343095,7.660864,167964,635163,46675,39.861698,-104.672997,22,19,16,17,16,14,93,77,61,1018,1016,1014,16,13,8,24,8,29.0,0.00,5,266,1686.0,6.0,26.0,156.0,630,632,6,5,188,40.67,112.17,112.17,125.0,9.0,1.0,0.0,38062780,13.130815,6.326351,2.772770,806415,3851202,38192,33.942501,-118.407997,4,1,4,6,1387.023784,2012,10,5,4,40,15618,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
3,11.480000,7.990202,22,19,16,12,10,6,67,56,44,1026,1024,1021,16,16,16,37,26,45.0,0.00,8,70,2528.0,15.0,24.0,360.0,1068,1076,3,4,161,42.04,,,1026.0,5.0,1.0,0.0,9813201,13.584010,7.280748,3.375007,286108,437812,40064,33.636700,-84.428101,27,19,11,12,10,9,83,58,33,1028,1026,1024,16,16,16,23,6,29.0,0.00,1,93,2373.0,0.0,0.0,0.0,824,822,3,5,50,52.08,,,672.0,3.0,0.0,1.0,12858725,12.641046,7.907522,-3.182714,551983,2708114,41743,41.978600,-87.904800,2,2,2,6,974.957134,2011,10,9,6,40,15256,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
4,11.450000,9.517159,8,2,-4,-6,-9,-13,46,39,31,1012,1008,1004,16,16,16,61,30,77.0,0.00,5,270,1804.0,2.0,29.0,58.0,679,677,4,4,114,44.47,120.85,120.85,5431.0,8.0,0.0,0.0,5191709,12.556818,6.343095,7.660864,167964,635163,46675,39.861698,-104.672997,16,12,8,10,8,7,93,79,64,1027,1025,1024,16,12,3,24,8,29.0,0.00,7,300,1114.0,83.0,76.0,6308.0,427,434,18,14,129,67.81,120.85,120.85,13.0,9.0,1.0,0.0,38062780,13.130815,6.326351,2.772770,373546,828876,53395,37.618999,-122.375000,1,4,6,6,1556.391964,2012,2,21,1,8,15391,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8897,9.263158,7.316967,17,11,4,6,2,-2,85,56,27,1021,1020,1017,16,16,16,35,19,50.0,0.00,4,327,1183.0,0.0,0.0,0.0,242,239,1,1,31,40.90,,,645.0,3.0,0.0,1.0,9875736,11.541682,9.100641,-2.546041,206508,705118,40025,42.212399,-83.353401,20,14,8,4,2,-2,76,51,26,1020,1019,1018,16,16,16,32,16,39.0,0.00,0,329,2534.0,14.0,18.0,252.0,1079,1084,0,1,79,57.24,,,1026.0,5.0,1.0,0.0,9813201,13.584010,7.280748,3.375007,286108,437812,40064,33.636700,-84.428101,1,2,1,6,956.518345,2011,10,2,6,39,15249,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
8898,12.772727,10.641034,35,29,23,19,16,11,79,52,24,1015,1013,1010,16,16,16,48,30,60.0,0.00,4,187,1838.0,0.0,0.0,0.0,755,754,24,25,166,55.47,110.77,110.77,607.0,7.0,1.0,0.0,26094422,14.612295,6.532359,8.724163,430109,1242115,43672,32.896801,-97.038002,25,17,9,17,11,4,93,68,43,1012,1011,1009,16,16,16,27,15,34.0,0.00,3,216,2564.0,11.0,16.0,176.0,899,897,9,11,141,54.70,110.77,110.77,672.0,3.0,0.0,1.0,12873763,12.445823,7.869352,-3.382339,579667,2719141,43033,41.978600,-87.904800,1,1,1,6,1290.346856,2012,9,25,1,39,15608,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
8899,11.047619,7.908705,11,7,3,8,5,2,93,82,71,1025,1021,1017,16,15,8,24,10,32.0,2.03,7,125,1019.0,143.0,74.0,10582.0,402,410,42,38,231,70.05,109.54,109.54,13.0,9.0,1.0,0.0,38062780,13.130815,6.326351,2.772770,373546,828876,53395,37.618999,-122.375000,13,8,3,-7,-9,-12,40,31,22,1021,1016,1013,16,16,16,16,8,19.0,0.00,6,197,1352.0,1.0,22.0,22.0,361,364,10,9,83,56.19,109.54,109.54,2181.0,8.0,0.0,0.0,2755245,12.680698,7.457300,8.145301,86924,593275,35572,36.080101,-115.152000,1,1,1,6,666.249783,2012,1,19,3,3,15358,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
8900,6.076923,4.030334,-6,-9,-13,-8,-12,-18,91,77,62,1018,1016,1014,16,9,2,34,14,42.0,1.02,7,268,2117.0,12.0,20.0,240.0,726,729,11,12,201,52.73,,,672.0,3.0,0.0,1.0,12890552,12.259127,8.001377,-2.733083,589812,2725731,43050,41.978600,-87.904800,1,-1,-3,-3,-6,-9,92,72,51,1018,1013,1010,16,13,2,40,9,58.0,0.25,7,296,1109.0,11.0,19.0,209.0,198,194,2,4,22,67.18,,,36.0,2.0,0.0,1.0,12781296,11.152840,10.004955,-0.211183,379899,1558313,45492,39.871899,-75.241096,1,2,1,6,1090.917520,2013,2,3,6,5,15739,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [8]:
from sklearn.ensemble import GradientBoostingRegressor
regressor = GradientBoostingRegressor(learning_rate=0.09, n_estimators=150,min_samples_split=100,max_depth=4,subsample=0.8, random_state=10)
scores = cross_val_score(
    regressor, X_train, y, cv=5, scoring='neg_mean_squared_error'
)
rmse_scores = np.sqrt(-scores)

print(
    f"RMSE: {np.mean(rmse_scores):.4f} +/- {np.std(rmse_scores):.4f}"
)



KeyboardInterrupt: 

In [9]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import make_column_transformer
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import make_classification

import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, ensemble
from sklearn.inspection import permutation_importance
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

import xgboost
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn import metrics

# Hyper-Parameters
para_learning_rate =  0.1
para_n_estimators = list(range(100,701,50))
para_max_depth = list(range(3,10))
# para_min_child_weight = list(range(1,6))
# para_gamma = list(range(0.1, 0.6, 0.1))
# para_subsample = list(range(0.5, 0.9, 0.1))
# para_colsample_bytree = list(range(0.5, 0.9, 0.1))
para_reg_alpha = [0.05,0.1,1]
para_reg_lambda = [0.05,0.1,1]


regressor = xgboost.XGBRegressor(colsample_bytree=0.7,
                 gamma=0.3,                 
                 learning_rate=0.07,
                 max_depth=6,
                 min_child_weight=3,
                 n_estimators=500,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.5,
                 subsample=0.6,
                 seed=42) 


In [26]:
scores = cross_val_score(
    regressor, X_train, y, cv=5, scoring='neg_mean_squared_error'
)
rmse_scores = np.sqrt(-scores)

print(
    f"RMSE: {np.mean(rmse_scores):.4f} +/- {np.std(rmse_scores):.4f}"
)


RMSE: 0.3560 +/- 0.0192


# 老子来调参了

In [34]:
d = {}
for n_estimators in range(500,2000,100):
    print('n_estimators: %d'%n_estimators, end=' ')
    regressor = xgboost.XGBRegressor(colsample_bytree=0.7,
                     gamma=0.3,                 
                     learning_rate=0.07,
                     max_depth=6,
                     min_child_weight=3,
                     n_estimators=n_estimators,                                                                    
                     reg_alpha=0.75,
                     reg_lambda=0.5,
                     subsample=0.6,
                     seed=42,
                     silent = True) 
    scores = cross_val_score(
        regressor, X_train, y, cv=5, scoring='neg_mean_squared_error'
    )
    rmse_scores = np.sqrt(-scores)

    print(
        f"RMSE: {np.mean(rmse_scores):.4f} +/- {np.std(rmse_scores):.4f}"
    )
    d[n_estimators] = np.mean(rmse_scores)

print('optimal n_estimators: %d and the rmse is %f' % (min(d, key=d.get),min(d.values())))
best_n_estimator = min(d, key=d.get)

d = {}
lr = 0.00
print('\nLearning Rate!\n')
for _ in range(20):
    lr += 0.01
    print('lr: %d'%lr, end=' ')
    regressor = xgboost.XGBRegressor(colsample_bytree=0.7,
                     gamma=0.3,                 
                     learning_rate=lr,
                     max_depth=6,
                     min_child_weight=3,
                     n_estimators=500,                                                                    
                     reg_alpha=0.75,
                     reg_lambda=0.5,
                     subsample=0.6,
                     seed=42,
                     silent = True) 
    scores = cross_val_score(
        regressor, X_train, y, cv=5, scoring='neg_mean_squared_error'
    )
    rmse_scores = np.sqrt(-scores)

    print(
        f"RMSE: {np.mean(rmse_scores):.4f} +/- {np.std(rmse_scores):.4f}"
    )
    d[lr] = np.mean(rmse_scores)

print('optimal lr: %d and the rmse is %f' % (min(d, key=d.get),min(d.values())))
best_lr = min(d, key=d.get)

n_estimators: 500 RMSE: 0.3560 +/- 0.0192
n_estimators: 600 RMSE: 0.3552 +/- 0.0194
n_estimators: 700 RMSE: 0.3546 +/- 0.0194
n_estimators: 800 RMSE: 0.3542 +/- 0.0193
n_estimators: 900 RMSE: 0.3539 +/- 0.0192
n_estimators: 1000 RMSE: 0.3537 +/- 0.0191
n_estimators: 1100 RMSE: 0.3535 +/- 0.0191
n_estimators: 1200 RMSE: 0.3534 +/- 0.0191
n_estimators: 1300 RMSE: 0.3533 +/- 0.0190
n_estimators: 1400 RMSE: 0.3532 +/- 0.0190
n_estimators: 1500 RMSE: 0.3532 +/- 0.0190
n_estimators: 1600 RMSE: 0.3530 +/- 0.0190
n_estimators: 1700 RMSE: 0.3528 +/- 0.0188
n_estimators: 1800 RMSE: 0.3528 +/- 0.0188
n_estimators: 1900 RMSE: 0.3528 +/- 0.0189
optimal n_estimators: 1900 and the rmse is 0.352752

Learning Rate!

lr: 0 RMSE: 0.4228 +/- 0.0206
lr: 0 RMSE: 0.3711 +/- 0.0204
lr: 0 RMSE: 0.3592 +/- 0.0200
lr: 0 RMSE: 0.3555 +/- 0.0194
lr: 0 RMSE: 0.3548 +/- 0.0198
lr: 0 RMSE: 0.3570 +/- 0.0181
lr: 0 RMSE: 0.3560 +/- 0.0192
lr: 0 RMSE: 0.3552 +/- 0.0174
lr: 0 RMSE: 0.3577 +/- 0.0193
lr: 0 RMSE: 0.3577 +/

In [36]:
best_lr

0.05

## Feature Selection

In [12]:
from sklearn import feature_selection
selector = feature_selection.RFECV(estimator = regressor, cv=3, verbose=100)
selector.fit(X_train, y)
print(selector.n_features_)

Fitting estimator with 208 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 207 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 206 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 205 features.
Parame

Fitting estimator with 185 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 184 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 183 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 182 features.
Parame

Fitting estimator with 162 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 161 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 160 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 159 features.
Parame

Fitting estimator with 139 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 138 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 137 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 136 features.
Parame

Fitting estimator with 116 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 115 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 114 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 113 features.
Parame

Fitting estimator with 93 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 92 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 91 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 90 features.
Parameters

Fitting estimator with 70 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 69 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 68 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 67 features.
Parameters

Fitting estimator with 47 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 46 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 45 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 44 features.
Parameters

Fitting estimator with 24 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 23 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 22 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 21 features.
Parameters

Fitting estimator with 207 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 206 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 205 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 204 features.
Parame

Fitting estimator with 184 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 183 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 182 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 181 features.
Parame

Fitting estimator with 161 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 160 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 159 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 158 features.
Parame

Fitting estimator with 138 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 137 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 136 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 135 features.
Parame

Fitting estimator with 115 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 114 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 113 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 112 features.
Parame

Fitting estimator with 92 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 91 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 90 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 89 features.
Parameters

Fitting estimator with 69 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 68 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 67 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 66 features.
Parameters

Fitting estimator with 46 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 45 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 44 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 43 features.
Parameters

Fitting estimator with 23 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 22 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 21 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 20 features.
Parameters

Fitting estimator with 207 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 206 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 205 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 204 features.
Parame

Fitting estimator with 184 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 183 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 182 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 181 features.
Parame

Fitting estimator with 161 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 160 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 159 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 158 features.
Parame

Fitting estimator with 138 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 137 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 136 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 135 features.
Parame

Fitting estimator with 115 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 114 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 113 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 112 features.
Parame

Fitting estimator with 92 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 91 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 90 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 89 features.
Parameters

Fitting estimator with 69 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 68 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 67 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 66 features.
Parameters

Fitting estimator with 46 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 45 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 44 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 43 features.
Parameters

Fitting estimator with 23 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 22 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 21 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 20 features.
Parameters

Fitting estimator with 207 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 206 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 205 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 204 features.
Parame

Fitting estimator with 184 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 183 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 182 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 181 features.
Parame

Fitting estimator with 161 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 160 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 159 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 158 features.
Parame

Fitting estimator with 138 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 137 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 136 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 135 features.
Parame

Fitting estimator with 115 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 114 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 113 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 112 features.
Parame

Fitting estimator with 92 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 91 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 90 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 89 features.
Parameters

Fitting estimator with 69 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 68 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 67 features.
Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Fitting estimator with 66 features.
Parameters

In [20]:
print(selector.support_)
print(selector.ranking_)


[ True False False False  True False False False False False False False
 False False False False False False False False False False False  True
  True False False  True  True False False  True False  True False  True
  True False False  True  True  True  True  True  True  True  True  True
 False False False False False False False False False False False False
 False False False False False False False False False  True  True False
 False  True  True False False  True False False False  True  True False
 False  True  True  True  True  True  True  True  True  True  True  True
  True False  True  True  True  True  True False False False False False
 False False False False False False False  True False False False False
 False False False False False False False False False False False False
 False False False False False False False False False False False False
 False False False False False False  True False  True False  True  True
  True False False False False False False False Fa

In [29]:
X_selected_train = X_train.loc[:,selector.support_]

In [30]:
X_selected_train.columns

Index(['WeeksToDeparture', 'd_Min TemperatureC', 'd_Total_ops',
       'd_Total Delays', 'd_Actual Departures', 'd_Actual Arrivals',
       'd_Delayed Arrivals', 'd_Price', 'd_elevation_ft', 'd_Division',
       'd_StPOP', 'd_StRBirth', 'd_StRDeath', 'd_StRMig', 'd_GDP', 'd_POP',
       'd_RPI', 'd_Latitude', 'd_longitude', 'a_Total_ops', 'a_Total Delays',
       'a_Actual Departures', 'a_Actual Arrivals', 'a_Delayed Arrivals',
       'a_elevation_ft', 'a_Division', 'a_StPOP', 'a_StRBirth', 'a_StRDeath',
       'a_StRMig', 'a_GDP', 'a_POP', 'a_RPI', 'a_Latitude', 'a_longitude',
       'nextworkday', 'lastworkday', 'Distance', 'month', 'day', 'weekday',
       'week', 'n_days', 'm_10', 'wd_1', 'wd_3', 'wd_5', 'wd_6', 'w_1', 'w_14',
       'w_20', 'w_21', 'w_27', 'w_35', 'w_47', 'w_52'],
      dtype='object')

In [35]:
%%time
regressor = xgboost.XGBRegressor(colsample_bytree=0.7,
                 gamma=0.3,                 
                 learning_rate=0.05,
                 max_depth=6,
                 min_child_weight=3,
                 n_estimators=3000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.5,
                 subsample=0.6,
                 seed=42,
                 silent = True) 
scores = cross_val_score(
    regressor, X_selected_train, y, cv=5, scoring='neg_mean_squared_error'
)
rmse_scores = np.sqrt(-scores)

print(
    f"RMSE: {np.mean(rmse_scores):.4f} +/- {np.std(rmse_scores):.4f}"
)

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoo

In [42]:
X_s2 = X_train[['WeeksToDeparture', 'd_Min TemperatureC', 'd_Total_ops',
       'd_Total Delays', 'd_Actual Departures', 'd_Actual Arrivals',
       'd_Delayed Arrivals', 'd_Price', 'd_elevation_ft', 'd_Division',
       'd_StPOP', 'd_StRBirth', 'd_StRDeath', 'd_StRMig', 'd_GDP', 'd_POP',
       'd_RPI', 'd_Latitude', 'd_longitude', 'a_Total_ops', 'a_Total Delays',
       'a_Actual Departures', 'a_Actual Arrivals', 'a_Delayed Arrivals',
       'a_elevation_ft', 'a_Division', 'a_StPOP', 'a_StRBirth', 'a_StRDeath',
       'a_StRMig', 'a_GDP', 'a_POP', 'a_RPI', 'a_Latitude', 'a_longitude',
       'nextworkday', 'lastworkday','Distance', 'month', 'day', 'weekday',
       'week', 'n_days', 'm_10', 'wd_1', 'wd_3', 'wd_5', 'wd_6', 'w_1', 'w_14',
       'w_20', 'w_21', 'w_27', 'w_35', 'w_47', 'w_52']]

In [49]:
X_s3 = X_train[['WeeksToDeparture', 'd_Min TemperatureC', 'd_Total_ops',
       'd_Total Delays', 'd_Actual Departures', 'd_Actual Arrivals',
       'd_Delayed Arrivals', 'd_Price', 'd_elevation_ft', 'd_Division',
       'd_StPOP','d_GDP', 'd_POP',
       'd_RPI', 'd_Latitude', 'd_longitude', 'a_Total_ops', 'a_Total Delays',
       'a_Actual Departures', 'a_Actual Arrivals', 'a_Delayed Arrivals',
       'a_elevation_ft', 'a_Division', 'a_StPOP',
       'a_StRMig', 'a_GDP', 'a_POP', 'a_RPI', 'a_Latitude', 'a_longitude',
       'nextworkday', 'lastworkday','Distance', 'month', 'day', 'weekday',
       'week', 'n_days', 'm_10', 'wd_1', 'wd_3', 'wd_5', 'wd_6', 'w_1', 'w_14',
       'w_20', 'w_21', 'w_27', 'w_35', 'w_47', 'w_52']]

In [50]:

import xgboost
from xgboost import XGBRegressor
from xgboost import plot_importance
from sklearn import metrics


regressor = xgboost.XGBRegressor(colsample_bytree=0.7,
                 gamma=0.3,                 
                 learning_rate=0.07,
                 max_depth=5,
                 min_child_weight=3,
                 n_estimators=2000,                                                                    
                 reg_alpha=0.75,
                 reg_lambda=0.5,
                 subsample=0.6,
                 seed=42,
                 silent = True) 
scores = cross_val_score(
    regressor, X_s3, y, cv=5, scoring='neg_mean_squared_error'
)
rmse_scores = np.sqrt(-scores)

print(
    f"RMSE: {np.mean(rmse_scores):.4f} +/- {np.std(rmse_scores):.4f}"
)

Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { silent } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoo