In [1]:
import pandas as pd
import lightgbm
from sklearn.preprocessing import CategoricalEncoder,OrdinalEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from bayes_opt import BayesianOptimization
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np
import datetime
from tqdm import tqdm_notebook as tqdm
import category_encoders as ce

In [2]:
train = pd.read_csv("./train_5CLrC8b/train.csv")
test = pd.read_csv("./test.csv")
sample_submission = pd.read_csv("./sample_submission.csv")

In [3]:
concated = pd.concat([train,test],ignore_index=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [4]:
concated['booking_date'] = pd.to_datetime(concated['booking_date'],dayfirst=True)
concated['checkin_date'] = pd.to_datetime(concated['checkin_date'],dayfirst=True)
concated['checkout_date'] = pd.to_datetime(concated['checkout_date'],dayfirst=True)

In [5]:
concated['booking_month'] = concated['booking_date'].apply(lambda x:x.month)
concated['booking_year'] = concated['booking_date'].apply(lambda x:x.year)
concated['booking_day'] = concated['booking_date'].apply(lambda x:x.day)

concated['checkin_month'] = concated['checkin_date'].apply(lambda x:x.month)
concated['checkin_year'] = concated['checkin_date'].apply(lambda x:x.year)
concated['checkin_day'] = concated['checkin_date'].apply(lambda x:x.day)

concated['checkout_month'] = concated['checkout_date'].apply(lambda x:x.month)
concated['checkout_year'] = concated['checkout_date'].apply(lambda x:x.year)
concated['checkout_day'] = concated['checkout_date'].apply(lambda x:x.day)

In [6]:
concated['booking_dayofweek'] = concated['booking_date'].apply(lambda x:x.dayofweek)

concated['checkin_dayofweek'] = concated['checkin_date'].apply(lambda x:x.dayofweek)

concated['checkout_dayofweek'] = concated['checkout_date'].apply(lambda x:x.dayofweek)

In [7]:
concated['season_holidayed_code_is_na'] = concated.season_holidayed_code.apply(
    lambda x: 1 if pd.isna(x) else 0)
concated['state_code_residence_is_na'] = concated.state_code_residence.apply(
    lambda x: 1 if pd.isna(x) else 0)
concated['season_holidayed_code'] = concated.season_holidayed_code.apply(
    lambda x: 0 if pd.isna(x) else x)
concated['state_code_residence'] = concated.state_code_residence.apply(
    lambda x: 0 if pd.isna(x) else x)

In [8]:
concated['actual_days_booked'] = concated.apply(
    lambda x: (x['checkout_date'] - x['checkin_date']).days, axis=1)

In [9]:
concated['n_rooms_booked'] = concated.apply(
    lambda x: x['roomnights'] / x['actual_days_booked'], axis=1)

In [10]:
concated['person_per_room'] = concated.apply(
    lambda x: (x['numberofadults'] + x['numberofchildren']) / x['n_rooms_booked'] if x['n_rooms_booked'] else 0,
    axis=1)

In [11]:
def count_weekend(row):
    checkin_date = row['checkin_date']
    actual_days_booked = row['actual_days_booked']
    stayed_in_weekend = 0
    for i in range(actual_days_booked):
        new_day = (checkin_date + datetime.timedelta(days=i)).day_name()
        if new_day == "Saturday" or new_day == "Friday" or new_day == "Sunday":
            stayed_in_weekend += 1
    return stayed_in_weekend

In [12]:
concated['n_weekend_days'] = concated.apply(count_weekend,axis=1)

In [13]:
encoder = OrdinalEncoder(dtype=np.int32, )
concated[[
    'member_age_buckets', 'cluster_code', 'reservationstatusid_code',
    'resort_id'
]] = encoder.fit_transform(concated[[
    'member_age_buckets', 'cluster_code', 'reservationstatusid_code',
    'resort_id'
]])

In [14]:
concat_sorted = concated.sort_values('booking_date')
concat_sorted_grouped = concat_sorted.groupby('memberid')
n_previous_bookings = [0]*len(concated)
days_between_previous_bookings = [0]*len(concated)
for memberid,grouped_df in tqdm(concat_sorted_grouped):
    count = 0
    previous_booking_date = 0
    for ind,data in enumerate(grouped_df.iterrows()):
        index,row = data
        n_previous_bookings[index] = count
        count += 1
        
        if ind == 0:
            days_between_previous_bookings[index] = 0
        else:
            days_between_previous_bookings[index] = (row['booking_date'] - previous_booking_date).days
        previous_booking_date = row['booking_date']
concated['n_previous_bookings'] = n_previous_bookings
concated['days_between_previous_bookings'] = days_between_previous_bookings

HBox(children=(IntProgress(value=0, max=144823), HTML(value='')))




In [15]:
seasons = {
    3: 1,
    4: 1,
    5: 1,
    6: 2,
    7: 2,
    8: 2,
    9: 3,
    10: 3,
    11: 3,
    12: 4,
    1: 4,
    2: 4
}
def get_season_india(row):
    checking_month = row['checkin_month']
    return seasons[checking_month]

In [16]:
concated['child2person_ratio'] = concated.apply(
    lambda x: x['numberofchildren'] / x['numberofadults'] if x['numberofadults'] else 0,
    axis=1)
concated['seasons'] = concated.apply(get_season_india, axis=1)

In [17]:
concated['travelline_in_same_state'] = concated.apply(
    lambda x: 1 if x['state_code_residence'] == x['state_code_resort'] else 0,
    axis=1)

In [18]:
def count_weekday(row):
    checkin_date = row['checkin_date']
    actual_days_booked = row['actual_days_booked']
    stayed_in_weekend = 0
    stayed_in_weekday = 0
    for i in range(actual_days_booked):
        new_day = (checkin_date + datetime.timedelta(days=i)).day_name()
        if new_day == "Saturday" or new_day == "Friday" or new_day == "Sunday":
            stayed_in_weekend += 1
        else:
            stayed_in_weekday += 1
    return stayed_in_weekday

In [19]:
concated['stayed_in_weekday'] = concated.apply(count_weekday,axis=1)

In [20]:
concated['booking_advance_days'] = concated.apply(
    lambda x: (x['checkin_date'] - x['booking_date']).days, axis=1)

In [21]:
concat_sorted = concated.sort_values('checkin_date')
concat_sorted_grouped = concat_sorted.groupby('memberid')
days_between_previous_checkins = [0]*len(concated)
for memberid,grouped_df in tqdm(concat_sorted_grouped):
    previous_checking_date = 0
    for ind,data in enumerate(grouped_df.iterrows()):
        index,row = data
        if ind == 0:
            days_between_previous_checkins[index] = 0
        else:
            days_between_previous_checkins[index] = (row['checkin_date'] - previous_checking_date).days
        previous_checking_date = row['checkin_date']
concated['days_between_previous_checkins'] = days_between_previous_checkins

HBox(children=(IntProgress(value=0, max=144823), HTML(value='')))




In [22]:
concat_sorted = concated.sort_values('checkin_date')
concat_sorted_grouped = concat_sorted.groupby(['memberid','checkin_year'])
yearly_visit_number = [0]*len(concated)
total_yearly_visits = [0]*len(concated)
for memberid,grouped_df in tqdm(concat_sorted_grouped):
    num = 1
    tot = len(grouped_df)
    for ind,data in enumerate(grouped_df.iterrows()):
        index,row = data
        yearly_visit_number[index] = num
        total_yearly_visits[index] = tot
        num += 1
concated['yearly_visit_number'] = yearly_visit_number
concated['total_yearly_visits'] = total_yearly_visits

HBox(children=(IntProgress(value=0, max=309756), HTML(value='')))




In [23]:
concat_sorted_grouped = concated.groupby(['memberid'])
memberid_total_visits = [0]*len(concated)
for memberid,grouped_df in tqdm(concat_sorted_grouped):
    tot = len(grouped_df)
    for ind,data in enumerate(grouped_df.iterrows()):
        index,row = data
        memberid_total_visits[index] = tot
concated['memberid_total_visits'] = memberid_total_visits

HBox(children=(IntProgress(value=0, max=144823), HTML(value='')))




In [24]:
more_features =list(pd.read_csv('/home/prashant/Downloads/JJ.csv',header = None)[0])

In [27]:
len(more_features)

488189

In [None]:
#concated['more_fet'] = more_features

In [None]:
concated.shape

In [30]:
x = pd.DataFrame(concated.groupby('resort_id').count()['total_pax'])
y = x.to_dict()['total_pax']

uu = []
for i in concated['resort_id']:
    uu.append(y[i])
   


In [32]:
concated['count'] = uu

In [None]:
# # concated.to_csv("./concat_train_test_feat_engg.csv",index=False)
# concated = pd.read_csv("./concat_train_test_feat_engg.csv")
# train = pd.read_csv("./train_5CLrC8b/train.csv")
# test = pd.read_csv("./test.csv")
# sample_submission = pd.read_csv("./sample_submission.csv")
# concated['booking_date'] = pd.to_datetime(
#     concated['booking_date'], dayfirst=True)
# concated['checkin_date'] = pd.to_datetime(
#     concated['checkin_date'], dayfirst=True)
# concated['checkout_date'] = pd.to_datetime(
#     concated['checkout_date'], dayfirst=True)

In [None]:
concated.T

In [33]:
# concated.drop(['reservation_id','booking_date','checkin_date','checkout_date','memberid'],axis=1,inplace=True)
cat_columns = [
    'booking_type_code', 'channel_code', 'main_product_code',
    'persontravellingid', 'member_age_buckets', 'cluster_code',
    'reservationstatusid_code', 'resort_region_code', 'resort_type_code',
    'room_type_booked_code', 'season_holidayed_code', 'seasons', "resort_id",
    "state_code_residence", "state_code_resort", 'season_holidayed_code_is_na',
    'state_code_residence_is_na', 'travelline_in_same_state'
]
to_drop_columns = [
    'amount_spent_per_room_night_scaled',
    'reservation_id',
    'booking_date',
    'checkin_date',
    'checkout_date',
    'memberid',
    'resort_type_code',
    'cluster_code',
    'reservationstatusid_code',
    'state_code_resort',
    'resort_region_code',
    
]

cat_columns = list(set(cat_columns) - set(to_drop_columns))
# for c in cat_columns:
#     print(c,":",len(concated[c].unique()),"\n")

# encoder = ce.polynomial.PolynomialEncoder(cols=cat_columns)

# concated = encoder.fit_transform(concated)

In [44]:
params= {'bagging_fraction': 0.8506230273762951,
  'bagging_freq': int(11.997717202080683),
  'cat_l2': 17.76603440180926,
  'cat_smooth': 90.01338713539889,
  'drop_rate': 0.828471324834864,
  'feature_fraction': 0.5396781843922561,
  'lambda_l1': 5.11481807918487,
  'lambda_l2': 9.972192609474396,
  'max_cat_threshold': int(9834.145546034471),
  'max_cat_to_onehot': int(397.4975773409506),
  'max_delta_step': 5.509600723103176,
  'max_depth': int(9818.20211661785),
  'max_drop': int(30.8804918777279),
  'min_data_in_leaf': int(276.1481648277242),
  'min_data_per_group': int(3945.772360706398),
  'min_gain_to_split': 5.038274756279193,
  'min_sum_hessian_in_leaf': 0.9125303541207858,
  'num_leaves': int(9369.57052885872),
  'other_rate': 0.0806926301303258,
  'skip_drop': 0.7679739421390375,
    'objective': 'regression',
         'metric': 'rmse',
  'top_rate': 0.922033189148117}

In [None]:
# concated.to_csv("./concat_train_test_feat_engg.dropped.csv",index=False)
# concated = pd.read_csv("./concat_train_test_feat_engg.dropped.csv")
# sample_submission = pd.read_csv("./sample_submission.csv")
# train = pd.read_csv("./train_5CLrC8b/train.csv")
# test = pd.read_csv("./test.csv")
# sample_submission = pd.read_csv("./sample_submission.csv")

In [34]:
train = concated[:len(train)]
test = concated[-len(test):]

In [50]:
all_predictions = []
# all_training_predictions = []
X = train.drop(to_drop_columns, axis=1)
Y = train['amount_spent_per_room_night_scaled']
import lightgbm as lgb

for i in range(10):
    print(i)
    trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.15)
    d_train = lgb.Dataset(trainX, label=trainY)
    d_test = lgb.Dataset(testX,testY)
    lgb_model = lgb.train(params, d_train,1000,valid_sets=(d_test),
        early_stopping_rounds=100,
        categorical_feature=cat_columns)
    #lgb_model = lightgbm.LGBMRegressor(n_estimators=1000)
#     lgb.fit(
#         trainX,
#         trainY,
#         eval_set=(testX, testY),
#         early_stopping_rounds=100,
#         categorical_feature=cat_columns,
#     )
    #     predictions = lgb_model.predict(X)
    #     all_training_predictions.append(predictions)
    predictions = lgb_model.predict(test.drop(to_drop_columns, axis=1))
    all_predictions.append(predictions)

0


New categorical_feature is ['booking_type_code', 'channel_code', 'main_product_code', 'member_age_buckets', 'persontravellingid', 'resort_id', 'room_type_booked_code', 'season_holidayed_code', 'season_holidayed_code_is_na', 'seasons', 'state_code_residence', 'state_code_residence_is_na', 'travelline_in_same_state']
  'New categorical_feature is {}'.format(sorted(list(categorical_feature))))


[1]	valid_0's rmse: 1.07043
Training until validation scores don't improve for 100 rounds.
[2]	valid_0's rmse: 1.05896
[3]	valid_0's rmse: 1.04758
[4]	valid_0's rmse: 1.03793
[5]	valid_0's rmse: 1.02912
[6]	valid_0's rmse: 1.02237
[7]	valid_0's rmse: 1.01667
[8]	valid_0's rmse: 1.01178
[9]	valid_0's rmse: 1.0081
[10]	valid_0's rmse: 1.0045
[11]	valid_0's rmse: 1.00107
[12]	valid_0's rmse: 0.99822
[13]	valid_0's rmse: 0.995319
[14]	valid_0's rmse: 0.992853
[15]	valid_0's rmse: 0.990593
[16]	valid_0's rmse: 0.988764
[17]	valid_0's rmse: 0.98736
[18]	valid_0's rmse: 0.985609
[19]	valid_0's rmse: 0.984232
[20]	valid_0's rmse: 0.982954
[21]	valid_0's rmse: 0.981715
[22]	valid_0's rmse: 0.980714
[23]	valid_0's rmse: 0.979873
[24]	valid_0's rmse: 0.978891
[25]	valid_0's rmse: 0.977985
[26]	valid_0's rmse: 0.977446
[27]	valid_0's rmse: 0.976873
[28]	valid_0's rmse: 0.976286
[29]	valid_0's rmse: 0.975659
[30]	valid_0's rmse: 0.975131
[31]	valid_0's rmse: 0.974481
[32]	valid_0's rmse: 0.974112
[

[68]	valid_0's rmse: 0.970448
[69]	valid_0's rmse: 0.970391
[70]	valid_0's rmse: 0.970388
[71]	valid_0's rmse: 0.970325
[72]	valid_0's rmse: 0.970319
[73]	valid_0's rmse: 0.970246
[74]	valid_0's rmse: 0.970149
[75]	valid_0's rmse: 0.970137
[76]	valid_0's rmse: 0.970114
[77]	valid_0's rmse: 0.970096
[78]	valid_0's rmse: 0.970062
[79]	valid_0's rmse: 0.970054
[80]	valid_0's rmse: 0.970041
[81]	valid_0's rmse: 0.970023
[82]	valid_0's rmse: 0.969981
[83]	valid_0's rmse: 0.96995
[84]	valid_0's rmse: 0.969948
[85]	valid_0's rmse: 0.969935
[86]	valid_0's rmse: 0.969926
[87]	valid_0's rmse: 0.969907
[88]	valid_0's rmse: 0.969879
[89]	valid_0's rmse: 0.969857
[90]	valid_0's rmse: 0.969838
[91]	valid_0's rmse: 0.969815
[92]	valid_0's rmse: 0.969801
[93]	valid_0's rmse: 0.969791
[94]	valid_0's rmse: 0.969785
[95]	valid_0's rmse: 0.969783
[96]	valid_0's rmse: 0.969758
[97]	valid_0's rmse: 0.969757
[98]	valid_0's rmse: 0.969749
[99]	valid_0's rmse: 0.969744
[100]	valid_0's rmse: 0.969738
[101]	vali

[157]	valid_0's rmse: 0.976929
[158]	valid_0's rmse: 0.976929
[159]	valid_0's rmse: 0.976929
[160]	valid_0's rmse: 0.976929
[161]	valid_0's rmse: 0.976929
[162]	valid_0's rmse: 0.976929
[163]	valid_0's rmse: 0.976929
[164]	valid_0's rmse: 0.976929
[165]	valid_0's rmse: 0.976929
[166]	valid_0's rmse: 0.976929
[167]	valid_0's rmse: 0.976929
[168]	valid_0's rmse: 0.976929
[169]	valid_0's rmse: 0.976929
[170]	valid_0's rmse: 0.976929
[171]	valid_0's rmse: 0.976929
[172]	valid_0's rmse: 0.976929
[173]	valid_0's rmse: 0.976929
[174]	valid_0's rmse: 0.976929
[175]	valid_0's rmse: 0.976929
[176]	valid_0's rmse: 0.976929
[177]	valid_0's rmse: 0.976929
[178]	valid_0's rmse: 0.976929
[179]	valid_0's rmse: 0.976929
[180]	valid_0's rmse: 0.976929
[181]	valid_0's rmse: 0.976929
[182]	valid_0's rmse: 0.976929
[183]	valid_0's rmse: 0.976929
[184]	valid_0's rmse: 0.976929
[185]	valid_0's rmse: 0.976929
[186]	valid_0's rmse: 0.976929
[187]	valid_0's rmse: 0.976929
[188]	valid_0's rmse: 0.976929
[189]	va

[20]	valid_0's rmse: 0.981814
[21]	valid_0's rmse: 0.98056
[22]	valid_0's rmse: 0.979522
[23]	valid_0's rmse: 0.978626
[24]	valid_0's rmse: 0.977712
[25]	valid_0's rmse: 0.976887
[26]	valid_0's rmse: 0.976228
[27]	valid_0's rmse: 0.975705
[28]	valid_0's rmse: 0.975063
[29]	valid_0's rmse: 0.974518
[30]	valid_0's rmse: 0.973988
[31]	valid_0's rmse: 0.973395
[32]	valid_0's rmse: 0.973004
[33]	valid_0's rmse: 0.972668
[34]	valid_0's rmse: 0.972386
[35]	valid_0's rmse: 0.972138
[36]	valid_0's rmse: 0.971925
[37]	valid_0's rmse: 0.971722
[38]	valid_0's rmse: 0.971489
[39]	valid_0's rmse: 0.971296
[40]	valid_0's rmse: 0.970932
[41]	valid_0's rmse: 0.970512
[42]	valid_0's rmse: 0.970287
[43]	valid_0's rmse: 0.969925
[44]	valid_0's rmse: 0.969752
[45]	valid_0's rmse: 0.969585
[46]	valid_0's rmse: 0.969426
[47]	valid_0's rmse: 0.969153
[48]	valid_0's rmse: 0.969017
[49]	valid_0's rmse: 0.968806
[50]	valid_0's rmse: 0.968668
[51]	valid_0's rmse: 0.968591
[52]	valid_0's rmse: 0.968495
[53]	valid_

[77]	valid_0's rmse: 0.976901
[78]	valid_0's rmse: 0.976878
[79]	valid_0's rmse: 0.976862
[80]	valid_0's rmse: 0.976792
[81]	valid_0's rmse: 0.976776
[82]	valid_0's rmse: 0.976762
[83]	valid_0's rmse: 0.976742
[84]	valid_0's rmse: 0.976712
[85]	valid_0's rmse: 0.976708
[86]	valid_0's rmse: 0.976702
[87]	valid_0's rmse: 0.976674
[88]	valid_0's rmse: 0.976664
[89]	valid_0's rmse: 0.976619
[90]	valid_0's rmse: 0.976603
[91]	valid_0's rmse: 0.97658
[92]	valid_0's rmse: 0.976562
[93]	valid_0's rmse: 0.976552
[94]	valid_0's rmse: 0.976541
[95]	valid_0's rmse: 0.976528
[96]	valid_0's rmse: 0.976528
[97]	valid_0's rmse: 0.976522
[98]	valid_0's rmse: 0.976522
[99]	valid_0's rmse: 0.976522
[100]	valid_0's rmse: 0.976522
[101]	valid_0's rmse: 0.976522
[102]	valid_0's rmse: 0.976522
[103]	valid_0's rmse: 0.976522
[104]	valid_0's rmse: 0.976522
[105]	valid_0's rmse: 0.976522
[106]	valid_0's rmse: 0.976522
[107]	valid_0's rmse: 0.976522
[108]	valid_0's rmse: 0.976522
[109]	valid_0's rmse: 0.976522
[

[152]	valid_0's rmse: 0.972816
[153]	valid_0's rmse: 0.972816
[154]	valid_0's rmse: 0.972816
[155]	valid_0's rmse: 0.972816
[156]	valid_0's rmse: 0.972816
[157]	valid_0's rmse: 0.972816
[158]	valid_0's rmse: 0.972816
[159]	valid_0's rmse: 0.972816
[160]	valid_0's rmse: 0.972816
[161]	valid_0's rmse: 0.972816
[162]	valid_0's rmse: 0.972816
[163]	valid_0's rmse: 0.972816
[164]	valid_0's rmse: 0.972816
[165]	valid_0's rmse: 0.972816
[166]	valid_0's rmse: 0.972816
[167]	valid_0's rmse: 0.972816
[168]	valid_0's rmse: 0.972816
[169]	valid_0's rmse: 0.972816
[170]	valid_0's rmse: 0.972816
[171]	valid_0's rmse: 0.972816
[172]	valid_0's rmse: 0.972816
[173]	valid_0's rmse: 0.972816
[174]	valid_0's rmse: 0.972816
[175]	valid_0's rmse: 0.972816
[176]	valid_0's rmse: 0.972816
[177]	valid_0's rmse: 0.972816
[178]	valid_0's rmse: 0.972816
[179]	valid_0's rmse: 0.972816
[180]	valid_0's rmse: 0.972816
[181]	valid_0's rmse: 0.972816
[182]	valid_0's rmse: 0.972816
[183]	valid_0's rmse: 0.972816
[184]	va

[5]	valid_0's rmse: 1.02748
[6]	valid_0's rmse: 1.02086
[7]	valid_0's rmse: 1.01526
[8]	valid_0's rmse: 1.01024
[9]	valid_0's rmse: 1.00666
[10]	valid_0's rmse: 1.00305
[11]	valid_0's rmse: 0.999811
[12]	valid_0's rmse: 0.997073
[13]	valid_0's rmse: 0.994299
[14]	valid_0's rmse: 0.991853
[15]	valid_0's rmse: 0.9898
[16]	valid_0's rmse: 0.987999
[17]	valid_0's rmse: 0.986638
[18]	valid_0's rmse: 0.984969
[19]	valid_0's rmse: 0.983669
[20]	valid_0's rmse: 0.982476
[21]	valid_0's rmse: 0.981245
[22]	valid_0's rmse: 0.980266
[23]	valid_0's rmse: 0.979357
[24]	valid_0's rmse: 0.978469
[25]	valid_0's rmse: 0.977692
[26]	valid_0's rmse: 0.977112
[27]	valid_0's rmse: 0.976498
[28]	valid_0's rmse: 0.975924
[29]	valid_0's rmse: 0.975329
[30]	valid_0's rmse: 0.974855
[31]	valid_0's rmse: 0.974228
[32]	valid_0's rmse: 0.973876
[33]	valid_0's rmse: 0.973619
[34]	valid_0's rmse: 0.973354
[35]	valid_0's rmse: 0.973048
[36]	valid_0's rmse: 0.972805
[37]	valid_0's rmse: 0.972602
[38]	valid_0's rmse: 0.

[85]	valid_0's rmse: 0.964848
[86]	valid_0's rmse: 0.964831
[87]	valid_0's rmse: 0.964792
[88]	valid_0's rmse: 0.964782
[89]	valid_0's rmse: 0.964763
[90]	valid_0's rmse: 0.964761
[91]	valid_0's rmse: 0.964732
[92]	valid_0's rmse: 0.964729
[93]	valid_0's rmse: 0.964712
[94]	valid_0's rmse: 0.964695
[95]	valid_0's rmse: 0.964692
[96]	valid_0's rmse: 0.964687
[97]	valid_0's rmse: 0.964687
[98]	valid_0's rmse: 0.964677
[99]	valid_0's rmse: 0.964667
[100]	valid_0's rmse: 0.964665
[101]	valid_0's rmse: 0.964654
[102]	valid_0's rmse: 0.964639
[103]	valid_0's rmse: 0.964617
[104]	valid_0's rmse: 0.964613
[105]	valid_0's rmse: 0.964613
[106]	valid_0's rmse: 0.964613
[107]	valid_0's rmse: 0.964613
[108]	valid_0's rmse: 0.964613
[109]	valid_0's rmse: 0.964613
[110]	valid_0's rmse: 0.964616
[111]	valid_0's rmse: 0.964616
[112]	valid_0's rmse: 0.964616
[113]	valid_0's rmse: 0.964616
[114]	valid_0's rmse: 0.964616
[115]	valid_0's rmse: 0.964616
[116]	valid_0's rmse: 0.964616
[117]	valid_0's rmse: 0

In [51]:
all_predictions = np.stack(all_predictions,axis=1)
# all_training_predictions = np.stack(all_training_predictions,axis=1)

In [None]:
# trainX, testX, trainY, testY = train_test_split(all_training_predictions, Y, test_size=0.15)
# lgb_merging_model = lightgbm.LGBMRegressor(n_estimators=1000)
# lgb_merging_model.fit(
#         trainX,
#         trainY,
#         eval_set=(testX, testY),
#         eval_metric='rmse',
#         early_stopping_rounds=100,
#     )

In [52]:
final_predictions = all_predictions.mean(axis=1)

In [53]:
print(final_predictions.shape)

(146765,)


In [54]:
sample_submission['amount_spent_per_room_night_scaled'] = final_predictions
sample_submission.to_csv(
    "todu_fet2_lgb_tuning.csv",
    index=False)

In [None]:
trainX

In [64]:
d = pd.read_csv("/home/prashant/Downloads/481_47669_us_todu_fet2_lgb_tuning.csv")

In [65]:
d.head()

Unnamed: 0,reservation_id,amount_spent_per_room_night_scaled
0,7dae1ce6bc8f69481328f2be5c4943077dad5598b5f66d...,8.160679
1,fe0d4e444e1818436c88f72f1cf800536c2f785e59baeb...,7.389002
2,540bd4285ad8168e8388f84ee74a82cd4f97dc0a404d3e...,7.475447
3,09593c907ae262e46f655b4db9e14f54a19eadcfdd2679...,7.861135
4,f4c50caac68051faf37551d70bb17eebef2a20e2244cb1...,7.895197


In [66]:
for ind, row in d.iterrows():
    if ind < 100:
        d.loc[ind, 'amount_spent_per_room_night_scaled'] = d.loc[ind, 'amount_spent_per_room_night_scaled'] + 1

In [67]:
d.head()

Unnamed: 0,reservation_id,amount_spent_per_room_night_scaled
0,7dae1ce6bc8f69481328f2be5c4943077dad5598b5f66d...,9.160679
1,fe0d4e444e1818436c88f72f1cf800536c2f785e59baeb...,8.389002
2,540bd4285ad8168e8388f84ee74a82cd4f97dc0a404d3e...,8.475447
3,09593c907ae262e46f655b4db9e14f54a19eadcfdd2679...,8.861135
4,f4c50caac68051faf37551d70bb17eebef2a20e2244cb1...,8.895197


In [None]:
%matplotlib inline

In [None]:
lightgbm.plot_importance(lgb_model,figsize=(20,30))

In [68]:
d.to_csv('testing_1.csv',index=False)