In [None]:
%matplotlib inline

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

from sklearn.linear_model import LinearRegression, SGDRegressor, HuberRegressor
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor

from sklearn.preprocessing import LabelEncoder

from ml_metrics import rmsle, msle

In [None]:
# Load the raw training and test tables from the shared input directory.
train, test = map(pd.read_csv, ('../input/Train.csv', '../input/Test.csv'))

In [None]:
# Keep only training rows with rain_p_h below 11 and temperature above 228;
# both conditions are applied in a single boolean mask.
keep_rows = (train.rain_p_h < 11) & (train.temperature > 228)
train = train[keep_rows]

In [None]:
# A wind direction of 360 is rewritten to 0 in both frames; every other value is kept.
train['wind_direction'] = train['wind_direction'].mask(train['wind_direction'] == 360, 0)
test['wind_direction'] = test['wind_direction'].mask(test['wind_direction'] == 360, 0)

In [None]:
import datetime  # kept so any other cell that uses the `datetime` module keeps working

# Weekday name ('Monday' ... 'Sunday') for every training row.
# `date_time` is parsed with the same format the notebook later uses to convert
# the column, and `.dt.day_name()` always yields English names, which is what
# the Saturday/Sunday comparison in `weekend` expects. The parse is done on a
# temporary Series, so `train['date_time']` itself is left unchanged here.
# train.to_csv('asp.csv',index=False)
train['day'] = pd.to_datetime(
    train['date_time'], format='%Y-%m-%d %H:%M:%S'
).dt.day_name()

In [None]:
# Weekday name ('Monday' ... 'Sunday') for every test row, derived the same way
# as for the training frame. `.dt.day_name()` always yields English names, and
# the parse runs on a temporary Series so `test['date_time']` is left unchanged.
test['day'] = pd.to_datetime(
    test['date_time'], format='%Y-%m-%d %H:%M:%S'
).dt.day_name()

In [None]:
def weekend(x):
    """Return 1 when the weekday name `x` is a weekend day, otherwise 0.

    Parameters
    ----------
    x : str
        English weekday name such as 'Monday' or 'Saturday'.

    Returns
    -------
    int
        1 for 'Saturday' or 'Sunday', 0 for anything else.
    """
    # The flag previously came out inverted (0 on Saturday/Sunday), which
    # contradicted the name of the feature it populates.
    return 1 if x in ('Saturday', 'Sunday') else 0
    
# Derive the `weekend` flag column from the weekday name in both frames.
for frame in (train, test):
    frame['weekend'] = frame['day'].map(weekend)

In [None]:
# Convert the `date_time` column to datetime64 in both frames using an explicit format.
for frame in (train, test):
    frame['date_time'] = pd.to_datetime(frame['date_time'], format='%Y-%m-%d %H:%M:%S')

In [None]:
# Hour of day taken from the parsed timestamp.
for frame in (train, test):
    frame['hour'] = frame['date_time'].dt.hour

In [None]:
# Day of the month taken from the parsed timestamp.
for frame in (train, test):
    frame['date'] = frame['date_time'].dt.day

In [None]:
#train['weekday'] = train.date_time.dt.weekday
#test['weekday'] = test.date_time.dt.weekday

In [None]:
# Calendar month taken from the parsed timestamp.
for frame in (train, test):
    frame['month'] = frame['date_time'].dt.month

In [None]:
# Binary holiday flag: 1 when `is_holiday` carries a holiday name, 0 otherwise.
# Newer pandas releases treat the literal text 'None' as a missing value when
# reading a CSV, so the "no holiday" marker may arrive either as the string
# 'None' or as NaN. Both are mapped to 0; comparing against 'None' alone would
# flag every row as a holiday in that case.
train['holiday'] = (train.is_holiday.notna() & (train.is_holiday != 'None')).astype(int)
test['holiday'] = (test.is_holiday.notna() & (test.is_holiday != 'None')).astype(int)

In [None]:
# The raw `is_holiday` column is removed from both frames.
train = train.drop(columns=['is_holiday'])
test = test.drop(columns=['is_holiday'])

In [None]:
# Drop training rows whose weather_description never occurs in the test set.
train_descriptions = set(train.weather_description)
test_descriptions = set(test.weather_description)
wd = list(train_descriptions - test_descriptions)
train_only_mask = train.weather_description.isin(wd)
train = train[~train_only_mask]

In [None]:
# Drop training rows whose weather_type never occurs in the test set.
train_types = set(train.weather_type)
test_types = set(test.weather_type)
wd = list(train_types - test_types)
train_only_mask = train.weather_type.isin(wd)
train = train[~train_only_mask]

In [None]:
# Display (rows, columns) for the training and test frames.
tuple(frame.shape for frame in (train, test))

In [None]:
# For every test column, print the number of distinct values in train and in test.
for col in test:
    print(f'{col} : {train[col].nunique()}, {test[col].nunique()}')

In [None]:
# Traffic volume against hour of day. x and y are passed by keyword because
# recent seaborn releases no longer accept them positionally.
sns.scatterplot(x=train.hour, y=train.traffic_volume)

In [None]:
# Hour-of-day buckets. `night_time` is defined but is not turned into a feature.
day_time = list(range(6, 19))
night_time = [19, 20, 21, 22, 23, 0, 1, 2, 3, 4, 5]
peak_time = list(range(5, 20))
off = [1, 2, 3, 4, 23]

# One 0/1 indicator column per bucket, created in the same order in both frames.
hour_buckets = (('day_time', day_time), ('peak_time', peak_time), ('off', off))
for frame in (train, test):
    for flag_name, bucket_hours in hour_buckets:
        frame[flag_name] = frame.hour.isin(bucket_hours).astype(int)

In [None]:
# Separate the target: `pop` returns the traffic_volume column and removes it from train.
y = train.pop('traffic_volume')

In [None]:
# Columns that are not passed to the models are removed from both frames.
unused_cols = ['date_time', 'visibility_in_miles']
train = train.drop(columns=unused_cols)
test = test.drop(columns=unused_cols)

In [None]:
# The models are fit on `train` and applied to `test`, so both frames must have
# the same columns in the same order. Comparing only the column count would let
# a misordered or misnamed column slip through.
assert list(train.columns) == list(test.columns), (
    f'train/test column mismatch: '
    f'train={list(train.columns)}, test={list(test.columns)}'
)

In [None]:
# Integer-encode weather_type: the encoder is fit on train and reused for test.
le_weather_type = LabelEncoder()
train['weather_type'] = le_weather_type.fit_transform(train['weather_type'])
test['weather_type'] = le_weather_type.transform(test['weather_type'])

In [None]:
# Integer-encode weather_description: the encoder is fit on train and reused for test.
le_weather_des = LabelEncoder()
train['weather_description'] = le_weather_des.fit_transform(train['weather_description'])
test['weather_description'] = le_weather_des.transform(test['weather_description'])

In [None]:
# Integer-encode the weekday name: the encoder is fit on train and reused for test.
le_day = LabelEncoder()
train['day'] = le_day.fit_transform(train['day'])
test['day'] = le_day.transform(test['day'])

In [None]:
def norm(d):
    """Min-max scale `d` so its smallest value maps to 0 and its largest to 1.

    Parameters
    ----------
    d : pandas.Series or numpy.ndarray
        Numeric values to rescale.

    Returns
    -------
    Same container type as `d`, holding ``(d - min) / (max - min)``.
    When every value is identical (max == min) the result is all zeros
    rather than the NaNs a 0/0 division would produce.
    """
    lo = d.min()
    span = d.max() - lo
    if span == 0:
        # Constant input: multiplying by 0.0 gives float zeros and keeps any NaN entries NaN.
        return (d - lo) * 0.0
    return (d - lo) / span

In [None]:
# Min-max scale the continuous weather features.
# The minimum and range are taken from the TRAINING frame only and then applied
# to both frames, so a given raw value maps to the same scaled value in train
# and test. Scaling each frame with its own min/max would shift the test
# features relative to what the models were fit on.
scale_cols = [
    'air_pollution_index',
    'humidity',
    'wind_direction',
    'temperature',
    'rain_p_h',
    'clouds_all',
]
col_min = train[scale_cols].min()
# A constant training column has zero range; divide by 1 so it scales to 0 instead of NaN.
col_range = (train[scale_cols].max() - col_min).replace(0, 1)

# Both right-hand sides use the statistics captured above, so the order of
# these two assignments does not matter.
test[scale_cols] = (test[scale_cols] - col_min) / col_range
train[scale_cols] = (train[scale_cols] - col_min) / col_range

In [None]:
# Preview the first three rows of the prepared training frame.
train.iloc[:3]

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor


In [None]:
# Positional column indices 2, 4 and 9 through 19.
# NOTE(review): presumably the categorical feature positions; the list is not used
# by any visible cell - confirm before relying on it.
cat_cols = [2, 4] + list(range(9, 20))

In [None]:
# Bagging ensemble of 20 LightGBM regressors (num_leaves=85).
# Seeds are fixed on both the wrapper and the base model so that a
# Restart & Run All reproduces the same predictions.
lgb = BaggingRegressor(
    LGBMRegressor(num_leaves=85, random_state=42),
    n_estimators=20,
    random_state=42,
)
lgb.fit(train, y)
pred1 = lgb.predict(test)

In [None]:
# Bagging ensemble of 20 random forests (1000 trees each, max_depth=10).
# Seeds are fixed on both the wrapper and the base model so that a
# Restart & Run All reproduces the same predictions.
rf = BaggingRegressor(
    RandomForestRegressor(n_estimators=1000, max_depth=10, random_state=42),
    n_estimators=20,
    random_state=42,
)
rf.fit(train, y)
pred2 = rf.predict(test)

In [None]:
# Final submission: an equal-weight blend of the two model predictions.
# The blend is built directly from `pred1` and `pred2`, the prediction arrays
# computed by the model cells. The frames `sub` and `sub1` used here before are
# not defined anywhere in the notebook, so the cell failed on a fresh kernel.
sub3 = pd.DataFrame()
sub3['date_time'] = pd.read_csv('../input/Test.csv')['date_time']
sub3['traffic_volume'] = (pred1 * 0.5) + (pred2 * 0.5)
sub3.to_csv('sub_blend.csv', index=False)