### Libary Imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVR
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

### Data Imports

In [2]:
train_data = pd.read_csv('Train.csv')
test_data = pd.read_csv('Test.csv')
Rider = pd.read_csv('Riders.csv')

Rename = {'Placement - Weekday (Mo = 1)':'Day of Week'}

train_data = pd.merge(train_data,Rider,on='Rider Id',how='left').set_index('Order No').rename(columns=Rename)
test_data = pd.merge(test_data,Rider,on='Rider Id',how='left').set_index('Order No').rename(columns=Rename)

X_TRAIN = train_data[['Day of Week','Distance (KM)','No_Of_Orders','Average_Rating','No_of_Ratings']]
Y_TRAIN = train_data['Time from Pickup to Arrival'].values
X_TEST = test_data[['Day of Week','Distance (KM)','No_Of_Orders','Average_Rating','No_of_Ratings']]

### Training(Random Forest)

In [4]:
RF = RandomForestRegressor(n_estimators = 200, 
                           min_samples_split=10,
                           min_samples_leaf=2,
                           max_features='sqrt',
                           max_depth=50,
                           bootstrap=True)


In [5]:
RF.fit(X_TRAIN, Y_TRAIN)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=50,
                      max_features='sqrt', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=2, min_samples_split=10,
                      min_weight_fraction_leaf=0.0, n_estimators=200,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

### Pipeline

In [6]:
pipe = make_pipeline(RF)
pipe

Pipeline(memory=None,
         steps=[('randomforestregressor',
                 RandomForestRegressor(bootstrap=True, criterion='mse',
                                       max_depth=50, max_features='sqrt',
                                       max_leaf_nodes=None,
                                       min_impurity_decrease=0.0,
                                       min_impurity_split=None,
                                       min_samples_leaf=2, min_samples_split=10,
                                       min_weight_fraction_leaf=0.0,
                                       n_estimators=200, n_jobs=None,
                                       oob_score=False, random_state=None,
                                       verbose=0, warm_start=False))],
         verbose=False)

### Cross-validation

In [7]:
cross_val_score(pipe, X_TRAIN,Y_TRAIN, cv=10).mean()

0.3748528085221289

### Test Predictions

In [8]:
pipe.fit(X_TRAIN, Y_TRAIN)

Pipeline(memory=None,
         steps=[('randomforestregressor',
                 RandomForestRegressor(bootstrap=True, criterion='mse',
                                       max_depth=50, max_features='sqrt',
                                       max_leaf_nodes=None,
                                       min_impurity_decrease=0.0,
                                       min_impurity_split=None,
                                       min_samples_leaf=2, min_samples_split=10,
                                       min_weight_fraction_leaf=0.0,
                                       n_estimators=200, n_jobs=None,
                                       oob_score=False, random_state=None,
                                       verbose=0, warm_start=False))],
         verbose=False)

In [9]:
Y_pred = pipe.predict(X_TEST)

In [10]:
Resp_var = np.round(Y_pred.reshape(len(Y_pred),1),2)

In [12]:
Order_no = np.array(test_data.index).reshape(len(test_data),1)

In [13]:
Results = np.concatenate((Order_no,Resp_var),1)

In [14]:
Results_df = pd.DataFrame(Results,columns=['Order_No', 'Time from Pickup to Arrival'])

In [15]:
Results_df

Unnamed: 0,Order_No,Time from Pickup to Arrival
0,Order_No_19248,1647.62
1,Order_No_12736,1680.79
2,Order_No_768,937.8
3,Order_No_15332,1330.1
4,Order_No_21373,1131.73
...,...,...
7063,Order_No_3612,1462.12
7064,Order_No_7657,2661.46
7065,Order_No_1969,1607.83
7066,Order_No_10591,2389.02


In [17]:
Results_df.to_csv('Predicted(RF 0.3).csv',index=False)