In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, ElasticNet, Lasso, Ridge
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


In [2]:
# Load the three pre-processed variants of the dataset in one pass; the file
# names suggest they differ in how missing values were handled.
# NOTE(review): 'mean_meadian.csv' looks like a misspelling of 'mean_median';
# it is left untouched because it must match the file name on disk — confirm.
df_dropped, df_mean_median, df_knn_median = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/dropped.csv',
        './data/mean_meadian.csv',
        './data/knn_median.csv',
    )
)

In [3]:
# Target: the delivery time the models will learn to predict.
Y_dropped = df_dropped['Time_taken (min)']

# Features: everything except the target and the 'Unnamed: 0' column
# (presumably the index written out when the CSV was saved — confirm).
X_dropped = df_dropped.drop(columns=['Time_taken (min)', 'Unnamed: 0'])

# Preview the feature matrix.
X_dropped.head()

Unnamed: 0,Delivery_person_ID,Delivery_person_Age,Delivery_person_Ratings,Road_traffic_density,Vehicle_condition,multiple_deliveries,Distance,Weather_conditions_0,Weather_conditions_1,Weather_conditions_2,...,Time_Order_picked_min_3,Time_Orderd_min_0,Time_Orderd_min_1,Time_Orderd_min_2,Time_Orderd_min_3,Time_Orderd_min_4,Time_Orderd_second_0,Time_Orderd_second_1,Time_Orderd_second_2,Time_Orderd_second_3
0,-1.68985,1.103936,-1.364886,-1.284859,1.229307,3.926757,-0.056239,0,0,1,...,1,0,0,0,0,1,0,0,0,1
1,0.842673,-1.496441,0.211997,-0.482463,0.006623,0.440171,-0.069781,0,1,0,...,0,0,0,0,1,0,0,0,0,1
2,0.280396,-1.149724,0.211997,0.319932,0.006623,0.440171,-0.044537,0,1,1,...,1,0,0,0,1,1,0,0,1,0
3,-1.015097,0.757219,-1.04951,1.122328,-1.21606,-1.303122,-0.080842,0,1,1,...,0,0,0,1,0,0,0,0,1,1
4,-0.820679,-0.976366,0.211997,-1.284859,0.006623,0.440171,-0.025745,0,0,1,...,0,0,0,1,0,1,0,1,0,0


In [4]:
# Preview the first rows of the target series as a sanity check.
# NOTE(review): the negative values in the recorded output suggest the target
# was standardised upstream — confirm before interpreting errors in minutes.
Y_dropped.head()

0    2.079414
1   -0.381197
2   -0.595163
3   -0.702146
4    1.544498
Name: Time_taken (min), dtype: float64

In [5]:
# Hold out 30% of the rows for testing; the fixed seed keeps the shuffled
# split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X_dropped,
    Y_dropped,
    test_size=0.30,
    random_state=69,
    shuffle=True,
)

In [6]:
# Baseline model: ordinary least squares with scikit-learn's default settings,
# fitted on the training split.
linear_regression = LinearRegression()
linear_regression.fit(x_train, y_train)

In [7]:
# Show the coefficients learned by the fitted linear model.
# NOTE(review): the recorded output contains an exactly mirrored pair
# (+0.5217 / -0.5217) and a ~1e-16 value, which may indicate redundant or
# constant encoded columns — confirm against the feature list.
linear_regression.coef_

array([ 9.23065667e-02,  2.30510574e-01, -2.56607779e-01, -3.62979569e-01,
       -1.93428905e-01,  1.85616305e-01, -1.95815698e-03, -1.72668822e-01,
       -3.24971442e-01,  1.44644335e-01, -1.23499781e-02, -2.48842772e-02,
       -9.37669673e-03,  5.92391949e-02,  1.07574141e-01,  5.21749435e-01,
       -5.21749435e-01,  1.03615512e+00,  1.25895076e+00, -2.55470214e-02,
       -4.16120185e-02,  9.81189216e-03, -6.76942681e-02,  1.24731510e-01,
       -7.36162323e-02, -7.14296207e-02,  1.66533454e-16, -4.44662832e-02,
        2.85280555e-02, -1.38788607e-01,  3.84071390e-02, -3.53744173e-02,
       -7.58221197e-03,  1.07786720e-02,  1.08047184e-02,  7.40742603e-03,
       -2.07559464e-01, -8.81681032e-02, -1.06463871e-01,  1.38001704e-01,
       -5.49229943e-02,  1.55248441e-03,  6.78407867e-03, -1.81767559e-02,
       -1.22361290e-02])

In [8]:
# Show the intercept (bias term) of the fitted linear model.
linear_regression.intercept_

-0.45345332118504267

In [9]:
# In-sample predictions, used to compare training fit against test fit.
y_pred_train = linear_regression.predict(x_train)

# (R² as a percentage, mean absolute error, mean squared error) on the
# training split.
(
    r2_score(y_true=y_train, y_pred=y_pred_train) * 100,
    mean_absolute_error(y_true=y_train, y_pred=y_pred_train),
    mean_squared_error(y_true=y_train, y_pred=y_pred_train),
)

(58.20069669083963, 0.5171471021050618, 0.42185564262546327)

In [10]:
# Predict on the held-out test split with the linear baseline.
# `y_pred` is reassigned by each later model's prediction cell, so it always
# holds the output of the most recently run prediction.
y_pred = linear_regression.predict(x_test)

In [11]:
# (R² as a percentage, mean absolute error, mean squared error) on the test
# split for the predictions currently held in `y_pred` (the linear baseline's
# when the cells are run in order).
(
    r2_score(y_true=y_test, y_pred=y_pred) * 100,
    mean_absolute_error(y_true=y_test, y_pred=y_pred),
    mean_squared_error(y_true=y_test, y_pred=y_pred),
)

(57.55494290754905, 0.5142248597974223, 0.415298225294398)

In [12]:
# Ridge (L2-penalised) regression with scikit-learn's default settings,
# fitted on the same training split as the linear baseline.
ridge = Ridge()
ridge.fit(x_train, y_train)

In [13]:
# Predict on the test split with the Ridge model (replaces the previous
# contents of `y_pred`).
y_pred = ridge.predict(x_test)

In [14]:
# (R² as a percentage, mean absolute error, mean squared error) on the test
# split for the predictions currently held in `y_pred` (the Ridge model's when
# the cells are run in order).
(
    r2_score(y_true=y_test, y_pred=y_pred) * 100,
    mean_absolute_error(y_true=y_test, y_pred=y_pred),
    mean_squared_error(y_true=y_test, y_pred=y_pred),
)

(57.55380787948366, 0.5142519204197971, 0.41530933083114374)

In [15]:
# Lasso (L1-penalised) regression with scikit-learn's default settings,
# fitted on the training split.
# NOTE(review): the test R² recorded for this model is ~0, which suggests the
# default penalty strength shrinks every coefficient to zero on this data so
# the model predicts a constant — confirm by inspecting `lasso.coef_` and
# consider tuning `alpha`.
lasso = Lasso()
lasso.fit(x_train, y_train)

In [16]:
# Predict on the test split with the Lasso model (replaces the previous
# contents of `y_pred`).
y_pred = lasso.predict(x_test)

In [17]:
# (R² as a percentage, mean absolute error, mean squared error) on the test
# split for the predictions currently held in `y_pred` (the Lasso model's when
# the cells are run in order).
(
    r2_score(y_true=y_test, y_pred=y_pred) * 100,
    mean_absolute_error(y_true=y_test, y_pred=y_pred),
    mean_squared_error(y_true=y_test, y_pred=y_pred),
)

(-0.00016572641949874622, 0.7984108002146133, 0.9784388147922645)

In [18]:
# ElasticNet (combined L1/L2 penalty) with scikit-learn's default settings:
# fit on the training split, then predict on the test split.
# NOTE(review): the metrics recorded for this model are identical to the Lasso
# ones (R² ~0), which suggests both collapse to a constant prediction under
# the default penalty — confirm via `elastic_net.coef_`.
elastic_net = ElasticNet().fit(x_train, y_train)
y_pred = elastic_net.predict(x_test)

In [19]:
# (R² as a percentage, mean absolute error, mean squared error) on the test
# split for the predictions currently held in `y_pred` (the ElasticNet model's
# when the cells are run in order).
(
    r2_score(y_true=y_test, y_pred=y_pred) * 100,
    mean_absolute_error(y_true=y_test, y_pred=y_pred),
    mean_squared_error(y_true=y_test, y_pred=y_pred),
)

(-0.00016572641949874622, 0.7984108002146133, 0.9784388147922645)

In [20]:
# Candidate regressors to compare, keyed by the label used when reporting.
# All are created with scikit-learn's default hyper-parameters.
models = {
    'Linear_regression': LinearRegression(),
    'Elastic_net': ElasticNet(),
    'Lasso': Lasso(),
    'Ridge': Ridge(),
}

# Dataset variants to evaluate, keyed by the label used when reporting.
dataframes = {
    'df_dropped': df_dropped,
    'df_mean_median': df_mean_median,
    'df_knn_median': df_knn_median,
}

In [21]:
# Evaluate every candidate model on every dataset variant and print the
# test-set metrics for each combination.
#
# `r2_score`, `mean_absolute_error` and `mean_squared_error` come from the
# notebook's import cell, so nothing is re-imported here.

# Parallel name/value lists, kept only in case later cells reference them;
# the loops below iterate the dictionaries directly.
dataframe_name = list(dataframes.keys())
dataframe_values = list(dataframes.values())
model_names = list(models.keys())
model_value = list(models.values())

for df_name, df in dataframes.items():

    print('<---------------------------------------------------------------------------------->')
    print('<================================ Model_performance ({}) ================================>'.format(df_name))

    # Split off the target; 'Unnamed: 0' is excluded from the features as well.
    X = df.drop(columns=['Time_taken (min)', 'Unnamed: 0'])
    Y = df['Time_taken (min)']

    # One fixed 70/30 split per dataset, so all models are scored on the same rows.
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=70)

    for model_name, model in models.items():

        # `fit` retrains the shared estimator object stored in `models`, so
        # after the loop each estimator holds the fit from the last dataset.
        model = model.fit(x_train, y_train)

        y_pred = model.predict(x_test)

        mean_abs_error = mean_absolute_error(y_test, y_pred)
        mean_sqr_error = mean_squared_error(y_test, y_pred)
        r2_scr = r2_score(y_test, y_pred)

        print('<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>')
        print('model_name : {}'.format(model_name))
        # R² is reported as a percentage; the error metrics are left unscaled.
        print('{} : {}'.format('r2score', r2_scr * 100))
        print('{} : {}'.format('mean_absolute_error', mean_abs_error))
        print('{} : {}'.format('mean_squared_error', mean_sqr_error))
        

<---------------------------------------------------------------------------------->
<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>
model_name : Linear_regression
r2score : 58.24211936400398
mean_absolute_error : 0.513415565128882
mean_squared_error : 0.413335005520907
<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>
model_name : Elastic_net
r2score : -0.011239115063021998
mean_absolute_error : 0.8022210464888679
mean_squared_error : 0.9899483748258799
<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>
model_name : Lasso
r2score : -0.011239115063021998
mean_absolute_error : 0.8022210464888679
mean_squared_error : 0.9899483748258799


<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>
model_name : Ridge
r2score : 58.24416855105616
mean_absolute_error : 0.5134058004172144
mean_squared_error : 0.41331472190668567
<---------------------------------------------------------------------------------->
<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>
model_name : Linear_regression
r2score : 55.99283356011985
mean_absolute_error : 0.5307033934823451
mean_squared_error : 0.4445833504943829
<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>
model_name : Elastic_net
r2score : -0.005751307500068137
mean_absolute_error : 0.8148325555475504
mean_squared_error : 1.0103102649368731
<++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++>
model_name : Lasso
r2score : -0.005751307500068137
mean_absolute_error : 0.8148325555475504
mean_squared_error : 1.0103102649368731
<+++++++++++++++++++++++++++