In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler,OneHotEncoder

In [2]:
X_train = pd.read_csv('X_train',index_col=0)
y_train = pd.read_csv('y_train',index_col=0)
X_test = pd.read_csv('X_test',index_col=0)
y_test = pd.read_csv('y_test',index_col=0)

In [3]:
def select_column(data):
    return data[['Distance_km', 'Weather',  'Traffic_Level', 'Time_of_Day', 'Vehicle_Type', 'Preparation_Time_min']]

In [4]:
X_test.columns

Index(['Order_ID', 'Distance_km', 'Weather', 'Traffic_Level', 'Time_of_Day',
       'Vehicle_Type', 'Preparation_Time_min', 'Courier_Experience_yrs'],
      dtype='object')

In [5]:
sample = X_test.head(1)[['Distance_km', 'Weather',
                         'Traffic_Level', 'Time_of_Day',
       'Vehicle_Type', 'Preparation_Time_min']]

In [6]:
sample


Unnamed: 0,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min
925,2.67,Clear,Medium,Night,Car,17


In [7]:
def create_speed(data):
    data['Speed_[km/m]'] =data['Distance_km'] / data['Preparation_Time_min']
    return data 

In [8]:
sample = create_speed(sample)
sample

Unnamed: 0,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Speed_[km/m]
925,2.67,Clear,Medium,Night,Car,17,0.157059


In [9]:
X_train
sample_X_train = select_column(X_train)
new_sample_X_train = create_speed(sample_X_train)
new_sample_X_train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Speed_[km/m]'] =data['Distance_km'] / data['Preparation_Time_min']


Unnamed: 0,Distance_km,Weather,Traffic_Level,Time_of_Day,Vehicle_Type,Preparation_Time_min,Speed_[km/m]
913,12.00,Clear,Low,Evening,Car,25,0.480000
930,7.34,Foggy,Medium,Afternoon,Bike,22,0.333636
664,3.64,Clear,Low,Morning,Scooter,16,0.227500
768,3.97,Clear,High,Afternoon,Bike,19,0.208947
915,19.04,Rainy,Low,Evening,Car,12,1.586667
...,...,...,...,...,...,...,...
107,14.89,Snowy,High,Morning,Scooter,17,0.875882
272,4.55,Rainy,Low,Afternoon,Bike,5,0.910000
865,4.05,Clear,Medium,Afternoon,Scooter,21,0.192857
438,13.60,Foggy,High,Evening,Bike,9,1.511111


In [10]:
def drop_column(df):
    columns = ['Weather_Windy', 
               'Traffic_Level_Medium','Time_of_Day_Night'
               ,'Vehicle_Type_Scooter']
    
    new_df = df.drop(columns,axis = 1)
    return new_df
def int_convertor(column,df ):
    """
    df : pass dataframe in this parameter
    columns : pass column for which we need conversion to integer datatype
    
    this function will convert given column datatype to integer
    version : 1
    
    """
    try :
        df[column] = df[column].astype('int64')
    except Exception as e :
        print(e)

In [59]:
apply_transformer

Unnamed: 0,Distance_km,Preparation_Time_min,Speed_[km/m],Weather_Clear,Weather_Foggy,Weather_Rainy,Weather_Snowy,Traffic_Level_High,Traffic_Level_Low,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Morning,Vehicle_Type_Bike,Vehicle_Type_Car
0,-1.341724,-0.00243,-0.902557,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [11]:
def apply_transformer(sample):
    transformer = ColumnTransformer(
        transformers=[
            ('numerical',StandardScaler(),['Distance_km','Preparation_Time_min',
           'Speed_[km/m]']),
            ('categorical',OneHotEncoder(),['Weather', 'Traffic_Level', 'Time_of_Day',
           'Vehicle_Type'])
                    ],
        remainder='passthrough'

    )
    
    sample_X_train = select_column(X_train)
    new_sample_X_train = create_speed(sample_X_train)
    
    transformer.fit(new_sample_X_train)
    new_sample= transformer.transform(sample)
    new_sample_df=pd.DataFrame(new_sample,columns=[
        'Distance_km','Preparation_Time_min','Speed_[km/m]',
        'Weather_Clear', 'Weather_Foggy', 'Weather_Rainy', 'Weather_Snowy',
           'Weather_Windy', 'Traffic_Level_High', 'Traffic_Level_Low',
           'Traffic_Level_Medium', 'Time_of_Day_Afternoon',
           'Time_of_Day_Evening', 'Time_of_Day_Morning', 'Time_of_Day_Night',
           'Vehicle_Type_Bike', 'Vehicle_Type_Car', 'Vehicle_Type_Scooter'
    ])
    new_sample_df = drop_column(new_sample_df)
    #new_sample_df = int_convertor(new_sample_df)
    return new_sample_df

In [39]:
new_sample

array([[-1.3417239 , -0.00243018, -0.9025571 ,  1.        ,  0.        ,
         0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
         1.        ,  0.        ,  0.        ,  0.        ,  1.        ,
         0.        ,  1.        ,  0.        ]])

In [49]:
categorical_column = transformer.named_transformers_['categorical'].get_feature_names_out()

In [52]:
categorical_column

array(['Weather_Clear', 'Weather_Foggy', 'Weather_Rainy', 'Weather_Snowy',
       'Weather_Windy', 'Traffic_Level_High', 'Traffic_Level_Low',
       'Traffic_Level_Medium', 'Time_of_Day_Afternoon',
       'Time_of_Day_Evening', 'Time_of_Day_Morning', 'Time_of_Day_Night',
       'Vehicle_Type_Bike', 'Vehicle_Type_Car', 'Vehicle_Type_Scooter'],
      dtype=object)

In [68]:
apply_transformer(sample)

Unnamed: 0,Distance_km,Preparation_Time_min,Speed_[km/m],Weather_Clear,Weather_Foggy,Weather_Rainy,Weather_Snowy,Traffic_Level_High,Traffic_Level_Low,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Morning,Vehicle_Type_Bike,Vehicle_Type_Car
0,-1.341724,-0.00243,-0.902557,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [50]:
numerical_columns = ['Distance_km','Preparation_Time_min','Speed_[km/m]']

In [65]:
import joblib
M = joblib.load('regression_model.joblib')

In [67]:
M.predict(new_sample_df)

array([[33.34566297]])

In [66]:
new_sample_df

Unnamed: 0,Distance_km,Preparation_Time_min,Speed_[km/m],Weather_Clear,Weather_Foggy,Weather_Rainy,Weather_Snowy,Traffic_Level_High,Traffic_Level_Low,Time_of_Day_Afternoon,Time_of_Day_Evening,Time_of_Day_Morning,Vehicle_Type_Bike,Vehicle_Type_Car
0,-1.341724,-0.00243,-0.902557,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [None]:
new_sample_df = apply_transformer(sample)