In [2]:
# --- Step 1: Import Libraries ---
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import joblib

# --- Step 2: Load and Process Data ---
data = {
    'timestamp': pd.to_datetime([
        '2025-07-30 08:30:00', '2025-07-30 09:15:00', '2025-07-30 09:45:00',
        '2025-07-30 12:00:00', '2025-07-30 13:30:00', '2025-07-30 17:00:00',
        '2025-07-30 18:30:00', '2025-07-30 19:00:00', '2025-07-30 21:00:00',
        '2025-07-30 22:30:00'
    ]),
    'cab_type': ['Mini', 'Sedan', 'Mini', 'Sedan', 'Mini', 'SUV', 'Sedan', 'Mini', 'Sedan', 'Mini'],
    'distance_km': [5.5, 12.1, 3.2, 8.0, 4.5, 6.7, 15.3, 4.1, 9.8, 7.6],
    'temperature_celsius': [28.5, 29.0, 29.2, 32.0, 31.5, 29.8, 27.5, np.nan, 26.0, 25.5],
    'weather_condition': ['Clear', 'Clear', 'Clear', 'Clear', 'Clear', 'Rainy', 'Rainy', 'Rainy', 'Clear', 'Foggy'],
    'demand_multiplier': [1.0, 1.5, 1.2, 1.0, 1.0, 2.0, 2.5, 2.2, 1.2, 1.8]
}

df = pd.DataFrame(data)

# Handle missing values (if any were there)
df['temperature_celsius'].fillna(df['temperature_celsius'].median(), inplace=True)


# One-Hot Encode
df_encoded = pd.get_dummies(df, columns=['cab_type', 'weather_condition'],drop_first=True)

#feature engineering
df_encoded['timestamp'] = pd.to_datetime(df_encoded['timestamp'])
df_encoded['hour_of_day'] = df_encoded['timestamp'].dt.hour
df_encoded['day_of_week'] = df_encoded['timestamp'].dt.dayofweek
df_encoded['is_weekend'] = (df_encoded['timestamp'].dt.dayofweek >= 5).astype(int)
df_final = df_encoded.drop('timestamp', axis=1)

# --- Step 3: Prepare Data for Modeling ---
X = df_final.drop('demand_multiplier', axis=1)
y = df_final['demand_multiplier']

# --- Step 4: Scale Features ---
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


# --- Step 5: Train the Final Model ---
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_scaled,y)

print("Final model and scaler are ready.")





Final model and scaler are ready.


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['temperature_celsius'].fillna(df['temperature_celsius'].median(), inplace=True)


In [3]:
#save the trained model to the file
joblib.dump(rf_model , 'rf_model.joblib')
print("Model saved to 'rf_model.joblib'")

# Save the scaler to a file
joblib.dump(scaler, 'scaler.joblib')
print("Scaler saved to 'scaler.joblib'")




Model saved to 'rf_model.joblib'
Scaler saved to 'scaler.joblib'
