In [1]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization

# Load the data
file_path = 'date-wise-retail-selling-price-of-petrol-and-diesel-in-metro-cities.xlsx'
data = pd.read_excel(file_path)

# Preprocess the data
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')
data['day'] = data['date'].dt.day
data['month'] = data['date'].dt.month
data['year'] = data['date'].dt.year

# Encode categorical data
data = pd.get_dummies(data, columns=['city', 'fuel_type'], drop_first=True)

# Sort the data by date
data = data.sort_values(by='date')

# Prepare the dataset
X = data[['day', 'month', 'year'] + [col for col in data.columns if col.startswith('city_') or col.startswith('fuel_type_')]]
y = data['value']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# Define the evaluation function for Bayesian Optimization
def xgb_evaluate(min_child_weight, colsample_bytree, max_depth, subsample, gamma, alpha, lambd, learning_rate, n_estimators):
    params = {
        'min_child_weight': int(min_child_weight),
        'colsample_bytree': colsample_bytree,
        'max_depth': int(max_depth),
        'subsample': subsample,
        'gamma': gamma,
        'reg_alpha': alpha,
        'reg_lambda': lambd,
        'learning_rate': learning_rate,
        'n_estimators': int(n_estimators),
        'eval_metric': 'mae'
    }
    
    xgb_model = XGBRegressor(**params, random_state=42)
    xgb_model.fit(X_train, y_train)
    y_pred = xgb_model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    
    return -mae  # Negative MAE for maximization

# Define the bounds of the hyperparameters for Bayesian Optimization
pbounds = {
    'min_child_weight': (1, 10),
    'colsample_bytree': (0.1, 1.0),
    'max_depth': (3, 10),
    'subsample': (0.5, 1.0),
    'gamma': (0, 1),
    'alpha': (0, 1),
    'lambd': (0, 1),
    'learning_rate': (0.01, 0.3),
    'n_estimators': (100, 1000)
}

# Perform Bayesian Optimization
optimizer = BayesianOptimization(f=xgb_evaluate, pbounds=pbounds, random_state=42, verbose=2)
optimizer.maximize(init_points=10, n_iter=30)

# Retrieve the best parameters
best_params = optimizer.max['params']
best_params['min_child_weight'] = int(best_params['min_child_weight'])
best_params['max_depth'] = int(best_params['max_depth'])
best_params['n_estimators'] = int(best_params['n_estimators'])

# Train the final model with the best parameters
final_xgb_model = XGBRegressor(**best_params, random_state=42)
final_xgb_model.fit(X_train, y_train)

# Predict the test set
y_pred = final_xgb_model.predict(X_test)

# Calculate accuracy metrics
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae:.2f}')
print(f'Mean Squared Error: {mse:.2f}')
print(f'R² Score: {r2:.2f}')

# Function to predict fuel price for a given date, city, and fuel type
def predict_fuel_price(day, month, year, city, fuel_type):
    city_column = f'city_{city}'
    fuel_type_column = f'fuel_type_{fuel_type}'
    
    input_data = {
        'day': [day],
        'month': [month],
        'year': [year]
    }
    for col in X.columns:
        if col not in input_data:
            input_data[col] = [1] if col == city_column or col == fuel_type_column else [0]

    input_df = pd.DataFrame(input_data)

    predicted_price = final_xgb_model.predict(input_df)
    return predicted_price[0]

# Example usage
predicted_price = predict_fuel_price(day=2, month=8, year=2025, city='Mumbai', fuel_type='Diesel')
print(f'Predicted price: {predicted_price:.2f}')


|   iter    |  target   |   alpha   | colsam... |   gamma   |   lambd   | learni... | max_depth | min_ch... | n_esti... | subsample |
-------------------------------------------------------------------------------------------------------------------------------------
| [39m1        [39m | [39m-2.946   [39m | [39m0.3745   [39m | [39m0.9556   [39m | [39m0.732    [39m | [39m0.5987   [39m | [39m0.05525  [39m | [39m4.092    [39m | [39m1.523    [39m | [39m879.6    [39m | [39m0.8006   [39m |
| [39m2        [39m | [39m-3.299   [39m | [39m0.7081   [39m | [39m0.1185   [39m | [39m0.9699   [39m | [39m0.8324   [39m | [39m0.07158  [39m | [39m4.273    [39m | [39m2.651    [39m | [39m373.8    [39m | [39m0.7624   [39m |
| [39m3        [39m | [39m-3.551   [39m | [39m0.4319   [39m | [39m0.3621   [39m | [39m0.6119   [39m | [39m0.1395   [39m | [39m0.09472  [39m | [39m5.565    [39m | [39m5.105    [39m | [39m806.7    [39m | [39m0.5998   [39m |


| [39m34       [39m | [39m-2.269   [39m | [39m0.0      [39m | [39m1.0      [39m | [39m0.0      [39m | [39m1.0      [39m | [39m0.01     [39m | [39m7.814    [39m | [39m4.135    [39m | [39m343.8    [39m | [39m1.0      [39m |
| [39m35       [39m | [39m-2.08    [39m | [39m0.7867   [39m | [39m1.0      [39m | [39m0.0      [39m | [39m0.0      [39m | [39m0.01     [39m | [39m6.683    [39m | [39m6.197    [39m | [39m343.3    [39m | [39m1.0      [39m |
| [39m36       [39m | [39m-2.387   [39m | [39m0.5675   [39m | [39m1.0      [39m | [39m0.0      [39m | [39m0.224    [39m | [39m0.01     [39m | [39m8.684    [39m | [39m7.047    [39m | [39m341.3    [39m | [39m1.0      [39m |
| [39m37       [39m | [39m-2.283   [39m | [39m0.0      [39m | [39m1.0      [39m | [39m1.0      [39m | [39m1.0      [39m | [39m0.01     [39m | [39m8.287    [39m | [39m6.719    [39m | [39m343.6    [39m | [39m0.5      [39m |
| [39m38       [39m | 

Parameters: { "lambd" } are not used.



Mean Absolute Error: 1.73
Mean Squared Error: 4.97
R² Score: 0.86
Predicted price: 95.16


In [1]:
import joblib

# Save the trained model
joblib.dump(final_xgb_model, 'model.pkl')

NameError: name 'final_xgb_model' is not defined

In [3]:
pip install joblib

Note: you may need to restart the kernel to use updated packages.
