In [1]:
import pandas as pd
from datetime import datetime
from catboost import CatBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA

# Load the dataset
WheatData_Final = pd.read_excel("./WheatData2.xlsx")  

# Preprocessing
WheatData_Final['Date'] = pd.to_datetime(WheatData_Final['Date'])
WheatData_Final['year'] = WheatData_Final['Date'].dt.year
WheatData_Final['month'] = WheatData_Final['Date'].dt.month
WheatData_Final['day'] = WheatData_Final['Date'].dt.day

# Normalize numerical features
features_to_normalize = ['CPI Food Items', 'PMS PRICE', 'Central Rate (US DOLL)', 'mpr']
scaler = MinMaxScaler()
WheatData_Final[features_to_normalize] = scaler.fit_transform(WheatData_Final[features_to_normalize])

# Convert 'Location' into dummy variables (one-hot encoding)
WheatData_Final = pd.get_dummies(WheatData_Final, columns=['Location'], drop_first=True)

# Time Series Modeling
wheat_price = WheatData_Final['Alkama (Wheat)']

arima_model = ARIMA(wheat_price, order=(1, 1, 1))
arima_result = arima_model.fit()

# Extract residuals from ARIMA
WheatData_Final['Residuals'] = arima_result.resid

# Define features and target
features = ['Residuals', 'year', 'month', 'day', 'CPI Food Items', 'PMS PRICE', 'Central Rate (US DOLL)', 'mpr'] + \
           [col for col in WheatData_Final.columns if 'Location_' in col]  # Include all Location dummy variables
target = 'Alkama (Wheat)'

X = WheatData_Final[features]
y = WheatData_Final[target]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the CatBoost model
catboost_model = CatBoostRegressor(iterations=200, learning_rate=0.1, depth=6, random_state=42, verbose=0)
catboost_model.fit(X_train, y_train)

# Evaluate the model
y_pred = catboost_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f"Model Evaluation - Mean Absolute Error (MAE): {mae}")
print(f"Model Evaluation - Mean Squared Error (MSE): {mse}")

Model Evaluation - Mean Absolute Error (MAE): 32.99868282467886
Model Evaluation - Mean Squared Error (MSE): 2185.9084483329157


In [3]:
# Define the prediction function
def predict_wheat_price(location, date, cpi_food_items, pms_price, central_rate_usd, mpr):
    """
    Predicts wheat price based on date, location, and economic indicators.

    Parameters:
        location (str): Name of the location.
        date (str): Date in 'YYYY-MM-DD' format.
        cpi_food_items (float): Consumer Price Index for Food Items.
        pms_price (float): Price of Premium Motor Spirit (Petrol).
        central_rate_usd (float): Central exchange rate (USD).
        mpr (float): Monetary Policy Rate.

    Returns:
        float: Predicted Wheat Price.
    """
    try:
        # Convert date into datetime object
        date = pd.to_datetime(date)

        # Extract year, month, and day
        year = date.year
        month = date.month
        day = date.day

        # Prepare input features
        input_data = pd.DataFrame({
            'Residuals': [0],  # Set default residuals to 0
            'year': [year],
            'month': [month],
            'day': [day],
            'CPI Food Items': [cpi_food_items],
            'PMS PRICE': [pms_price],
            'Central Rate (US DOLL)': [central_rate_usd],
            'mpr': [mpr]
        })

        # Convert location to dummy variables
        location_dummies = pd.get_dummies(pd.Series([location]), prefix="Location")

        # Ensure all location columns exist
        for col in [col for col in X_train.columns if 'Location_' in col]:
            if col not in location_dummies.columns:
                location_dummies[col] = 0  # Assign 0 for missing locations

        # Add location dummy variables to input_data
        input_data = pd.concat([input_data, location_dummies], axis=1)

        # Ensure all expected columns exist
        for col in X_train.columns:
            if col not in input_data.columns:
                input_data[col] = 0  # Fill missing columns with 0

        # Reorder columns to match training data
        input_data = input_data[X_train.columns]

        # Predict the price
        predicted_price = catboost_model.predict(input_data)

        return float(predicted_price)

    except Exception as e:
        return f"Error occurred: {str(e)}"

# Example usage
predicted_price = predict_wheat_price("Lagos", "2025-04-15", 1056, 1154, 1657, 26)
print(f"Predicted Wheat Price in Lagos: {predicted_price}")

predicted_price_abuja = predict_wheat_price("Abuja", "2025-06-10", 1100, 1200, 1700, 28)
print(f"Predicted Wheat Price in Abuja: {predicted_price_abuja}")

Predicted Wheat Price in Lagos: 1121.4083445395709
Predicted Wheat Price in Abuja: 1152.4193683539022


  return float(predicted_price)
  return float(predicted_price)
