In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
import numpy as np
from google.colab import files

# Read the aggregated daily price records from the Colab working directory.
df = pd.read_csv('/content/aggregated_daily_data_Azadpur_Onion_commodity2023-2018.csv')

# Parse 'Arrival_Date' into datetimes so its calendar parts are reachable
# through the .dt accessor.
df['Arrival_Date'] = pd.to_datetime(df['Arrival_Date'])

# Add one integer column per calendar part of the arrival date.
for column_name, date_part in (('Day', 'day'), ('Month', 'month'), ('Year', 'year')):
    df[column_name] = getattr(df['Arrival_Date'].dt, date_part)

# Features are the three calendar parts; the targets are the three price
# columns, predicted jointly by one multi-output model.
feature_columns = ['Day', 'Month', 'Year']
target_columns = ['Min_Price', 'Max_Price', 'Modal_Price']
X = df[feature_columns]
y = df[target_columns]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): train_test_split shuffles rows by default, so held-out days are
# interleaved with training days. The MSE values below may therefore be more
# optimistic than the error on genuinely future dates -- confirm this is the
# intended evaluation.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Build a 100-tree random forest and fit it on all target columns at once.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict the held-out rows; the result has one column per target.
predictions = model.predict(X_test)

# Report the mean squared error separately for each price column.
for target, predicted_column in zip(['Min_Price', 'Max_Price', 'Modal_Price'], predictions.T):
    mse = mean_squared_error(y_test[target], predicted_column)
    print(f'MSE for {target}: {mse}')

# Build the feature frame for the forecast window: one row per calendar day
# from 2024-01-01 through 2024-09-08 (inclusive).
future_dates = pd.date_range(start='2024-01-01', end='2024-09-08')
future_days = future_dates.day
future_months = future_dates.month
# Take the year from the dates themselves rather than hard-coding 2024, so the
# 'Year' feature stays correct if the window above is changed or crosses a
# year boundary.
future_years = future_dates.year

# Column names and order match the training features ('Day', 'Month', 'Year').
future_data = pd.DataFrame({'Day': future_days, 'Month': future_months, 'Year': future_years})

# Run the fitted model over the future feature rows.
future_predictions = model.predict(future_data)

# Split the prediction matrix into its three columns, in the same order as
# the targets the model was fitted on (min, max, modal).
min_pred, max_pred, modal_pred = future_predictions.T

# Pair each forecast date with its three predicted prices.
predicted_prices = pd.DataFrame({
    'Date': future_dates,
    'Predicted_Min_Price': min_pred,
    'Predicted_Max_Price': max_pred,
    'Predicted_Modal_Price': modal_pred,
})

# Write the forecast to disk without the index column, then hand the file to
# the browser through the Colab download helper.
output_csv = 'predicted_prices.csv'
predicted_prices.to_csv(output_csv, index=False)
files.download(output_csv)


MSE for Min_Price: 28529.521781481482
MSE for Max_Price: 66616.51194722221
MSE for Modal_Price: 45496.00747314814


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>