In [1]:
import pandas as pd

# 1. Load the files
agri_df = pd.read_csv('Cleaned_Agriculture_Data.csv')
climate_df = pd.read_csv('Indian_Climate_Dataset_2024_2025.csv')

# 2. Standardize Dates (Crucial for merging)
# We use .dt.normalize() to make sure both are exactly YYYY-MM-DD
agri_df['Price Date'] = pd.to_datetime(agri_df['Price Date']).dt.normalize()
climate_df['Date'] = pd.to_datetime(climate_df['Date']).dt.normalize()

# 3. Standardize State Names
# This ensures "Maharashtra " (with a space) matches "Maharashtra"
agri_df['STATE'] = agri_df['STATE'].str.strip().str.title()
climate_df['State'] = climate_df['State'].str.strip().str.title()

# Fix the specific Tamil Nadu spelling difference
agri_df['STATE'] = agri_df['STATE'].replace({'Tamilnadu': 'Tamil Nadu'})

# 4. PERFORM THE MERGE
# This matches the Price Date with the Climate Date AND the State names
merged_df = pd.merge(
    agri_df, 
    climate_df, 
    left_on=['Price Date', 'STATE'], 
    right_on=['Date', 'State'], 
    how='inner'
)

# 5. Cleanup
# Remove the duplicate Date and State columns created by the merge
merged_df = merged_df.drop(columns=['Date', 'State'])

# 6. Save the new combined dataset
merged_df.to_csv('Agri_Weather_Combined.csv', index=False)

print("--- Merge Summary ---")
print(f"Original Price Rows: {len(agri_df)}")
print(f"Successfully Merged Rows: {len(merged_df)}")
print("\nNew Weather Columns Added:")
print(['Temperature_Avg (째C)', 'Rainfall (mm)', 'Humidity (%)'])

--- Merge Summary ---
Original Price Rows: 734556
Successfully Merged Rows: 313257

New Weather Columns Added:
['Temperature_Avg (째C)', 'Rainfall (mm)', 'Humidity (%)']


In [6]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor 
df=pd.read_csv('Agri_Weather_Combined.csv')
df['STATE'] = df['STATE'].str.strip().str.replace('&', 'and').replace('Tamilnadu', 'Tamil Nadu')
df = df[df['Modal_Price'] < 15000]
df = df[(df['Modal_Price'] >= df['Min_Price']) & (df['Modal_Price'] <= df['Max_Price'])]
df = df[df['Modal_Price'] > 0]
'''df_onion = df[df['Commodity'] == 'Onion'].copy()'''
df_onion = df[(df['Commodity'] == 'Wheat') & (df['STATE'] == 'Maharashtra')].copy()
df_onion=df_onion.sort_values('Price Date')
df_onion['Date']=pd.to_datetime(df['Price Date'])
df_onion['month']=df_onion['Date'].dt.month
df_onion['dayofweek']=df_onion['Date'].dt.dayofweek
'''df_onion['price_lag_1']=df_onion['Modal_Price'].shift(1)
df_onion['price_lag_7']=df_onion['Modal_Price'].shift(7)'''
df_onion['price_lag_1'] = df_onion.groupby('Market Name')['Modal_Price'].shift(1)
df_onion['price_lag_7'] = df_onion.groupby('Market Name')['Modal_Price'].shift(7)
df_onion=df_onion.dropna()
train=df_onion[:-30]
test=df_onion[-30:]
'''X_train = train[['month', 'dayofweek', 'price_lag_1', 'price_lag_7']]
y_train = train['Modal_Price']'''
model = RandomForestRegressor(n_estimators=100)
features = [
    'month','dayofweek',
    'price_lag_1', 
    'price_lag_7', 
    'Temperature_Avg (째C)', 
    'Rainfall (mm)', 
    'Humidity (%)']
model.fit(train[features], train['Modal_Price'])
'''model = RandomForestRegressor(n_estimators=100)
model.fit(X_train, y_train)'''
predictions = model.predict(test[['month', 'dayofweek', 'price_lag_1', 'price_lag_7','Temperature_Avg (째C)', 'Rainfall (mm)', 'Humidity (%)']])
print(f"First Prediction: {predictions[0]}")
from sklearn.metrics import mean_absolute_percentage_error
y_test = test['Modal_Price']
# Replace 'y_test' and 'predictions' with your variable names
mape = mean_absolute_percentage_error(y_test, predictions)
accuracy = (1 - mape) * 100

print(f"Model Accuracy: {accuracy:.2f}%")

First Prediction: 2548.88
Model Accuracy: 94.36%
