<a href="https://colab.research.google.com/github/trisharaj11/Climate_Impact_on_Crop_Productivity/blob/main/02_Model_Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

import joblib


In [2]:
# Read the merged crop table (state, year, crop, yield, rainfall, temperature)
# from the working directory and preview its first five rows.
data = pd.read_csv(filepath_or_buffer='merged_data_2010_2017.csv')
data.head(n=5)


Unnamed: 0,State Name,Year,Crop,Yield,Rainfall,Temperature
0,Chhattisgarh,2010,Rice,1695.77,1215.5,25.13
1,Chhattisgarh,2011,Rice,1756.23,1116.3,24.67
2,Chhattisgarh,2012,Rice,1900.97,1054.7,24.69
3,Chhattisgarh,2013,Rice,1581.86,1092.5,24.82
4,Chhattisgarh,2014,Rice,1729.89,1045.2,24.73


In [3]:
# Column names shared by every model in this notebook: the regression target
# and the two climate predictors.
TARGET = 'Yield'
FEATURES = ['Temperature', 'Rainfall']


In [4]:
# Split the table by crop so that each crop is modelled separately.
rice_data = data.loc[data['Crop'].eq('Rice')]
wheat_data = data.loc[data['Crop'].eq('Wheat')]


In [5]:
def prepare_data(df, test_size=0.2, random_state=42):
    """Split a crop table into train/test features and targets.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing every column listed in the notebook-level
        ``FEATURES`` constant plus the ``TARGET`` column.
    test_size : float or int, default 0.2
        Forwarded to ``train_test_split``; the default reproduces the
        original 80/20 split.
    random_state : int or None, default 42
        Forwarded to ``train_test_split`` so the shuffle is reproducible;
        the default reproduces the original split.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` exactly as returned by
        ``train_test_split``.
    """
    X = df[FEATURES]
    y = df[TARGET]

    return train_test_split(
        X, y,
        test_size=test_size,
        random_state=random_state
    )


In [6]:
# Train/test split for the wheat rows.
Xw_train, Xw_test, yw_train, yw_test = prepare_data(df=wheat_data)


In [7]:
# Train/test split for the rice rows.
Xr_train, Xr_test, yr_train, yr_test = prepare_data(df=rice_data)


In [8]:
# One scaler per crop. Re-fitting a single shared StandardScaler on the rice
# data would overwrite the wheat mean/std, leaving no fitted object able to
# scale new wheat inputs for the wheat linear model.
scaler_wheat = StandardScaler()
Xw_train_scaled = scaler_wheat.fit_transform(Xw_train)
# Test data is only transformed, never fitted, to avoid leaking test statistics.
Xw_test_scaled = scaler_wheat.transform(Xw_test)

scaler_rice = StandardScaler()
Xr_train_scaled = scaler_rice.fit_transform(Xr_train)
Xr_test_scaled = scaler_rice.transform(Xr_test)

# Backward-compatible alias: the original shared `scaler` ended up fitted on
# the rice training data, and the rest of the notebook refers to it by name.
scaler = scaler_rice


In [9]:
# Linear-regression baseline for wheat, trained on the standardised features.
# `fit` returns the estimator itself, so construction and fitting are chained.
lr_wheat = LinearRegression().fit(Xw_train_scaled, yw_train)
lr_wheat


In [10]:
# Linear-regression baseline for rice, trained on the standardised features.
# `fit` returns the estimator itself, so construction and fitting are chained.
lr_rice = LinearRegression().fit(Xr_train_scaled, yr_train)
lr_rice


In [11]:
def evaluate(model, X_test, y_test, name):
    """Print the held-out RMSE and R² of a fitted model.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Feature matrix handed to ``model.predict``.
        y_test: True target values the predictions are compared against.
        name: Label prefixed to each printed metric line.

    Returns:
        None. The two metrics are printed, not returned.
    """
    y_pred = model.predict(X_test)

    r_squared = r2_score(y_test, y_pred)
    # RMSE is the square root of the mean squared error, in the target's units.
    root_mse = np.sqrt(mean_squared_error(y_test, y_pred))

    print(f"{name} RMSE:", root_mse)
    print(f"{name} R² Score:", r_squared)


In [12]:
# Score both linear baselines on their standardised test sets.
print("Linear Regression – Wheat")
evaluate(model=lr_wheat, X_test=Xw_test_scaled, y_test=yw_test, name="Wheat")

print("\nLinear Regression – Rice")
evaluate(model=lr_rice, X_test=Xr_test_scaled, y_test=yr_test, name="Rice")


Linear Regression – Wheat
Wheat RMSE: 1349.4885596735028
Wheat R² Score: -0.00444483983447852

Linear Regression – Rice
Rice RMSE: 1090.749996775394
Rice R² Score: -0.001463962104173655


In [14]:
# RandomForestRegressor is already imported in the notebook's first cell, so
# it is not re-imported here. Both forests are trained on the raw (unscaled)
# features.

# Random Forest for Wheat
rf_wheat = RandomForestRegressor(
    n_estimators=100,
    random_state=42
)
rf_wheat.fit(Xw_train, yw_train)

# Random Forest for Rice
rf_rice = RandomForestRegressor(
    n_estimators=100,
    random_state=42
)
rf_rice.fit(Xr_train, yr_train)


In [15]:
# Score both forests on the unscaled test sets, matching how they were trained.
print("Random Forest – Wheat")
evaluate(model=rf_wheat, X_test=Xw_test, y_test=yw_test, name="Wheat")

print("\nRandom Forest – Rice")
evaluate(model=rf_rice, X_test=Xr_test, y_test=yr_test, name="Rice")


Random Forest – Wheat
Wheat RMSE: 1345.797763284613
Wheat R² Score: 0.001041878579790878

Random Forest – Rice
Rice RMSE: 1098.393038800986
Rice R² Score: -0.015547937939472511


In [16]:
# Persist the two random-forest models and the scaler to the working directory.
# NOTE(review): both forests were fitted on unscaled features, and `scaler`
# holds the statistics of the rice training data (its last fit). Confirm how
# scaler.pkl is meant to be used before applying it in front of these models.
joblib.dump(value=rf_wheat, filename='wheat_model.pkl')
joblib.dump(value=rf_rice, filename='rice_model.pkl')
joblib.dump(value=scaler, filename='scaler.pkl')


['scaler.pkl']