In [10]:
# Standard-library imports first, third-party packages after.
import warnings

import numpy as np
import pandas as pd

# Blanket filter: every warning category is ignored from here on, which keeps
# cell output short but also hides library warnings raised by later cells.
warnings.filterwarnings('ignore')

In [11]:
# Read the dataset from the notebook's working directory and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='final_dataset.csv')
df.head(n=5)

Unnamed: 0,Rainfall_mm,Temperature_Celsius,Fertilizer_Used,Irrigation_Used,Soil_Type_Clay,Yield_tons_per_hectare
0,897.077239,27.676966,0,1,0,6.555816
1,992.673282,18.026142,1,1,1,8.527341
2,147.998025,29.794042,0,0,0,1.127443
3,986.866331,16.64419,0,1,0,6.517573
4,730.379174,31.620687,1,1,0,7.248251


In [12]:
# Target series first, then the predictor frame (every column except the target).
y = df['Yield_tons_per_hectare']
X = df.drop('Yield_tons_per_hectare', axis='columns')

In [13]:
from sklearn.model_selection import train_test_split

# Keep 80% of the rows for training and the remainder for testing; the fixed
# random_state makes the shuffle reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=30,
)

In [14]:
from sklearn.preprocessing import StandardScaler

# Only these two continuous columns are standardised; the other columns are left as loaded.
cols = ['Rainfall_mm', 'Temperature_Celsius']

# train_test_split returns row subsets of X. Take explicit copies so the column
# assignments below write to independent frames rather than relying on pandas'
# chained-assignment handling (its warning is silenced globally in this notebook).
x_train = x_train.copy()
x_test = x_test.copy()

# Fit the scaler on the training rows only and reuse the learned mean/std for
# the test rows, so nothing from the test split influences the scaling.
sc = StandardScaler()
x_train[cols] = sc.fit_transform(x_train[cols])
x_test[cols] = sc.transform(x_test[cols])

In [15]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# Modelling: baseline ridge regression with the library's default penalty.
mod = Ridge().fit(x_train, y_train)
print('intercept :', mod.intercept_)
print('Coef :', mod.coef_)

# Prediction: outputs for both splits, reused by the RMSE cell below.
x_train_pred, x_test_pred = mod.predict(x_train), mod.predict(x_test)

# Evaluation: R2 on the training rows and on the held-out rows.
train_r2 = mod.score(x_train, y_train)
test_r2 = mod.score(x_test, y_test)
print('Train R2 :', train_r2)
print('Test R2 :', test_r2)

# Cross-validation: mean R2 over 5 folds of the training split.
cv = cross_val_score(mod, x_train, y_train, cv=5, scoring='r2')
print('CV Score :', cv.mean())

intercept : 3.2996286523143485
Coef : [1.29866531 0.14353081 1.50011208 1.19901902 0.00164233]
Train R2 : 0.9131807118340225
Test R2 : 0.9123845284113142
CV Score : 0.9131780474287096


In [16]:
from sklearn.metrics import mean_squared_error

# RMSE on each split: square root of the mean squared error of the stored predictions.
Train_MSE = mean_squared_error(y_train, x_train_pred)
Test_MSE = mean_squared_error(y_test, x_test_pred)
print('Train RMSE :', Train_MSE ** 0.5)
print('Test RMSE :', Test_MSE ** 0.5)

# This scorer returns negated MSE per fold, so take the magnitude of the fold
# average before the square root.
cv = abs(
    cross_val_score(
        mod, x_train, y_train, cv=5, scoring='neg_mean_squared_error'
    ).mean()
)
print ('CV Score :', cv ** 0.5)

Train RMSE : 0.5000005991946244
Test RMSE : 0.5013985867300007
CV Score : 0.5000053465982635


In [23]:
# Search a small grid of ridge penalties (alpha) with 5-fold cross-validation
# on the training split and display the best-scoring value.
from sklearn.model_selection import GridSearchCV

est = Ridge()
listt = {'alpha': [0.08, 0.09, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7]}

# Keep the search object under its own name. Rebinding `mod` to a GridSearchCV
# would make any earlier cell re-run afterwards (e.g. the RMSE cell, which passes
# `mod` to cross_val_score) evaluate the whole grid search instead of one Ridge model.
# scoring='r2' spells out the metric a regressor's default score already uses,
# matching the R2 cross-validation in the modelling cells.
grid_search = GridSearchCV(est, listt, cv=5, scoring='r2')
grid_search.fit(x_train, y_train)
grid_search.best_params_

{'alpha': 0.4}

In [28]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

# Modelling: ridge regression refit with alpha=0.4, the value the grid search reported.
mod = Ridge(alpha=0.4)
mod.fit(x_train, y_train)
print('intercept :', mod.intercept_)
print('Coef :', mod.coef_)

# Prediction: refresh the stored outputs so the RMSE cell below scores this model.
x_train_pred = mod.predict(x_train)
x_test_pred = mod.predict(x_test)

# Evaluation: R2 on the training split, then on the held-out split.
r2_train = mod.score(x_train, y_train)
print('Train R2 :', r2_train)
r2_test = mod.score(x_test, y_test)
print('Test R2 :', r2_test)

# Cross-validation: 5-fold R2 on the training split, reported as the fold average.
cv = cross_val_score(
    mod,
    x_train,
    y_train,
    cv=5,
    scoring='r2',
)
print('CV Score :', cv.mean())

intercept : 3.299624604583463
Coef : [1.29866629 0.14353091 1.50011658 1.19902261 0.00164235]
Train R2 : 0.9131807118415188
Test R2 : 0.9123845294384882
CV Score : 0.9131780474343337


In [29]:
from sklearn.metrics import mean_squared_error

# RMSE of the tuned model's stored predictions on the training and test splits.
Train_MSE = mean_squared_error(y_train, x_train_pred)
print('Train RMSE :', Train_MSE ** 0.5)
Test_MSE = mean_squared_error(y_test, x_test_pred)
print('Test RMSE :', Test_MSE ** 0.5)

# Cross-validated RMSE: per-fold scores are negated MSE, so average them, drop
# the sign, and take the square root.
fold_neg_mse = cross_val_score(
    mod, x_train, y_train, cv=5, scoring='neg_mean_squared_error'
)
cv = abs(fold_neg_mse.mean())
print ('CV Score :', cv ** 0.5)

Train RMSE : 0.5000005991730383
Test RMSE : 0.5013985837908874
CV Score : 0.5000053465579243
