In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error

df=pd.read_csv('/content/chennai_rainfall_2019_2023_mm.csv')
df

Unnamed: 0,Date,Year,Month,Temperature (°C),Humidity (%),Wind Speed (km/h),Rainfall (cm),Rainfall (mm)
0,2019-01-01,2019,1,31.0,74.5,13.3,0.5,5.0
1,2019-01-02,2019,1,29.7,87.1,9.5,0.1,1.0
2,2019-01-03,2019,1,31.3,79.5,12.4,0.3,3.0
3,2019-01-04,2019,1,33.0,80.1,13.1,0.1,1.0
4,2019-01-05,2019,1,29.5,83.5,12.0,1.4,14.0
...,...,...,...,...,...,...,...,...
1821,2023-12-27,2023,12,30.2,81.1,10.5,0.1,1.0
1822,2023-12-28,2023,12,29.6,83.9,11.5,0.4,4.0
1823,2023-12-29,2023,12,28.7,77.8,16.9,0.1,1.0
1824,2023-12-30,2023,12,30.3,82.0,11.5,0.0,0.0


In [4]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Suppose you have your dataset as df with target column 'target'
X = df.drop(["Rainfall (mm)", "Date"], axis=1)
y = df["Rainfall (mm)"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build pipeline with scaling + Ridge Regression
pipe = Pipeline([
    ("scaler", StandardScaler()),
    ("ridge", Ridge())
])

# Hyperparameter tuning for alpha
param_grid = {"ridge__alpha": [0.01, 0.1, 1, 10, 100]}
grid = GridSearchCV(pipe, param_grid, cv=5, scoring="r2")
grid.fit(X_train, y_train)

# Best model
best_model = grid.best_estimator_

# Evaluate
train_r2 = r2_score(y_train, best_model.predict(X_train))
test_r2 = r2_score(y_test, best_model.predict(X_test))
train_mse = mean_squared_error(y_train, best_model.predict(X_train))
test_mse = mean_squared_error(y_test, best_model.predict(X_test))
train_mae = mean_absolute_error(y_train, best_model.predict(X_train))
test_mae = mean_absolute_error(y_test, best_model.predict(X_test))

print("Best alpha:", grid.best_params_)
print("Training R²:", train_r2)
print("Testing R²:", test_r2)
print("Training MSE:", train_mse)
print("Testing MSE:", test_mse)
print("Training MAE:", train_mae)
print("Testing MAE:", test_mae)

Best alpha: {'ridge__alpha': 0.01}
Training R²: 0.9999999999460395
Testing R²: 0.9999999999455639
Training MSE: 1.509208219944191e-08
Testing MSE: 1.6813238383892973e-08
Training MAE: 8.459516517285159e-05
Testing MAE: 8.707647002229535e-05
