In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import TimeSeriesSplit


In [2]:
# df = pd.read_csv("/content/drive/MyDrive/Kathmandu-Precipitation/data/06-season-masking-on-not-transformed-data.csv")
# df.drop(columns=["solarenergy",
#                  "orographic_precipitation_potential",
#                  "urban_heat_island_effect_refined",
#                  "valley_temperature_inversion_index",
#                  "windspeed","winddir","season",
#                  "winddir_cos","winddir_sin",'dew',"month_cos",
#                  "month_sin","temp"],inplace=True)
df = pd.read_csv("/content/drive/MyDrive/Kathmandu-Precipitation/data/Outlier-removed-dataset.csv")

In [3]:
features = [ i for i in df.columns if i not in ["precipitation","datetime"]]
target = ["precipitation"]
X = df[features]
y = df[target]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42,shuffle=False)


In [5]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
param_grid = {
    'kernel': [ 'rbf', 'poly'],  # Kernel types
    'C': [0.1, 1, 10, 100],              # Regularization parameter
    'gamma': ['scale', 'auto'],  # Kernel coefficient
    'epsilon': [ 0.1, 0.5, 1],      # Epsilon for the margin of tolerance
    'degree': [2, 3, 4]                  # Degree for polynomial kernel
}

In [7]:
svr_model = SVR()
tscv = TimeSeriesSplit(n_splits=7)


In [8]:
grid_search = GridSearchCV(
    estimator=svr_model,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=tscv,
    verbose=2,
    n_jobs=-1
)

In [9]:
grid_search.fit(X_train_scaled, y_train)


Fitting 7 folds for each of 144 candidates, totalling 1008 fits


  y = column_or_1d(y, warn=True)


In [10]:
best_svr_model = grid_search.best_estimator_
best_params = grid_search.best_params_
print("Best Parameters:", best_params)


Best Parameters: {'C': 10, 'degree': 2, 'epsilon': 1, 'gamma': 'auto', 'kernel': 'rbf'}


In [11]:
# Step 12: Evaluate the best model on the test set
y_pred = best_svr_model.predict(X_test_scaled)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
print(f"Root Mean Squared Error (RMSE) on Test Set: {rmse}")
print(f"Mean Absolute Error (MAE) on Test Set: {mae}")

# Step 13: Predict precipitation for the given row
row_to_predict = scaler.transform(X.iloc[[0]])  # Scale the single row
predicted_precipitation = best_svr_model.predict(row_to_predict)
print(f"Predicted Precipitation: {predicted_precipitation[0]}")

Root Mean Squared Error (RMSE) on Test Set: 4.2791360079567236
Mean Absolute Error (MAE) on Test Set: 2.5729384064211818
Predicted Precipitation: 1.0003549578920974
