In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


In [3]:
file_path = '/content/drive/MyDrive/Dengue Dataset/dataset.csv'  #paste korba file path
data = pd.read_csv(file_path)  #ei function ta direct link theke data niye ney dataframe e


In [4]:
data.fillna(data.mean(numeric_only=True), inplace=True)  #missing value fill kore mean koray

label_encoder = LabelEncoder()
data['District'] = label_encoder.fit_transform(data['District'])   #district er names encode korbe

X = data.drop(columns=['January', 'February', 'March', 'April', 'May',
                       'June', 'July', 'August', 'September', 'October',
                       'November', 'December'])    #features select korlam output drop dia
Y = data[['January', 'February', 'March', 'April', 'May',
          'June', 'July', 'August', 'September', 'October',
          'November', 'December']]   #output select korlam

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-Test Split
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)     #0.2 means 20% for test data


In [5]:
models = {
    "Linear Regression": LinearRegression(),
    "Support Vector Regressor": SVR(),
    "Random Forest Regressor": RandomForestRegressor(n_estimators=10, random_state=42),
    "Gradient Boosting Regressor": GradientBoostingRegressor(n_estimators=10, random_state=42),
    "XGBoost Regressor": XGBRegressor(n_estimators=10, random_state=42)
}

In [6]:

results = []

for month in Y.columns:
    for name, model in models.items():
        model.fit(X_train, Y_train[month])
        Y_pred = model.predict(X_test)

        mae = mean_absolute_error(Y_test[month], Y_pred)
        mse = mean_squared_error(Y_test[month], Y_pred)
        r2 = r2_score(Y_test[month], Y_pred)

        results.append({
            "Month": month,
            "Model": name,
            "MAE": mae,
            "MSE": mse,
            "R-squared": r2
        })


In [7]:
results_df = pd.DataFrame(results)
print("Model Performance Results:")
print(results_df)

Model Performance Results:
        Month                        Model          MAE           MSE  \
0     January            Linear Regression    12.301158  1.076428e+03   
1     January     Support Vector Regressor     7.804389  1.088472e+03   
2     January      Random Forest Regressor    12.076120  1.443932e+03   
3     January  Gradient Boosting Regressor    12.052738  1.568340e+03   
4     January            XGBoost Regressor    10.403070  1.117107e+03   
5    February            Linear Regression    11.380461  4.775748e+02   
6    February     Support Vector Regressor     6.000619  4.468382e+02   
7    February      Random Forest Regressor     7.085264  2.805538e+02   
8    February  Gradient Boosting Regressor     8.192282  4.351816e+02   
9    February            XGBoost Regressor     6.536248  4.125640e+02   
10      March            Linear Regression    13.261986  6.506695e+02   
11      March     Support Vector Regressor     6.010194  5.807261e+02   
12      March      Rando