In [3]:
# SVR for Dengue Prediction

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor

# Importing the dataset
dataset = pd.read_csv('dataset/dataset.csv', header = None)
dataset.columns = dataset.iloc[0] # set the first row as the header
dataset = dataset[1:] # remove the first row
dataset = dataset.drop(columns=['Year']) # remove the year column

# Encoding categorical data
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
dataset['District'] = label_encoder.fit_transform(dataset['District'])

# Handling missing data
dataset = dataset.apply(pd.to_numeric, errors='coerce')
# dataset.ffill(axis = 0, inplace=True)
# dataset.fillna(dataset.mean(), inplace=True)
# dataset.fillna(dataset.mode().iloc[0], inplace=True)
# dataset.fillna(dataset.median(), inplace=True)
dataset.fillna(0, inplace=True)
dataset['Total Cases'] = dataset.iloc[:, 37:49].sum(axis=1)
dataset = dataset.drop(dataset.columns[37:49], axis=1)

# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X = dataset.iloc[:, 0:-1].values # all columns except the last one
Y = dataset.iloc[:, -1].values # the last column
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.20, random_state = 0) # 75% training and 25% test

# Feature Scaling
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Lasso
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)

# Fitting SVR to the dataset
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X_train, Y_train)

# Predicting the Test set results
Y_pred = regressor.predict(X_test)

# Evaluating the Algorithm
from sklearn import metrics
print('Mean Absolute Error:', metrics.mean_absolute_error(Y_test, Y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(Y_test, Y_pred))
print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(Y_test, Y_pred)))
print('R2 Score:', metrics.r2_score(Y_test, Y_pred))

Mean Absolute Error: 1364.8576471263193
Mean Squared Error: 9520539.747601908
Root Mean Squared Error: 3085.537189469916
R2 Score: -0.14273502160692852
