In [None]:
#Importing Libraries
import numpy as np
from numpy import mean, absolute
import pandas as pd
import matplotlib.pyplot as pl

#Importing libraries for pre-processing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import log_loss

#Importing libraries for Regressors
from sklearn.svm import SVR
from sklearn.metrics import make_scorer


In [None]:
# Load the ForestFires data set (path is relative to the notebook's working directory)
forestfire_data = pd.read_csv('./forestfires.csv')

# Fixed seed so the train/test split -- and every metric derived from it --
# is identical after "Restart Kernel -> Run All".
RANDOM_SEED = 42

# Encode the categorical calendar columns as integers.
# NOTE: LabelEncoder numbers the labels in sorted (alphabetical) order, so the
# codes do not follow calendar order.
encodeData = LabelEncoder()

#Convert months to integer using label encoder
forestfire_data['encoded_months'] = encodeData.fit_transform(forestfire_data['month'])

#Convert days to integer using label encoder
#(this re-fits the same encoder, so afterwards `encodeData` holds the day labels)
forestfire_data['encoded_days'] = encodeData.fit_transform(forestfire_data['day'])

#FEATURE SELECTION
#Three predictors fed to the model. The correlation ranking that motivated this
#choice is not computed in this notebook.
featureSelected = ['temp', 'FFMC', 'DMC']
dataX = forestfire_data[featureSelected]

#Select the 'area' column as the regression target Y
dataY = forestfire_data['area']

#Split the dataset into training data (80%) and testing data (20%) using sklearn's
#train_test_split method; random_state pins the shuffle so results are reproducible
trainingData, testingData, trainingArea, testingArea = train_test_split(
    dataX, dataY, test_size = 0.2, random_state = RANDOM_SEED
)

#Reshape the training target into an (n_samples, 1) column, the 2-D layout
#that StandardScaler requires
trainingArea = trainingArea.values.reshape(-1, 1)

In [None]:
#Root mean squared error, built on sklearn's mean_squared_error
def root_mean_squared_error(givenValues, predictedValues):
    """Return the square root of the mean squared error.

    Parameters
    ----------
    givenValues : array-like
        Observed target values.
    predictedValues : array-like
        Predicted target values, aligned with ``givenValues``.

    Returns
    -------
    float
        ``sqrt(mean_squared_error(givenValues, predictedValues))``.
    """
    squaredErrorMean = mean_squared_error(givenValues, predictedValues)
    return np.sqrt(squaredErrorMean)

#Mean absolute deviation of a set of values around their own mean
def mean_absolute_deviation(predictedValues):
    """Return the average absolute distance of the values from their mean.

    Parameters
    ----------
    predictedValues : array-like
        Values whose spread is measured.

    Returns
    -------
    float
        ``mean(|x - mean(x)|)`` over ``predictedValues``.
    """
    centre = np.mean(predictedValues)
    distances = np.abs(predictedValues - centre)
    return np.mean(distances)

#Define function for calculating the Gaussian negative log likelihood
def negative_log_likelihood(givenValues, predictedValues, variance=None):
    """Return the negative log likelihood of the observations under a Gaussian model.

    The observations are modelled as ``y_i ~ N(yhat_i, sigma^2)``, which gives

        NLL = (n / 2) * log(2 * pi * sigma^2) + sum((y_i - yhat_i)^2) / (2 * sigma^2)

    Parameters
    ----------
    givenValues : array-like
        Observed target values. Flattened and read positionally, so a pandas
        Series with a non-default index is handled correctly.
    predictedValues : array-like
        Predicted values; must contain as many elements as ``givenValues``.
    variance : float, optional
        The ``sigma^2`` to use. Defaults to the variance of ``predictedValues``.
        NOTE(review): the residual variance is the more common estimate of
        ``sigma^2``; pass it explicitly here if that is what is wanted.

    Returns
    -------
    float
        The negative log likelihood. A zero variance yields inf/nan (with a
        numpy warning) rather than an exception.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    observed = np.asarray(givenValues, dtype=float).ravel()
    predicted = np.asarray(predictedValues, dtype=float).ravel()
    if observed.shape != predicted.shape:
        raise ValueError(
            'givenValues and predictedValues must have the same number of elements'
        )

    if variance is None:
        variance = np.var(predicted)

    sampleCount = observed.size
    squaredErrorSum = np.sum((observed - predicted) ** 2)

    # Both terms enter with a positive sign: this is the *negative* log likelihood,
    # and the squared-error sum is scaled by 1 / (2 * sigma^2).
    normalisationTerm = (sampleCount / 2) * np.log(2 * np.pi * variance)
    residualTerm = squaredErrorSum / (2 * variance)
    return normalisationTerm + residualTerm

In [None]:
#Implementing SUPPORT VECTOR REGRESSION

#Initialize scalers: one for the features and a separate one for the target.
#Re-using a single scaler for both would overwrite the feature statistics when
#it is re-fitted on the target, leaving nothing to scale the test features with.
scaler_SVR = StandardScaler()
targetScaler_SVR = StandardScaler()

#Fit both scalers on the training split only
trainingDataScaled = scaler_SVR.fit_transform(trainingData)
#StandardScaler needs a 2-D column; SVR.fit expects a 1-D target, hence the ravel
trainingAreaScaled = targetScaler_SVR.fit_transform(np.asarray(trainingArea).reshape(-1, 1)).ravel()

#Define parameter grid for support vector regressor
parameterGrid_SVR = {'C': [0.01, 0.1, 1, 10], 'epsilon': [10, 1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']}

#Define scorer (greater_is_better = False makes GridSearchCV minimise the RMSE)
scorerRMSE = make_scorer(root_mean_squared_error, greater_is_better = False)

#Implement grid search cross validation using 10 folds
#NOTE(review): the scalers are fitted before the CV split, so each fold sees
#statistics from the whole training set; wrap scaler + SVR in a Pipeline if
#strictly leak-free cross validation is required.
SVR_grid = GridSearchCV(SVR(), parameterGrid_SVR, refit = True, verbose = 0, scoring = scorerRMSE, cv = 10)
SVR_grid.fit(trainingDataScaled, trainingAreaScaled)

#Predict the values for areas:
#apply the training-set feature scaling to the test features, then map the
#standardised predictions back to the original units of 'area'
testingDataScaled = scaler_SVR.transform(testingData)
predictedScaled = SVR_grid.predict(testingDataScaled)
predictedValues = targetScaler_SVR.inverse_transform(predictedScaled.reshape(-1, 1)).ravel()

#Calculate Root Mean Squared Error
SVR_RMSE = root_mean_squared_error(testingArea, predictedValues)

#Calculate Mean Absolute Deviation
SVR_MAD = mean_absolute_deviation(predictedValues)

#Calculate negative log likelihood
SVR_NLL= negative_log_likelihood(testingArea.values, predictedValues)

print('Root Mean Squared Error for Support Vector Regression: ', SVR_RMSE)
print('Mean Absolute Deviation for Support Vector Regression: ', SVR_MAD)
print('Negative Log Likelihood for Support Vector Regression: ', SVR_NLL)