In [None]:
import os
import warnings
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import datetime as dt
import math
import scipy.stats

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from math import sqrt


import colorama
from colorama import Fore, Style

import seaborn as sns

from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the semicolon-delimited source table and preview its first rows
df = pd.read_csv('Primary total energy consumption.csv', sep=';')

df.head(5)

## Data Preprocessing and Handling Missing Values

In [None]:
# Reshape from wide to long: every non-"Country" column header becomes a value
# in "Year", and the corresponding cell goes into "Value".
df1 = df.melt(id_vars=["Country"], var_name="Year", value_name="Value")
df1.head(5)

In [None]:
# Eliminate the countries (and the "Other Europe" aggregate) which are not in the European Union.
values = ['Iceland','Norway','Switzerland','Turkey','Ukraine', 'United Kingdom', 'Other Europe']
# `~mask` is the idiomatic negation of a boolean Series (instead of `== False`).
# `.copy()` makes df1 an independent frame: the following cells assign to
# df1['Value'], which would otherwise write into a slice of the melted frame
# and raise SettingWithCopyWarning.
df1 = df1[~df1['Country'].isin(values)].copy()
df1

In [None]:
# Check the column dtypes before the 'Value' column is cleaned
df1.dtypes

In [None]:
# Preview the first five rows of the filtered long-format frame
df1.head(n=5)

In [None]:
# Number of missing entries in each column
df1.isna().sum()

In [None]:
# Swap the decimal comma for a dot in every entry and cast it to float
# (missing entries stringify to 'nan' and come back as float NaN).
df1["Value"] = df1["Value"].map(lambda raw: float(str(raw).replace(',', '.')))

In [None]:
# Fill null data with the mean of each country.
# `transform('mean')` returns a Series aligned to df1's own index, so the result
# can be assigned straight back. The previous `groupby(...).apply(lambda ...)`
# form prepends the group key to the index on pandas >= 2.0, which makes the
# assignment fail with an incompatible-index error.
# A country whose values are all missing has a NaN mean and therefore stays NaN.
df1['Value'] = df1['Value'].fillna(df1.groupby('Country')['Value'].transform('mean'))


In [None]:
# Round 'Value' to two decimal places
df1['Value'] = df1['Value'].round(2)

In [None]:
# First five rows after conversion, imputation and rounding
df1.iloc[:5]

## Training and Testing the SVR model - Accuracy Metrics for Time Series Forecast

In [None]:
# Distinct country names, in order of first appearance in df1
country_name = pd.unique(df1['Country'])
print(country_name)

In [None]:
#Create training and testing datasets, fit one SVR per country and report its accuracy
# NOTE: this silences every warning for the rest of the kernel session.
warnings.filterwarnings('ignore')

dict_org = {}
dict_pred = {}
country_accuracy = {}

# Window length: the first (timesteps - 1) values of a window are the features,
# the last value is the target the model has to predict.
timesteps = 5


def _sliding_windows(scaled, width):
    """Return a 2-D array whose rows are the consecutive `width`-long windows of column 0 of `scaled`."""
    flat = scaled[:, 0]
    return np.array([flat[i:i + width] for i in range(len(flat) - width + 1)])


# `name` stays the integer position (as in the original loop) so that any later
# cell indexing `country_name[name]` keeps working; `country` is the label itself.
for name, country in enumerate(country_name):
    # Work on an independent copy so the clean-up below never writes into df1.
    X = df1.loc[df1['Country'] == country, ['Year', 'Value']].copy()
    # Defensive re-cleaning (decimal comma -> dot, mean imputation, 2-decimal rounding);
    # a no-op when the preprocessing cells above have already been run.
    X['Value'] = X['Value'].map(lambda raw: float(str(raw).replace(',', '.')))
    X['Value'] = X['Value'].fillna(X['Value'].mean()).round(2)
    X['Year'] = pd.to_datetime(X['Year'])  # convert Year column to datetime
    X = X.set_index('Year')

    # Positional 70/30 split: first 70 % of the rows for training, the rest for testing.
    # (assumes the rows are already in chronological order -- TODO confirm against the CSV)
    size = int(len(X) * 0.70)
    train, test = X.iloc[:size], X.iloc[size:]

    # Each split must hold at least one full window; otherwise there is nothing to fit or score.
    if len(train) < timesteps or len(test) < timesteps:
        print('Skipping {}: fewer than {} observations in the train or test split'.format(country, timesteps))
        continue

    # Scale to (0, 1) using statistics from the training split only.
    train_values = train['Value'].values.reshape(-1, 1)
    test_values = test['Value'].values.reshape(-1, 1)
    scaler = MinMaxScaler()
    train_data = scaler.fit_transform(train_values)
    test_data = scaler.transform(test_values)

    # Create data with time-steps: one row per window of `timesteps` consecutive values.
    train_data_timesteps = _sliding_windows(train_data, timesteps)
    test_data_timesteps = _sliding_windows(test_data, timesteps)

    x_train, y_train = train_data_timesteps[:, :timesteps-1], train_data_timesteps[:, [timesteps-1]]
    x_test, y_test = test_data_timesteps[:, :timesteps-1], test_data_timesteps[:, [timesteps-1]]

    ## Implement SVR and fit it on the training windows
    model = SVR(kernel='rbf', gamma=0.5, C=10, epsilon=0.05)
    model.fit(x_train, y_train[:, 0])

    ## Make model predictions (column vectors, as required by inverse_transform)
    y_train_pred = model.predict(x_train).reshape(-1, 1)
    y_test_pred = model.predict(x_test).reshape(-1, 1)

    ## Evaluate the model on the original scale
    # Undo the scaling of the predictions ...
    y_train_pred = scaler.inverse_transform(y_train_pred)
    y_test_pred = scaler.inverse_transform(y_test_pred)
    # ... and of the true targets.
    y_train = scaler.inverse_transform(y_train)
    y_test = scaler.inverse_transform(y_test)

    # The target of each window is its last element, so the first target
    # sits at position timesteps-1 of the split.
    train_timestamps = train.index[timesteps-1:]
    test_timestamps = test.index[timesteps-1:]

    ## Check model performance on training data
    plt.figure(figsize=(25,6))
    plt.plot(train_timestamps, y_train, color = 'red', linewidth=2.0, alpha = 0.6)
    plt.plot(train_timestamps, y_train_pred, color = 'blue', linewidth=0.8)
    plt.legend(['Actual','Predicted'])
    plt.xlabel('Year')
    plt.title(country)
    plt.show()

    #Print model checking for training data
    print('MAPE for training data: {}'.format(country), np.mean(np.abs((y_train - y_train_pred) / y_train))*100, '%')

    # Plot the predictions for testing data
    plt.figure(figsize=(10,3))
    plt.plot(test_timestamps, y_test, color = 'red', linewidth=2.0, alpha = 0.4)
    plt.plot(test_timestamps, y_test_pred, color = 'blue', linewidth=0.8)
    plt.legend(['Actual','Predicted'])
    plt.xlabel(country)
    plt.show()

    #Print model checking for testing data
    mse = mean_squared_error(y_test, y_test_pred)
    mae = mean_absolute_error(y_test, y_test_pred)
    rmse_test = sqrt(mse) #RMSE
    # Mean absolute percentage error, kept as a fraction in the variable ...
    mape_test = np.mean(np.abs(y_test_pred-y_test)/np.abs(y_test))
    print('MAE for test data {} :'.format(country),mae)
    # ... but reported as a percentage, matching the training MAPE printed above.
    print('MAPE for test data  {} :'.format(country),mape_test*100, '%')
    print('RMSE for test data {} :'.format(country),rmse_test)
    print('MSE for test data {} :'.format(country),mse)
   
        
    
   
    
        