In [11]:
import calendar
from datetime import timedelta
import numpy as np
import xlsxwriter
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

In [12]:
def get_factor(factor):
    """Tell whether ``factor`` is one of the two specially-handled series.

    Returns ``True`` for ``'Generation'`` and ``'Wind_Speed'`` and ``False``
    for anything else. Elsewhere in this notebook the result selects
    month-sized date steps in ``add_dates``, the percentage-error accuracy
    in ``find_accuracy`` and the line plot in ``save_plots``.
    """
    return factor in ('Generation', 'Wind_Speed')

In [13]:
def extract_data(dataframe, factor):
    """Reduce ``dataframe`` to ``Date`` plus a 0/1 indicator of ``factor``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``'Date'`` column and a column named ``factor``.
    factor : str
        Name of the column to binarise.

    Returns
    -------
    pandas.DataFrame
        A new frame with columns ``['Date', factor]`` and the caller's index.
        ``factor`` is ``1.0`` where the original value is strictly greater
        than zero and ``0.0`` otherwise. The input frame is not modified.
    """
    # Explicit copy so the column assignment below never targets a view of
    # the caller's frame (avoids pandas' chained-assignment warning).
    result = dataframe[['Date']].copy()
    # NaN > 0 evaluates to False, so missing values map to 0.0.
    result[factor] = (dataframe[factor] > 0).astype(float)
    return result

In [14]:
def add_dates(dataframe, factor, forecast_length):
    """Append ``forecast_length`` future rows and add ``Month``/``Day`` columns.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Historical data with a ``'Date'`` column parseable as ``%Y-%m-%d``.
    factor : str
        Name of the value column; it is left as NaN in the appended rows.
    forecast_length : int
        Number of future rows to append.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed ``0..N-1`` containing the historical rows followed
        by the future rows. ``Date`` is converted to datetime, and integer
        ``Month`` and ``Day`` columns derived from it are added. Any other
        column of ``dataframe`` is NaN in the future rows.

    Notes
    -----
    When ``get_factor(factor)`` is true, each future date is the previous date
    plus the number of days in the previous date's calendar month; otherwise
    the future dates advance one day at a time from the last historical date.
    NOTE(review): the month-sized step lands on the same day-of-month only
    when that day exists in the following month (e.g. it always does for the
    1st) -- confirm this matches the data's cadence.
    """
    history = dataframe.reset_index(drop=True)
    last_date = pd.to_datetime(history.at[len(history) - 1, 'Date'], format='%Y-%m-%d')
    # Parse the historical dates up front so the concatenated column is
    # datetime throughout instead of a mix of strings and Timestamps.
    history['Date'] = pd.to_datetime(history['Date'], format='%Y-%m-%d')

    if get_factor(factor):
        future_dates = []
        current = last_date
        for _ in range(forecast_length):
            days_in_month = calendar.monthrange(current.year, current.month)[1]
            current = current + timedelta(days=days_in_month)
            future_dates.append(current)
    else:
        future_dates = [last_date + timedelta(days=offset)
                        for offset in range(1, forecast_length + 1)]

    if future_dates:
        future = pd.DataFrame({'Date': pd.to_datetime(future_dates)})
        future[factor] = np.nan
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        combined = pd.concat([history, future], ignore_index=True)
    else:
        combined = history

    combined['Month'] = combined['Date'].dt.month
    combined['Day'] = combined['Date'].dt.day
    return combined

In [15]:
def find_accuracy(fit, dataframe, new_dataframe, factor, forecast_length):
    """Print an in-sample accuracy figure for a fitted model.

    Parameters
    ----------
    fit : object
        Fitted estimator exposing ``predict``; called with the ``Month`` and
        ``Day`` columns of ``new_dataframe``.
    dataframe : pandas.DataFrame
        Historical data holding the observed values in column ``factor``.
    new_dataframe : pandas.DataFrame
        Frame with ``Month``/``Day`` columns whose first ``len(dataframe)``
        rows correspond to the rows of ``dataframe``.
    factor : str
        Name of the observed column; also printed as a heading.
    forecast_length : int
        Kept for interface compatibility; the historical part of the
        prediction is now selected by ``len(dataframe)`` so a value of 0
        no longer produces an empty slice.

    Returns
    -------
    None
        Output is printed: the factor name, an ``Accuracy:`` line and a
        separator. When ``get_factor(factor)`` is true the figure is
        ``100 - 100 * mean(|actual - predicted| / |actual|)`` over rows where
        that ratio is finite; otherwise it is ``accuracy_score`` of the
        rounded predictions, as a percentage.
    """
    history_len = len(dataframe)
    prediction = np.asarray(fit.predict(new_dataframe[['Month', 'Day']]))
    # Slice by history length rather than [:-forecast_length], which is
    # empty when forecast_length == 0.
    fitted = prediction[:history_len]

    print(factor)

    if get_factor(factor):
        actual = dataframe[factor].to_numpy(dtype=float)
        # Zero actuals give inf (x/0) or nan (0/0); both are dropped below.
        with np.errstate(divide='ignore', invalid='ignore'):
            ratios = np.abs(actual - fitted) / np.abs(actual)
        mape = np.mean(ratios[np.isfinite(ratios)])
        print('Accuracy:', round(100 - (mape * 100), 2))
    else:
        print('Accuracy:', round(accuracy_score(dataframe[factor], fitted.round()) * 100, 2))

    print('---------------')

In [16]:
def randomForest(dataframe, factor, forecast_length):
    """Fit a random forest on ``(Month, Day)`` and forecast ``factor``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Historical data with ``'Date'`` and ``factor`` columns.
    factor : str
        Name of the column to model.
    forecast_length : int
        Number of future rows to forecast.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``add_dates`` (history followed by future rows)
        without its ``Month``/``Day`` helper columns and with an extra
        ``'forecast_' + factor`` column: NaN for the historical rows and the
        model's prediction for the future rows.

    Side effects
    ------------
    Calls ``find_accuracy``, which prints an in-sample accuracy report.
    """
    feature_cols = ['Month', 'Day']
    new_dataframe = add_dates(dataframe, factor, forecast_length).reset_index(drop=True)
    end_point = len(dataframe)

    train = new_dataframe.iloc[:end_point]
    # Fixed random_state keeps the fitted forest reproducible between runs.
    rfr = RandomForestRegressor(n_estimators=100, random_state=1)
    fit = rfr.fit(train[feature_cols], train[factor])

    find_accuracy(fit, dataframe, new_dataframe, factor, forecast_length)

    # Predict with exactly the columns the model was trained on; selecting
    # "everything except Date/factor" would pass along any extra column.
    future_features = new_dataframe.iloc[end_point:][feature_cols]
    if len(future_features):
        prediction = np.asarray(fit.predict(future_features), dtype=float)
    else:
        # Nothing to forecast; skip predict(), which rejects empty input.
        prediction = np.array([], dtype=float)

    # np.nan rather than the np.NAN alias, which NumPy 2.0 removed.
    new_dataframe['forecast_' + factor] = np.concatenate(
        [np.full(end_point, np.nan), prediction]
    )
    return new_dataframe.drop(columns=['Day', 'Month'])

In [17]:
def save_excel(excel_data, sheet_name, loc, folder):
    """Write ``excel_data`` to ``<folder><loc>_<sheet_name>.xlsx``.

    Parameters
    ----------
    excel_data : pandas.DataFrame
        Data to export; missing values are written as a single space.
    sheet_name : str
        Used in both the file name and the worksheet name.
    loc : str
        Prefix used in both the file name and the worksheet name.
    folder : str
        Prepended verbatim to the file name, so it must already end with a
        path separator if it names a directory.

    Returns
    -------
    None
        The workbook is written to disk when it is closed.

    Notes
    -----
    The header row is bold. The first column is written with a
    ``dd-mm-yyyy`` number format -- this assumes the first column holds the
    dates (TODO confirm against callers); the remaining columns are written
    unformatted.
    """
    excel_data = excel_data.fillna(' ')
    headings = list(excel_data.columns)

    workbook = xlsxwriter.Workbook(folder+loc+'_'+sheet_name+'.xlsx')
    worksheet = workbook.add_worksheet(loc+'_'+sheet_name)
    bold = workbook.add_format({'bold': 1})
    date_format = workbook.add_format({'num_format': 'dd-mm-yyyy'})

    worksheet.write_row(0, 0, headings, bold)

    # Numeric (row, col) addressing instead of building 'A2', 'B2', ... by
    # hand, which stops producing valid cell names after column Z.
    for col_idx, heading in enumerate(headings):
        cell_format = date_format if col_idx == 0 else None
        worksheet.write_column(1, col_idx, list(excel_data[heading]), cell_format)

    workbook.close()

In [18]:
def save_plots(excel_data, factor, forecast_length, loc, folder):
    """Plot actual vs. forecast values, save the figure as PNG and show it.

    Parameters
    ----------
    excel_data : pandas.DataFrame
        Must contain ``'Date'``, ``factor`` and ``'forecast_' + factor``
        columns; missing values are plotted as ``0.0``.
    factor : str
        Name of the plotted series; also used as the y-axis label.
    forecast_length : int
        Number of trailing rows that are forecasts.
    loc : str
        Prefix for the figure title and the file name.
    folder : str
        Prepended verbatim to the file name ``<loc>_<factor>.png``.

    Returns
    -------
    None
        The figure is written to disk and displayed, then every open
        matplotlib figure is closed.

    Notes
    -----
    When ``get_factor(factor)`` is true, the historical rows are drawn as a
    blue line and the forecast rows as a red line; otherwise both columns are
    drawn as bars over the full date range.
    """
    excel_data = excel_data.fillna(0.0)
    forecast_col = 'forecast_' + factor
    dates = excel_data['Date']

    # Positional split point. Negative slicing breaks when forecast_length
    # is 0: [:-0] is empty and [-0:] is the whole series.
    split = max(len(excel_data) - forecast_length, 0)

    fig, ax = plt.subplots(figsize=(14, 4))
    if get_factor(factor):
        ax.plot(dates.iloc[:split], excel_data[factor].iloc[:split], color='blue')
        ax.plot(dates.iloc[split:], excel_data[forecast_col].iloc[split:], color='red')
    else:
        ax.bar(dates, excel_data[factor], color='blue')
        ax.bar(dates, excel_data[forecast_col], color='red')
    ax.set_xlabel('Date')
    ax.set_ylabel(factor)
    ax.legend(['Actual', 'Forecast'])
    fig.suptitle(loc + '_' + factor)
    fig.savefig(folder+'{}_{}'.format(loc, factor) + '.png', bbox_inches='tight', pad_inches=0)
    plt.show()
    # Closes every open figure, not only the one created here.
    plt.close("all")