# Imports

In [1]:
import pandas as pd
from pathlib import Path
from datetime import datetime
import numpy as np
from statsmodels.formula.api import ols
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import seaborn as sns
import math

# Récupération des données utiles

In [2]:
# Root folder holding the per-model result files; change it here to relocate every input at once.
MODELS_DIR = Path('/home/ec2-user/SageMaker/EtudeWindIndex/Models')

# Model 3 outputs (loaded below as stat_ws / rmse_ws)
stat_ws_path = MODELS_DIR / 'Model_3' / 'stat_m3.csv'
rmse_ws_path = MODELS_DIR / 'Model_3' / 'rmse_m3.csv'

# Model 4 outputs (loaded below as stat_ec / rmse_ec)
stat_ec_path = MODELS_DIR / 'Model_4' / 'stat_m4.csv'
rmse_ec_path = MODELS_DIR / 'Model_4' / 'rmse_m4.csv'

In [3]:
# Load the wind-speed statistics table (';'-separated CSV) and key it by project
stat_ws = pd.read_csv(stat_ws_path, sep=';').set_index('project')

# Preview five random rows
stat_ws.sample(5)

Unnamed: 0_level_0,slope,intercept,rmse,rsquare
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SAUV,784610.28,-366274.0,300231.0,0.81
VEUL,530040.81,-1870864.0,159217.0,0.94
BARB,624724.99,-1635875.0,321142.0,0.71
RIOL,214653.24,-580647.0,67036.0,0.89
JONC,617110.24,-896403.0,254822.0,0.82


In [4]:
# Load the wind-speed RMSE table (';'-separated CSV) and key it by
# project and period (year, month)
period_keys = ['project', 'year', 'month']
rmse_ws = pd.read_csv(rmse_ws_path, sep=';').set_index(period_keys)

# Preview five random rows
rmse_ws.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rmse_windspeed
project,year,month,Unnamed: 3_level_1
STSI,2015,1,256789.2
TRFR,2015,11,240447.8
VLSQ,2009,3,1140521.0
PDFE,2014,4,19998.04
LOPV,2021,7,47618.86


In [5]:
# Load the energy statistics table (';'-separated CSV) and key it by project
stat_ec = pd.read_csv(stat_ec_path, sep=';').set_index('project')

# Preview five random rows
stat_ec.sample(5)

Unnamed: 0_level_0,slope,intercept,rmse,rsquare
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LOU2,3.25,147571.0,63426.0,0.62
CASH,22.48,761685.0,228433.0,0.87
BRIY,12.62,84577.0,73753.0,0.97
FREY,21.46,1287935.0,270037.0,0.74
BDBS,50.67,1249625.0,432742.0,0.91


In [6]:
# Load the energy RMSE table (';'-separated CSV) and key it by
# project and period (year, month)
period_keys = ['project', 'year', 'month']
rmse_ec = pd.read_csv(rmse_ec_path, sep=';').set_index(period_keys)

# Preview five random rows
rmse_ec.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rmse_energy_content
project,year,month,Unnamed: 3_level_1
LUCO,2013,9,46875.009714
FITO,2018,8,58817.808491
LAPI,2011,6,201817.051946
FIEN,2017,5,26922.92103
CLIT,2014,3,55388.128071


# Comparaison de la RMSE des fits linéaires entre les deux méthodes

In [7]:
# Compare the RMSE of the energy-content fit (stat_ec) with the RMSE of the
# wind-speed fit (stat_ws), for the projects present in both tables.
#
# Index.intersection is used instead of the `&` operator, which is no longer a
# set operation on recent pandas, and the score is computed for all projects at
# once instead of growing the frame with DataFrame.append (removed in pandas 2.0).
common_projects = stat_ws.index.unique().intersection(stat_ec.index.unique())
ws_rmse = stat_ws.loc[common_projects, 'rmse']
ec_rmse = stat_ec.loc[common_projects, 'rmse']

# Positive when the comparison favours the energy content (its RMSE is the
# smaller one); the gap is normalised by the larger of the two RMSE values.
relative_gap = (ws_rmse - ec_rmse) / np.maximum(ws_rmse, ec_rmse).abs()

# One row per project, default integer index: 'project' stays a regular column
# so that it can be set as the index afterwards.
comp_rmse = pd.DataFrame({
    'project': common_projects.to_numpy(),
    'ec_vs_ws': relative_gap.to_numpy(),
})

In [8]:
# Key the comparison table by project. The guard keeps this cell re-runnable:
# once 'project' is already the index, calling set_index('project') again
# would raise a KeyError.
if comp_rmse.index.name != 'project':
    comp_rmse = comp_rmse.set_index('project')

# Preview five random rows
comp_rmse.sample(5)

Unnamed: 0_level_0,ec_vs_ws
project,Unnamed: 1_level_1
PLES,0.328959
LUC2,0.071835
CASH,0.044845
PAAN,0.377396
OUPI,0.084525


In [9]:
# Destination CSV file for the comparison table (comp_rmse is written to it below).
# NOTE(review): `ec_vs_ws` is also the name of the score column in comp_rmse;
# a more distinct name for this path would avoid confusion.
ec_vs_ws = Path('/home/ec2-user/SageMaker/EtudeWindIndex/Models/Benchmark/ec_vs_ws.csv')

In [10]:
# Save the comparison table as a ';'-separated CSV file
comp_rmse.to_csv(path_or_buf=ec_vs_ws, sep=';')

In [11]:
# Share of projects whose score is not negative, i.e. where the energy content
# is equal or better, expressed as a percentage of the non-missing scores
scores = comp_rmse['ec_vs_ws']
n_negative = scores[scores < 0].count()
n_scored = scores.count()
pct_equal_or_better = (1 - n_negative / n_scored) * 100

print("L'énergie content est égale ou meilleure dans",
      "{0:.2f}%".format(pct_equal_or_better), "des cas")

L'énergie content est égale ou meilleure dans 63.44% des cas
