# Imports

In [2]:
import pandas as pd
from pathlib import Path
from datetime import datetime
import numpy as np
from statsmodels.formula.api import ols
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import seaborn as sns
import math

# Récupération des données Windga utiles

In [3]:
# CSV file holding the WindGa data.
# NOTE(review): absolute path tied to one machine's home directory —
# consider deriving it from a configurable data root.
windga_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Data/Train_Test/rmse_windga.csv'
)

In [4]:
# Read the WindGa file (semicolon-separated) and index it by project and
# period (year, month) in a single chained expression.
rmse_windga = (
    pd.read_csv(windga_path, sep=';')
    .set_index(['project', 'year', 'month'])
)

# Preview: five randomly drawn rows
rmse_windga.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,prod_100p
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
"BTS1, BTS2",2015,4,1,3881895.0
CASH,2017,12,1,3861828.0
PCR1,2021,2,1,1668025.0
LERO,2017,12,0,918100.5
BAMB,2019,8,1,


# Récupération des données utiles : vitesses de vent

In [5]:
# Input files for the wind-speed data: per-project statistics and
# per-period RMSE values.
# NOTE(review): absolute paths tied to one machine's home directory —
# consider deriving them from a configurable root.
stat_ws_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_3/stat_m3.csv'
)
rmse_ws_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_3/rmse_m3.csv'
)

In [6]:
# Read the wind-speed statistics file (semicolon-separated) and index it
# by project in a single chained expression.
stat_ws = (
    pd.read_csv(stat_ws_path, sep=';')
    .set_index('project')
)

# Preview: five randomly drawn rows
stat_ws.sample(5)

Unnamed: 0_level_0,slope,intercept,rmse,rsquare
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
FIEN,651160.26,-2411989.0,224690.0,0.91
HENI,415424.51,-1573531.0,78113.0,0.97
ERIZ,650976.69,-2209445.0,139586.0,0.96
CLAM,686130.61,-2529068.0,158762.0,0.94
CAMB,703976.17,-2093017.0,172299.0,0.96


In [7]:
# Read the wind-speed RMSE file (semicolon-separated) and index it by
# project and period (year, month) in a single chained expression.
rmse_ws = (
    pd.read_csv(rmse_ws_path, sep=';')
    .set_index(['project', 'year', 'month'])
)

# Preview: five randomly drawn rows
rmse_ws.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,rmse_windspeed
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
VISE,2014,11,1,9089.689968
CONI,2014,12,1,455350.528908
VLSQ,2017,7,1,232174.115288
STMB,2009,3,1,6511.045222
NIED,2013,5,1,36705.93424


# Récupération des données utiles : énergie

In [8]:
# Input files for the energy data: per-project statistics and
# per-period RMSE values.
# NOTE(review): absolute paths tied to one machine's home directory —
# consider deriving them from a configurable root.
stat_ec_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_4/stat_m4.csv'
)
rmse_ec_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_4/rmse_m4.csv'
)

In [19]:
# Read the energy statistics file (semicolon-separated) and index it by
# project in a single chained expression.
stat_ec = (
    pd.read_csv(stat_ec_path, sep=';')
    .set_index('project')
)

# Preview: five randomly drawn rows
stat_ec.sample(5)

Unnamed: 0_level_0,slope,intercept,rmse,rsquare
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
SAUV,22.62,2602379.0,340964.0,0.75
JONC,22.02,1004636.0,291381.0,0.83
BARB,22.08,463457.0,332189.0,0.68
PCR1,10.32,194670.0,282191.0,0.58
GAR1,18.22,254142.0,95841.0,0.98


In [10]:
# Read the energy RMSE file (semicolon-separated) and index it by project
# and period (year, month) in a single chained expression.
rmse_ec = (
    pd.read_csv(rmse_ec_path, sep=';')
    .set_index(['project', 'year', 'month'])
)

# Preview: five randomly drawn rows
rmse_ec.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,rmse_energy_content
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
PCR1,2020,2,0,313825.261914
RODU,2015,4,0,86734.574413
COUR,2019,2,1,6856.446382
VLSQ,2011,3,1,89133.701606
STSI,2020,6,1,6475.467616


# Benchmark

In [11]:
# Build the benchmark table: the WindGa rows merged on the index with the
# rmse_windspeed column. With merge's default join type only index entries
# present on both sides are kept.
benchmark = pd.merge(
    rmse_windga,
    rmse_ws[['rmse_windspeed']],
    left_index=True,
    right_index=True,
    copy=False,
)

In [17]:
# Formatting: cast every column to 64-bit integers. Fractional parts are
# truncated toward zero, exactly as the previous element-wise int() did.
# astype is vectorised and replaces DataFrame.applymap, which is deprecated
# in recent pandas releases.
# NOTE(review): as with the int() mapping, this raises if any NaN is left in
# the frame (the WindGa sample output shows a missing prod_100p value) —
# confirm such rows never survive the merge, or drop them explicitly.
benchmark = benchmark.astype('int64')

# Preview: five randomly drawn rows
benchmark.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,prod_100p,rmse_windspeed,rmse_energy_content
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LEMO,2018,4,0,2268453,20335,45837
CLAN,2020,1,1,4089565,120150,104369
CLAM,2014,6,1,1192581,52069,131666
COUR,2012,3,1,1494451,34052,110514
BAMB,2014,9,1,531365,215920,176190


In [13]:
# Extend the benchmark table with the rmse_energy_content column, merged on
# the index (default join type: only index entries present on both sides).
# Any rmse_energy_content column already present is dropped first, so that
# re-running this cell does not produce suffixed duplicate columns
# (rmse_energy_content_x / rmse_energy_content_y). On a fresh run the drop
# is a no-op.
benchmark = benchmark.drop(columns='rmse_energy_content', errors='ignore').merge(
    rmse_ec['rmse_energy_content'],
    left_index=True,
    right_index=True,
    copy=False,
)

In [14]:
# Formatting: cast every column to 64-bit integers. Fractional parts are
# truncated toward zero, exactly as the previous element-wise int() did.
# astype is vectorised and replaces DataFrame.applymap, which is deprecated
# in recent pandas releases.
# NOTE(review): as with the int() mapping, this raises if any NaN is left in
# the frame (the WindGa sample output shows a missing prod_100p value) —
# confirm such rows never survive the merges, or drop them explicitly.
benchmark = benchmark.astype('int64')

# Preview: five randomly drawn rows, shown with thousands separators
benchmark.sample(5).style.format('{:,}')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,prod_100p,rmse_windspeed,rmse_energy_content
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
LUCO,2014,3,1,3081075,93252,51951
FREY,2012,2,1,5513287,106593,2773
GAR1,2017,12,1,2793155,104475,8689
LEMO,2018,7,1,1214573,418205,71889
ESPS,2021,6,0,1451443,228926,427369
