# Imports

In [14]:
import pandas as pd
from pathlib import Path
from datetime import datetime
import numpy as np
from statsmodels.formula.api import ols
from matplotlib.backends.backend_pdf import PdfPages
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.max_open_warning': 0})
import seaborn as sns
import math

# Récupération des données Windga utiles

In [15]:
# Location of the WindGa CSV export that the next cell loads.
windga_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Data/Train_Test/rmse_windga.csv'
)

In [16]:
# Read the WindGa CSV (';'-separated) and key each row by
# (project, year, month) in one chained expression.
rmse_windga = (
    pd.read_csv(windga_path, sep=';')
    .set_index(['project', 'year', 'month'])
)

# Preview five random rows
rmse_windga.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,prod_100p
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
BAMB,2012,11,1,1463464.0
RAM2,2021,9,1,1884611.0
SAUV,2021,10,0,4801524.0
SAPN,2015,12,0,1760493.0
NOUR,2013,5,0,1206485.0


# Récupération des données utiles : vitesses de vent

In [17]:
# Model 3 (wind speed) CSV files: per-project regression statistics and
# per-period RMSE values, loaded in the following cells.
stat_ws_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_3/stat_m3.csv'
)
rmse_ws_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_3/rmse_m3.csv'
)

In [18]:
# Read the wind-speed model statistics (';'-separated CSV) and index the
# rows by project in one chained expression.
stat_ws = (
    pd.read_csv(stat_ws_path, sep=';')
    .set_index('project')
)

# Preview five random rows
stat_ws.sample(5)

Unnamed: 0_level_0,slope,intercept,rmse,rsquare
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
RODU,256405.25,-935182.0,93256.0,0.88
NIED,719210.34,-2422268.0,155270.0,0.95
LANE,501893.43,-1644710.0,200185.0,0.86
PDFE,530352.98,-1561030.0,100219.0,0.96
SAMI,461820.65,-1735675.0,119087.0,0.93


In [19]:
# Read the wind-speed model RMSE values (';'-separated CSV) and key each
# row by (project, year, month) in one chained expression.
rmse_ws = (
    pd.read_csv(rmse_ws_path, sep=';')
    .set_index(['project', 'year', 'month'])
)

# Preview five random rows
rmse_ws.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,rmse_windspeed
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
CONI,2021,2,1,652444.3
VLSQ,2012,1,0,1330294.0
CHAB,2018,5,1,414251.6
FREY,2010,9,0,117582.4
MTAR,2019,9,1,574707.9


# Récupération des données utiles : énergie

In [20]:
# Model 4 (energy content) CSV files: per-project regression statistics and
# per-period RMSE values, loaded in the following cells.
stat_ec_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_4/stat_m4.csv'
)
rmse_ec_path = Path(
    '/home/ec2-user/SageMaker/EtudeWindIndex/Models/Model_4/rmse_m4.csv'
)

In [21]:
# Read the energy-content model statistics (';'-separated CSV) and index
# the rows by project in one chained expression.
stat_ec = (
    pd.read_csv(stat_ec_path, sep=';')
    .set_index('project')
)

# Preview five random rows
stat_ec.sample(5)

Unnamed: 0_level_0,slope,intercept,rmse,rsquare
project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CONI,19.25,1114839.0,279381.0,0.83
AMEL,22.68,424293.0,151469.0,0.96
CDBO,45.65,300893.0,217463.0,0.98
RAM1,25.25,218568.0,143572.0,0.97
TAIL,58.74,2744403.0,872337.0,0.63


In [22]:
# Read the energy-content model RMSE values (';'-separated CSV) and key each
# row by (project, year, month) in one chained expression.
rmse_ec = (
    pd.read_csv(rmse_ec_path, sep=';')
    .set_index(['project', 'year', 'month'])
)

# Preview five random rows
rmse_ec.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,rmse_energy_content
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1
ALLA,2019,9,1,262002.5
CHAB,2010,6,1,1454116.0
PDFE,2020,3,0,65105.61
LBDF,2011,10,1,235370.7
RAM2,2017,7,0,56154.78


# Benchmark

In [23]:
# Build the benchmark table: the WindGa rows joined (default inner join on
# the shared index) with the wind-speed model's RMSE column.
benchmark = pd.merge(
    rmse_windga,
    rmse_ws['rmse_windspeed'],
    left_index=True,
    right_index=True,
    copy=False,
)

In [24]:
# Formatting: cast every column to 64-bit integers.
# A single vectorised astype() replaces the element-wise applymap(int), which
# is deprecated since pandas 2.1. Both truncate floats toward zero and both
# raise if a value is NaN.
benchmark = benchmark.astype('int64')

# Preview five random rows
benchmark.sample(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,prod_100p,rmse_windspeed
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
GAR2,2021,6,1,526224,38745
MTL1,2015,6,1,1174249,396322
PDCE,2016,7,1,993795,61229
QVA3,2017,6,0,2359571,279369
ALLA,2015,1,1,3123542,668454


In [25]:
# Extend the benchmark table with the energy-content model's RMSE column
# (default inner join on the shared index).
# Any 'rmse_energy_content' column left over from a previous run of this cell
# is dropped first: merging it a second time would otherwise produce suffixed
# duplicates ('rmse_energy_content_x' / 'rmse_energy_content_y') instead of a
# single column. On a first run the drop is a no-op.
benchmark = pd.merge(
    benchmark.drop(columns=['rmse_energy_content'], errors='ignore'),
    rmse_ec['rmse_energy_content'],
    left_index=True,
    right_index=True,
    copy=False,
)

In [26]:
# Formatting: cast every column to 64-bit integers.
# A single vectorised astype() replaces the element-wise applymap(int), which
# is deprecated since pandas 2.1. Both truncate floats toward zero and both
# raise if a value is NaN.
benchmark = benchmark.astype('int64')

# Preview five random rows, rendered with thousands separators
benchmark.sample(5).style.format('{:,}')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,train,prod_100p,rmse_windspeed,rmse_energy_content
project,year,month,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
PAMP,2019,11,1,2129755,688144,737703
MOTG,2020,8,0,1931764,269116,249611
LBDF,2014,2,0,2735832,604824,380355
VANA,2013,8,1,450813,30511,38108
SOUR,2015,1,1,6012763,640731,13947
