This notebook explores the "bilan-electrique-demi-heure" dataset published by Enedis.

# Import packages

In [2]:
%run ./utils.ipynb

In [3]:
import requests
import pandas as pd
import os

# Import data

Request

In [4]:
if not os.path.isfile("data/bilan-electrique.csv"):
    # Only query the Enedis open-data API when no cached CSV exists locally.
    params = {
        "dataset": "bilan-electrique-demi-heure",
        "rows": 10000,
        "sort": "horodate",
    }

    url = "https://data.enedis.fr/api/records/1.0/search/"

    # Bound the wait and fail loudly on HTTP errors, so that an error response
    # is never parsed (and later cached) as if it were data.
    response = requests.get(url, params=params, timeout=60)
    response.raise_for_status()
    req = response.json()  # decoded JSON payload, consumed by the next cell

In [5]:
if not os.path.isfile("data/bilan-electrique.csv"):
    # Flatten the "records" list of the API response: one row per record.
    df = pd.json_normalize(req, record_path=["records"])

    # json_normalize names nested keys "fields.<name>"; strip that prefix so the
    # measurement columns are addressed by their bare name.
    prefix = "fields."
    column_rename_dict = {
        name: name[len(prefix):] for name in df.columns if name.startswith(prefix)
    }
    df = df.rename(columns=column_rename_dict)

    # Make sure the cache directory exists before writing.
    os.makedirs("data", exist_ok=True)
    # index=False: the positional index carries no information, and writing it
    # makes read_csv bring it back as a spurious "Unnamed: 0" column.
    # A CSV cached before this change still contains that column until the
    # file is deleted and regenerated.
    df.to_csv("data/bilan-electrique.csv", index=False)

In [6]:
# Load the cached extract from disk; the cells above only create this file when it is missing.
df = pd.read_csv("data/bilan-electrique.csv")

In [7]:
# Preview the first rows to check the loaded columns.
df.head()

Unnamed: 0.1,Unnamed: 0,datasetid,recordid,record_timestamp,injection_rte,pertes,consommation_hta,soutirage_rte,production_profilee_photovoltaique,temperature_normale_lissee,...,consommation_profilee,production_profilee_aut,consommation_totale,mois,consommation_profilee_ent_hta,pseudo_rayonnement,soutirage_vers_autres_grd,horodate,consommation_telerelevee,production_photovoltaique
0,0,bilan-electrique-demi-heure,94c6ef383444e3d336761877efc53a0a455888cf,2023-05-09T01:00:00.633Z,32558140000.0,2211556000.0,9860801000.0,1170663000.0,6265.0,13.5,...,19628790000.0,1826731.0,32844720000.0,4,11570966.0,31,294942758.0,2023-04-28T21:30:00+00:00,13215930000.0,78397.0
1,1,bilan-electrique-demi-heure,3ba5f9d59a93d66534cf2fef1c0b6e256a7cda95,2023-05-09T01:00:00.633Z,32772190000.0,2224267000.0,9993632000.0,1327702000.0,3651.0,13.5,...,19645670000.0,1826731.0,33121040000.0,4,11960506.0,31,285205292.0,2023-04-28T21:00:00+00:00,13475370000.0,134228.0
2,2,bilan-electrique-demi-heure,a70daff1cc97eb677d82d496fe18a990ef26159a,2023-05-09T01:00:00.633Z,33118290000.0,2250411000.0,10108370000.0,1411545000.0,4222.0,13.6,...,19787230000.0,1826731.0,33514680000.0,4,12255523.0,29,279959566.0,2023-04-28T20:30:00+00:00,13727440000.0,269010.0
3,3,bilan-electrique-demi-heure,61e814b8cbfda2545a4a4e431b5d0cf31ca0e467,2023-05-09T01:00:00.633Z,31457110000.0,2096135000.0,10219220000.0,1402528000.0,5118.0,13.7,...,18023820000.0,1826731.0,32007690000.0,4,12755499.0,28,249430250.0,2023-04-28T20:00:00+00:00,13983870000.0,287695.0
4,4,bilan-electrique-demi-heure,3faf170837e6fbfb95b0a1e60897a5fe2af18633,2023-05-09T01:00:00.633Z,31336980000.0,2086463000.0,10223850000.0,1403755000.0,9268.0,13.8,...,17921170000.0,1826731.0,31907430000.0,4,11696870.0,26,248945575.0,2023-04-28T19:30:00+00:00,13986260000.0,232223.0


Preprocess data

In [130]:
# Columns kept for the analysis: the timestamp and the HTA consumption.
columns = [
    "horodate",
    "consommation_hta",
]

In [131]:
# Keep only the timestamp and the HTA consumption series. The explicit copy
# makes `df` an independent frame, so the column assignment below does not
# write into a slice of the loaded data (pandas SettingWithCopyWarning).
df = df[columns].copy()

# Keep the first 19 characters ("YYYY-MM-DDTHH:MM:SS"), which drops the
# UTC-offset suffix, and parse the result as naive datetimes.
# NOTE(review): this assumes every offset is +00:00, as in the preview rows — confirm.
df["horodate"] = pd.to_datetime(
    df["horodate"].str[:19],
    format="%Y-%m-%dT%H:%M:%S",
)

# Data exploration

Plot time series

In [132]:
# Line chart of the HTA consumption at the native (half-hourly) resolution.
# NOTE(review): `px` is not imported in the visible cells — presumably it is
# provided by utils.ipynb (loaded with %run); confirm.
fig = px.line(df, x="horodate", y="consommation_hta", title="HTA consumption")
fig.show()

Group data at day level

In [133]:
# Tag every reading with its calendar date, then total the consumption per day.
# NOTE(review): the first and last days of the extract may be incomplete, which
# would understate their totals — confirm before interpreting the edges.
df["date"] = df["horodate"].dt.date
df_day = df.groupby("date", as_index=False)["consommation_hta"].sum()

In [134]:
# Line chart of the per-day consumption totals computed above.
fig = px.line(df_day, x="date", y="consommation_hta", title="HTA consumption")
fig.show()

Plot correlation diagrams

In [135]:
# Correlation plot of the daily HTA consumption series.
# NOTE(review): create_corr_plot is not defined in the visible cells — presumably
# it comes from utils.ipynb (loaded with %run at the top); confirm its defaults there.
create_corr_plot(df_day["consommation_hta"])

In [136]:
# Same daily series, with plot_pacf=True.
# NOTE(review): presumably this switches the helper to a partial-autocorrelation
# plot — confirm against its definition in utils.ipynb.
create_corr_plot(df_day["consommation_hta"], plot_pacf=True)