In [341]:
import pandas as pd
import requests
import json
import datetime as dt
import os
import plotly as plt
import tensorflow as tf
import keras
import numpy as np
import pyod
from pyod.models.ecod import ECOD

# Generation Data

In [479]:
# Folder that holds the curated generation-data JSON chunks.
json_file_path = 'C:/Users/EMC/Documents/doutorado/doctorade/data/loanda/generation_data/'

# Show which chunk files are present in that folder.
os.listdir(json_file_path)

['loanda_data_curated_0.json',
 'loanda_data_curated_200000.json',
 'loanda_data_curated_400000.json',
 'loanda_data_curated_600000.json',
 'loanda_data_curated_800000.json']

In [354]:
# Load every curated generation chunk, cast the measurements to float and derive
# the energy produced between consecutive samples for each lot (yB ... yF).
json_file_path = 'C:/Users/EMC/Documents/doutorado/doctorade/data/loanda/generation_data/'

# Collect the chunks in a list and concatenate once: calling pd.concat inside the
# loop re-copies everything loaded so far on every iteration.
# sorted() makes the chunk order deterministic (os.listdir order is unspecified).
generation_chunks = []
for data_chunks in sorted(os.listdir(json_file_path)):
    path = os.path.join(json_file_path, data_chunks)
    with open(path, "r") as json_file:
        # Load the JSON data into a Python object.
        json_data = json.load(json_file)
    generation_chunks.append(pd.DataFrame.from_dict(json_data))

# pd.concat raises on an empty list, so keep the original empty-frame result then.
df_generation = pd.concat(generation_chunks) if generation_chunks else pd.DataFrame()

df = df_generation
# Turn the literal string 'nan' into a real missing value.
# replace() returns a new frame, so df_generation keeps the raw values.
df = df.replace('nan', None)
# Parse the timestamps; 'mixed' lets pandas infer the format element by element.
df['E3TimeStamp'] = pd.to_datetime(df['E3TimeStamp'], format='mixed')

df_time = df['E3TimeStamp']

# Measurement columns that are cast to float.
generation_float_columns = [
    'PotenciaAtivaTotal', 'EnergiaTotal_DataLogger',
    'EnergiaDiaria_DataLogger', 'RadiacaoGlobal', 'TempAr', 'TempModulo',
    'VelocidadeVento', 'EnergiaTotal_LoteB', 'EnergiaDiaria_LoteB',
    'EnergiaReativa_LoteB', 'EnergiaTotal_LoteC', 'EnergiaDiaria_LoteC',
    'EnergiaReativa_LoteC', 'EnergiaTotal_LoteD', 'EnergiaDiaria_LoteD',
    'EnergiaReativa_LoteD', 'EnergiaTotal_LoteE', 'EnergiaDiaria_LoteE',
    'EnergiaReativa_LoteE', 'EnergiaTotal_LoteF', 'EnergiaDiaria_LoteF',
    'EnergiaReativa_LoteF',
]
df_float = df[generation_float_columns].astype(float)

df = pd.concat([df_time, df_float], axis=1)

# Order chronologically and index by timestamp. The frame was rebuilt just above,
# so 'E3TimeStamp' is always a column here and no try/except is needed.
df = df.sort_values(by='E3TimeStamp').set_index('E3TimeStamp')

# Per-sample generation: y[t] = EnergiaDiaria[t + 1] - EnergiaDiaria[t].
# The original note here reads "negative differences"; presumably the daily
# counter resets and yields negative values at that point -- TODO confirm.
for lot in 'BCDEF':
    df[f'y{lot}'] = df[f'EnergiaDiaria_Lote{lot}'].diff().shift(-1)

# Drop the rows where yF is null: Lot F is the one that starts latest.
df = df[df['yF'].notna()]

# Weather Data

In [290]:
# Location of the curated weather export.
path = 'C:/Users/EMC/Documents/doutorado/doctorade/data/loanda/weather_data/loanda_weather_curated.json'

# Parse the JSON document, then build the frame from the parsed object.
with open(path, "r") as json_file:
    json_data = json.load(json_file)
df_weather = pd.DataFrame.from_dict(json_data)

# Weather columns that are cast to float.
weather_float_columns = [
    'temperature_2m_in_°C', 'relativehumidity_2m_in_%',
    'precipitation_in_mm', 'weathercode_in_wmo code_x', 'cloudcover_in_%',
    'cloudcover_low_in_%', 'cloudcover_mid_in_%', 'cloudcover_high_in_%',
    'windspeed_10m_in_km/h', 'is_day_in_', 'shortwave_radiation_in_W/m²',
    'direct_radiation_in_W/m²', 'diffuse_radiation_in_W/m²',
    'direct_normal_irradiance_in_W/m²', 'terrestrial_radiation_in_W/m²',
    'shortwave_radiation_instant_in_W/m²',
    'direct_radiation_instant_in_W/m²', 'diffuse_radiation_instant_in_W/m²',
    'direct_normal_irradiance_instant_in_W/m²',
    'terrestrial_radiation_instant_in_W/m²',
    'weathercode_in_wmo code_y', 'temperature_2m_max_in_°C',
    'temperature_2m_min_in_°C', 'temperature_2m_mean_in_°C',
    'apparent_temperature_max_in_°C', 'apparent_temperature_min_in_°C',
    'apparent_temperature_mean_in_°C', 'precipitation_sum_in_mm', 'rain_sum_in_mm',
    'precipitation_hours_in_h', 'windspeed_10m_max_in_km/h',
    'windgusts_10m_max_in_km/h', 'winddirection_10m_dominant_in_°',
    'shortwave_radiation_sum_in_MJ/m²', 'et0_fao_evapotranspiration_in_mm',
    'time_of_sunlight_in_iso8601',
]

df_time = df_weather['time_in_iso8601']
df_float = df_weather[weather_float_columns].astype(float)

# Put the timestamp column back next to the numeric columns and order by time.
df_weather = (
    pd.concat([df_time, df_float], axis=1)
    .sort_values(by='time_in_iso8601')
)

# Energia gerada por minuto

In [353]:
# Keep only the samples of 2021-12-09 and plot Lot C's per-sample generation.
# `plt` is the plotly package in this notebook, so this call is plotly.plot.
single_day_mask = (df.index >= '2021-12-09') & (df.index < '2021-12-10')
df_teste = df[single_day_mask]
plt.plot(data_frame=df_teste, kind='line', x=df_teste.index, y=[df_teste['yC']])

# Verificar a correlação entre as variáveis

In [299]:
# Pairwise correlation between the on-site sensor readings and the
# per-sample generation of every lot.
sensor_and_lot_columns = [
    'RadiacaoGlobal', 'TempAr', 'TempModulo', 'VelocidadeVento',
    'yB', 'yC', 'yD', 'yE', 'yF',
]
df[sensor_and_lot_columns].corr()

Unnamed: 0,RadiacaoGlobal,TempAr,TempModulo,VelocidadeVento,yB,yC,yD,yE,yF
RadiacaoGlobal,1.0,0.481401,0.847573,0.184938,0.01587,0.015047,0.013716,0.042689,0.043056
TempAr,0.481401,1.0,0.832691,0.080472,-0.018537,-0.018929,-0.019907,0.013538,0.014032
TempModulo,0.847573,0.832691,1.0,0.14464,0.000499,-0.000127,-0.001725,0.03442,0.034676
VelocidadeVento,0.184938,0.080472,0.14464,1.0,0.002381,0.001922,0.00193,0.006861,0.006864
yB,0.01587,-0.018537,0.000499,0.002381,1.0,0.487009,0.491476,0.15726,0.156903
yC,0.015047,-0.018929,-0.000127,0.001922,0.487009,1.0,0.498599,0.190981,0.18816
yD,0.013716,-0.019907,-0.001725,0.00193,0.491476,0.498599,1.0,0.190072,0.182548
yE,0.042689,0.013538,0.03442,0.006861,0.15726,0.190981,0.190072,1.0,0.281885
yF,0.043056,0.014032,0.034676,0.006864,0.156903,0.18816,0.182548,0.281885,1.0


# Anomaly detection

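PyOD — ECOD: Unsupervised Outlier Detection Using Empirical Cumulative Distribution Functions <br>
1: Normalizar cada série y (z-score): subtrair a média e dividir pelo desvio padrão. <br>
2: Utilizar 10% dos dados totais para treinamento: aproximadamente as primeiras 80000 linhas.<br>
3: Aplicar o ECOD a cada variável y separadamente.<br>
4: Determinar o threshold como 6 vezes o desvio padrão dos scores; os pontos acima dele são tratados como anomalias e substituídos por interpolação.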

In [364]:
# Count the missing values in each lot's per-sample generation series.
# NOTE(review): `df_anomaly` is only assigned in a later cell of this notebook,
# so on a fresh kernel this cell raises NameError unless that cell has run.
lot_series = ['yB', 'yC', 'yD', 'yE', 'yF']
df_anomaly[lot_series].isna().sum()

yB       0
yC    4130
yD    4130
yE    4130
yF    4130
dtype: int64

In [None]:
# Print the per-sample generation series of each lot, one after the other.
for lot_column in ['yB', 'yC', 'yD', 'yE', 'yF']:
    print(df[lot_column])

In [366]:
# Detect anomalies in each lot's per-sample generation with ECOD and replace them
# by interpolation. ECOD is already imported in the notebook's first cell.

# Work on a copy so `df` keeps the raw series.
df_anomaly = df[['yB', 'yC', 'yD', 'yE', 'yF']].copy()

# Number of leading rows used to fit the detector.
train_row_number = 80000
# A sample is an anomaly when its score exceeds this many standard deviations
# of the test scores.
score_std_factor = 6

for y in df_anomaly.columns:
    # Explicit copy: this frame is modified below, and assigning into a plain
    # column selection triggers pandas' SettingWithCopyWarning.
    df_y = df_anomaly[[y]].copy()

    # Z-score normalisation.
    # NOTE(review): the mean and std are computed over the whole series (train
    # and test rows together), not over the training rows only.
    training_mean = df_y.mean()
    training_std = df_y.std()
    df_y_normalized = (df_y - training_mean) / training_std

    train = df_y_normalized.iloc[:train_row_number, :]
    test = df_y_normalized.iloc[train_row_number:, :]

    clf = ECOD()
    clf.fit(train)

    # Raw outlier scores on the training rows and on the remaining rows.
    y_train_scores = clf.decision_scores_
    y_test_scores = clf.decision_function(test)

    # One threshold, derived from the test scores, is applied to both splits.
    threshold = y_test_scores.std() * score_std_factor

    # Index labels of the rows whose score exceeds the threshold.
    results_ecod_test = test.index[y_test_scores > threshold]
    results_ecod_training = train.index[y_train_scores > threshold]
    total_anomalias = results_ecod_test.append(results_ecod_training)

    # Blank out the anomalous samples and fill the gaps by interpolation.
    df_y.loc[total_anomalias, y] = np.nan

    df_anomaly[y] = df_y[y].interpolate()

In [371]:
# Keep the cleaned series from 2022-01-09 up to (not including) 2022-01-16
# and plot all five lots on the same line chart.
week_mask = (df_anomaly.index >= '2022-01-09') & (df_anomaly.index < '2022-01-16')
df_teste = df_anomaly[week_mask]
plt.plot(
    data_frame=df_teste,
    kind='line',
    x=df_teste.index,
    y=[df_teste[lot_column] for lot_column in ('yB', 'yC', 'yD', 'yE', 'yF')],
)

In [377]:
# Display the per-lot series after the anomaly-detection loop filled them back in.
df_anomaly

Unnamed: 0_level_0,yB,yC,yD,yE,yF
E3TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-12-09 10:36:17.677,0.0,0.0,0.0,0.0,0.0
2021-12-09 10:37:17.190,0.0,0.0,0.0,0.0,0.0
2021-12-09 10:38:17.163,0.0,0.0,0.0,0.0,0.0
2021-12-09 10:39:17.227,0.0,0.0,0.0,0.0,0.0
2021-12-09 10:40:17.247,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...
2023-08-09 22:15:10.417,0.0,0.0,0.0,0.0,0.0
2023-08-09 22:16:10.357,0.0,0.0,0.0,0.0,0.0
2023-08-09 22:17:10.407,0.0,0.0,0.0,0.0,0.0
2023-08-09 22:18:10.380,0.0,0.0,0.0,0.0,0.0


In [381]:
# Correlation between the on-site sensor readings and the cleaned lot series.
sensor_columns = ['RadiacaoGlobal', 'TempAr', 'TempModulo', 'VelocidadeVento']
sensors_with_clean_lots = pd.concat([df[sensor_columns], df_anomaly], axis=1)
sensors_with_clean_lots.corr()

Unnamed: 0,RadiacaoGlobal,TempAr,TempModulo,VelocidadeVento,yB,yC,yD,yE,yF
RadiacaoGlobal,1.0,0.481401,0.847573,0.184938,0.910588,0.906037,0.870054,0.91212,0.918641
TempAr,0.481401,1.0,0.832691,0.080472,0.428765,0.431099,0.385988,0.427425,0.435159
TempModulo,0.847573,0.832691,1.0,0.14464,0.791286,0.789195,0.734306,0.786779,0.787899
VelocidadeVento,0.184938,0.080472,0.14464,1.0,0.141015,0.133103,0.152913,0.152212,0.152998
yB,0.910588,0.428765,0.791286,0.141015,1.0,0.959665,0.907383,0.953056,0.941619
yC,0.906037,0.431099,0.789195,0.133103,0.959665,1.0,0.898639,0.945756,0.9376
yD,0.870054,0.385988,0.734306,0.152913,0.907383,0.898639,1.0,0.902271,0.903521
yE,0.91212,0.427425,0.786779,0.152212,0.953056,0.945756,0.902271,1.0,0.945724
yF,0.918641,0.435159,0.787899,0.152998,0.941619,0.9376,0.903521,0.945724,1.0


In [375]:
# Same correlation matrix computed on the lot series still stored in `df`,
# for comparison with the cleaned version.
df.loc[
    :,
    ['RadiacaoGlobal', 'TempAr', 'TempModulo', 'VelocidadeVento',
     'yB', 'yC', 'yD', 'yE', 'yF'],
].corr()

Unnamed: 0,RadiacaoGlobal,TempAr,TempModulo,VelocidadeVento,yB,yC,yD,yE,yF
RadiacaoGlobal,1.0,0.481401,0.847573,0.184938,0.01587,0.015047,0.013716,0.042689,0.043056
TempAr,0.481401,1.0,0.832691,0.080472,-0.018537,-0.018929,-0.019907,0.013538,0.014032
TempModulo,0.847573,0.832691,1.0,0.14464,0.000499,-0.000127,-0.001725,0.03442,0.034676
VelocidadeVento,0.184938,0.080472,0.14464,1.0,0.002381,0.001922,0.00193,0.006861,0.006864
yB,0.01587,-0.018537,0.000499,0.002381,1.0,0.487009,0.491476,0.15726,0.156903
yC,0.015047,-0.018929,-0.000127,0.001922,0.487009,1.0,0.498599,0.190981,0.18816
yD,0.013716,-0.019907,-0.001725,0.00193,0.491476,0.498599,1.0,0.190072,0.182548
yE,0.042689,0.013538,0.03442,0.006861,0.15726,0.190981,0.190072,1.0,0.281885
yF,0.043056,0.014032,0.034676,0.006864,0.156903,0.18816,0.182548,0.281885,1.0


# Criar granularidade de HORA

In [5]:
# Aggregate the additive quantities to hourly totals.
# 'h' replaces the 'H' alias, which pandas deprecated in 2.2; the lowercase
# spelling is accepted by older releases as well.
# NOTE(review): sum() yields 0 for an hour with no non-null samples, so hours
# without data cannot be told apart from hours with zero generation; consider
# sum(min_count=1) if that distinction matters.
df_minutes = df[['RadiacaoGlobal', 'yB', 'yC', 'yD', 'yE', 'yF']].resample('h').sum()
df_minutes

Unnamed: 0_level_0,RadiacaoGlobal,yB,yC,yD,yE,yF
E3TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-08-04 10:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2021-08-04 11:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2021-08-04 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2021-08-04 13:00:00,0.0,0.0,0.0,0.0,0.0,0.0
2021-08-04 14:00:00,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...
2023-08-09 18:00:00,-110.0,2.1,2.2,2.3,2.2,2.4
2023-08-09 19:00:00,-422.0,0.0,0.0,0.0,0.0,0.0
2023-08-09 20:00:00,-269.0,0.0,0.0,0.0,0.0,0.0
2023-08-09 21:00:00,-255.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Aggregate the temperature and wind readings to hourly means.
# 'h' replaces the 'H' alias, which pandas deprecated in 2.2; the lowercase
# spelling is accepted by older releases as well.
df_hour = df[['TempAr', 'TempModulo', 'VelocidadeVento']].resample('h').mean()
df_hour

Unnamed: 0_level_0,TempAr,TempModulo,VelocidadeVento
E3TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-08-04 10:00:00,,,
2021-08-04 11:00:00,,,
2021-08-04 12:00:00,,,
2021-08-04 13:00:00,,,
2021-08-04 14:00:00,,,
...,...,...,...
2023-08-09 18:00:00,24.383333,22.200000,0.666667
2023-08-09 19:00:00,22.566667,19.750000,0.483333
2023-08-09 20:00:00,21.100000,18.066667,0.400000
2023-08-09 21:00:00,20.800000,18.066667,0.900000


In [8]:
# Put the hourly totals and the hourly means side by side, aligned on the index.
df_generation_hour = pd.concat(objs=[df_minutes, df_hour], axis='columns')
df_generation_hour

Unnamed: 0_level_0,RadiacaoGlobal,yB,yC,yD,yE,yF,TempAr,TempModulo,VelocidadeVento
E3TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-08-04 10:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,
2021-08-04 11:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,
2021-08-04 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,
2021-08-04 13:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,
2021-08-04 14:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,
...,...,...,...,...,...,...,...,...,...
2023-08-09 18:00:00,-110.0,2.1,2.2,2.3,2.2,2.4,24.383333,22.200000,0.666667
2023-08-09 19:00:00,-422.0,0.0,0.0,0.0,0.0,0.0,22.566667,19.750000,0.483333
2023-08-09 20:00:00,-269.0,0.0,0.0,0.0,0.0,0.0,21.100000,18.066667,0.400000
2023-08-09 21:00:00,-255.0,0.0,0.0,0.0,0.0,0.0,20.800000,18.066667,0.900000


# Concat weather e generation data

In [441]:
def _hour_join_key(timestamps):
    """Return '<date> <hour>' strings (hour not zero-padded) for a datetime Series."""
    return timestamps.dt.date.astype(str) + ' ' + timestamps.dt.hour.astype(str)


# Bring the timestamp back as a regular column and derive the generation-side key.
df_generation_hour = df_generation_hour.reset_index()
df_generation_hour['time_join_generation'] = _hour_join_key(df_generation_hour['E3TimeStamp'])

# Parse the weather timestamps and derive the matching key on the weather side.
df_weather['time_in_iso8601'] = pd.to_datetime(df_weather['time_in_iso8601'], format='ISO8601')
df_weather['time_join_generation'] = _hour_join_key(df_weather['time_in_iso8601'])

In [444]:
# Left join: keep every hourly generation row and attach the weather columns
# that share its date/hour key.
df = pd.merge(
    left=df_generation_hour,
    right=df_weather,
    how='left',
    on='time_join_generation',
)

In [452]:
# Display the merged hourly frame (generation plus weather columns).
df

Unnamed: 0_level_0,RadiacaoGlobal,yB,yC,yD,yE,yF,TempAr,TempModulo,VelocidadeVento,time_join_generation,...,apparent_temperature_mean_in_°C,precipitation_sum_in_mm,rain_sum_in_mm,precipitation_hours_in_h,windspeed_10m_max_in_km/h,windgusts_10m_max_in_km/h,winddirection_10m_dominant_in_°,shortwave_radiation_sum_in_MJ/m²,et0_fao_evapotranspiration_in_mm,time_of_sunlight_in_iso8601
E3TimeStamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-08-04 10:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,,2021-08-04 10,...,14.6,0.0,0.0,0.0,15.3,34.2,103.0,17.52,3.93,11.133333
2021-08-04 11:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,,2021-08-04 11,...,14.6,0.0,0.0,0.0,15.3,34.2,103.0,17.52,3.93,11.133333
2021-08-04 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,,2021-08-04 12,...,14.6,0.0,0.0,0.0,15.3,34.2,103.0,17.52,3.93,11.133333
2021-08-04 13:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,,2021-08-04 13,...,14.6,0.0,0.0,0.0,15.3,34.2,103.0,17.52,3.93,11.133333
2021-08-04 14:00:00,0.0,0.0,0.0,0.0,0.0,0.0,,,,2021-08-04 14,...,14.6,0.0,0.0,0.0,15.3,34.2,103.0,17.52,3.93,11.133333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-09 18:00:00,-110.0,2.1,2.2,2.3,2.2,2.4,24.383333,22.200000,0.666667,2023-08-09 18,...,20.9,1.8,1.8,5.0,20.4,44.6,77.0,14.64,3.50,11.216667
2023-08-09 19:00:00,-422.0,0.0,0.0,0.0,0.0,0.0,22.566667,19.750000,0.483333,2023-08-09 19,...,20.9,1.8,1.8,5.0,20.4,44.6,77.0,14.64,3.50,11.216667
2023-08-09 20:00:00,-269.0,0.0,0.0,0.0,0.0,0.0,21.100000,18.066667,0.400000,2023-08-09 20,...,20.9,1.8,1.8,5.0,20.4,44.6,77.0,14.64,3.50,11.216667
2023-08-09 21:00:00,-255.0,0.0,0.0,0.0,0.0,0.0,20.800000,18.066667,0.900000,2023-08-09 21,...,20.9,1.8,1.8,5.0,20.4,44.6,77.0,14.64,3.50,11.216667


# EDA

In [446]:
# Share of zeros and of missing values per column, in percent.
total_rows = len(df)
zero_share = round((df == 0).sum() / total_rows * 100, 2)
null_share = round(df.isna().sum() / total_rows * 100, 2)
zeros_and_nulls = pd.concat([zero_share, null_share], axis=1)

# Label the two unnamed columns and list the 30 columns with the most nulls.
(
    zeros_and_nulls
    .rename(columns={0: '% Zeros', 1: '% Nulls'})
    .sort_values(by='% Nulls', ascending=False)
    .head(30)
)

Unnamed: 0,% Zeros,% Nulls
TempAr,0.63,10.48
TempModulo,0.63,10.48
VelocidadeVento,2.04,10.48
E3TimeStamp,0.0,0.0
apparent_temperature_min_in_°C,0.0,0.0
direct_radiation_instant_in_W/m²,52.19,0.0
diffuse_radiation_instant_in_W/m²,50.48,0.0
direct_normal_irradiance_instant_in_W/m²,52.19,0.0
terrestrial_radiation_instant_in_W/m²,49.99,0.0
weathercode_in_wmo code_y,16.99,0.0


In [459]:
# Build a per-column summary table (counts, extremes, mean, std, sum, date range).

# Index by timestamp. The explicit check keeps the cell re-runnable without a
# bare `except`, which would also hide unrelated errors.
if 'E3TimeStamp' in df.columns:
    df = df.set_index('E3TimeStamp')

# Remove the join helper columns; errors='ignore' skips those already removed.
df = df.drop(columns=['time_join_generation', 'time_in_iso8601'], errors='ignore')

df_descricao = pd.DataFrame({
    'No_None_Values_Count': df.count().round(2),
    'None_Values_Count': df.isnull().sum().round(2),
    'Zeroes_Values_Count': (df == 0).sum().round(2),
    'Min_Values': df.min().round(2),
    'Max_Values': df.max().round(2),
    'Mean_Values': df.mean().round(2),
    'Std_Values': df.std().round(2),
    'Sum of Values': df.sum().round(0),
})
# The date range comes from the index, so it is the same value on every row.
df_descricao['Min_Date'] = df.index.min()
df_descricao['Max_Date'] = df.index.max()
df_descricao

Unnamed: 0,No_None_Values_Count,None_Values_Count,Zeroes_Values_Count,Min_Values,Max_Values,Mean_Values,Std_Values,Sum of Values,Min_Date,Max_Date
RadiacaoGlobal,17653,0,1982,-803.0,77479.0,11868.56,18198.24,209515760.0,2021-08-04 10:00:00,2023-08-09 22:00:00
yB,17653,0,10348,0.0,327501.1,279.95,3978.32,4942011.0,2021-08-04 10:00:00,2023-08-09 22:00:00
yC,17653,0,10435,0.0,331003.1,276.7,3986.86,4884528.0,2021-08-04 10:00:00,2023-08-09 22:00:00
yD,17653,0,11160,0.0,334239.9,250.39,3930.82,4420158.0,2021-08-04 10:00:00,2023-08-09 22:00:00
yE,17653,0,11102,0.0,15313.3,177.08,377.63,3125925.0,2021-08-04 10:00:00,2023-08-09 22:00:00
yF,17653,0,11096,0.0,12715.4,176.86,361.91,3122096.0,2021-08-04 10:00:00,2023-08-09 22:00:00
TempAr,15803,1850,111,0.0,179.67,23.78,6.79,375869.0,2021-08-04 10:00:00,2023-08-09 22:00:00
TempModulo,15803,1850,111,-10.0,182.48,26.59,15.58,420125.0,2021-08-04 10:00:00,2023-08-09 22:00:00
VelocidadeVento,15803,1850,360,-10.0,9.62,0.79,3.8,12492.0,2021-08-04 10:00:00,2023-08-09 22:00:00
temperature_2m_in_°C,17653,0,0,6.3,38.6,23.35,4.79,412231.0,2021-08-04 10:00:00,2023-08-09 22:00:00


In [477]:
# Raw generation rows recorded on 2021-12-09 up to and including 18:00:00.
# Parse the timestamps once and reuse them for both bounds instead of parsing
# the whole column twice.
generation_timestamps = pd.to_datetime(df_generation.E3TimeStamp, format='mixed')
df_generation[
    (generation_timestamps >= '2021-12-09')
    & (generation_timestamps <= '2021-12-09 18:00:00')
]

Unnamed: 0,E3TimeStamp,PotenciaAtivaTotal,EnergiaTotal_DataLogger,EnergiaDiaria_DataLogger,RadiacaoGlobal,TempAr,TempModulo,VelocidadeVento,EnergiaTotal_LoteB,EnergiaDiaria_LoteB,...,EnergiaReativa_LoteC,EnergiaTotal_LoteD,EnergiaDiaria_LoteD,EnergiaReativa_LoteD,EnergiaTotal_LoteE,EnergiaDiaria_LoteE,EnergiaReativa_LoteE,EnergiaTotal_LoteF,EnergiaDiaria_LoteF,EnergiaReativa_LoteF
63416,2021-12-09 09:34:42.630000000,2883,1992737,5207,901.0,25.0,48.0,2.0,949063.2,1756.0,...,8.282,206490.6,1725.4,9.346,,,,,,
63417,2021-12-09 09:35:42.653000000,2889,1992786,5257,903.0,26.0,49.0,3.0,949079.8,1772.3,...,10.492,206506.9,1741.5,11.178,,,,,,
63418,2021-12-09 09:36:42.640000000,2897,1992835,5304,906.0,26.0,49.0,2.0,949095.8,1788.4,...,-3.314,206523.0,1757.5,-4.719,,,,,,
63419,2021-12-09 09:37:42.650000000,2904,1992883,5353,915.0,26.0,50.0,3.0,949111.6,1803.8,...,5.247,206539.1,1773.8,5.573,,,,,,
63420,2021-12-09 09:38:42.660000000,2914,1992930,5401,917.0,26.0,50.0,4.0,949126.8,1820.0,...,8.82,206555.2,1790.0,9.826,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192933,2021-12-09 17:55:24.557000000,436,2017072,28567,186.0,31.0,32.0,1.0,953885.3,6577.9,...,2.931,211501.3,6735.9,3.587,7796.2,6820.7,3.184,1766.6,1766.6,3.094
192934,2021-12-09 17:56:24.563000000,420,2017079,28574,182.0,31.0,31.0,1.0,953886.6,6579.2,...,3.151,211502.9,6737.5,3.804,7797.6,6822.1,3.462,1767.8,1767.8,3.319
192935,2021-12-09 17:57:24.560000000,405,2017085,28580,178.0,31.0,31.0,1.0,953887.8,6580.4,...,2.919,211504.4,6739.0,3.588,7799.0,6823.5,3.199,1769.1,1769.1,3.12
192936,2021-12-09 17:58:24.563000000,390,2017092,28587,173.0,31.0,31.0,2.0,953889.0,6581.6,...,2.882,211505.8,6740.4,3.559,7800.5,6825.0,3.168,1770.3,1770.3,3.064


In [476]:
# Parse the raw timestamp column; 'mixed' infers the format element by element.
pd.to_datetime(df_generation['E3TimeStamp'], format='mixed')

0        2021-08-17 07:42:20.090
1        2021-08-17 07:43:20.043
2        2021-08-17 07:44:20.037
3        2021-08-17 07:45:20.127
4        2021-08-17 07:46:20.027
                   ...          
949868   2023-08-09 22:16:10.357
949869   2023-08-09 22:17:10.407
949870   2023-08-09 22:18:10.380
949871   2023-08-09 22:19:10.373
949872   2023-08-09 22:20:10.317
Name: E3TimeStamp, Length: 949873, dtype: datetime64[ns]