In [1]:
### General ###

import numpy as np
import pandas as pd
import calendar

#### Visualization Tools ###

# Base of sns
import matplotlib.pyplot as plt

# Born on top of matplotlib, but more attractive
import seaborn as sns

# For interactive visualization, not good for too many datapoints
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go

In [2]:
# Load the wind-measurement CSV; fields in this file are separated by ';'.
df = pd.read_csv('./dataset/viento_dataset.csv', sep=';')

# Bare last expression so the notebook renders the loaded frame.
df

Unnamed: 0,codigoNacional,idEquipo,momento,ddInst,ffInst,dd02Minutos,ff02Minutos,dd10Minutos,ff10Minutos
0,360011,0,2019-02-05 18:27:00,218.0,9.7,218.0,4.2,212.0,7.8
1,360011,0,2019-02-05 18:28:00,208.0,8.0,212.0,4.5,213.0,7.7
2,360011,0,2019-02-05 18:29:00,230.0,6.2,204.0,3.8,214.0,7.2
3,360011,0,2019-02-05 18:30:00,213.0,6.8,218.0,3.2,219.0,7.2
4,360011,0,2019-02-05 18:31:00,232.0,9.3,223.0,4.0,221.0,7.5
...,...,...,...,...,...,...,...,...,...
2150608,360011,0,2023-07-31 23:56:00,28.0,4.7,34.0,2.3,44.0,3.2
2150609,360011,0,2023-07-31 23:57:00,28.0,4.9,28.0,2.5,41.0,3.4
2150610,360011,0,2023-07-31 23:58:00,34.0,5.2,28.0,2.5,39.0,3.6
2150611,360011,0,2023-07-31 23:59:00,32.0,5.4,30.0,2.6,37.0,3.8


In [3]:
# Names of the columns that contain at least one missing value.
null_cols = df.columns[df.isna().any()].tolist()
print(null_cols)

# Remove those columns entirely (a no-op when the list is empty).
df = df.drop(null_cols, axis="columns")

# Print the schema of the resulting frame.
df.info()

# Bare last expression so the notebook renders the frame.
df

[]
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2150613 entries, 0 to 2150612
Data columns (total 9 columns):
 #   Column          Dtype  
---  ------          -----  
 0   codigoNacional  int64  
 1   idEquipo        int64  
 2   momento         object 
 3   ddInst          float64
 4   ffInst          float64
 5   dd02Minutos     float64
 6   ff02Minutos     float64
 7   dd10Minutos     float64
 8   ff10Minutos     float64
dtypes: float64(6), int64(2), object(1)
memory usage: 147.7+ MB


Unnamed: 0,codigoNacional,idEquipo,momento,ddInst,ffInst,dd02Minutos,ff02Minutos,dd10Minutos,ff10Minutos
0,360011,0,2019-02-05 18:27:00,218.0,9.7,218.0,4.2,212.0,7.8
1,360011,0,2019-02-05 18:28:00,208.0,8.0,212.0,4.5,213.0,7.7
2,360011,0,2019-02-05 18:29:00,230.0,6.2,204.0,3.8,214.0,7.2
3,360011,0,2019-02-05 18:30:00,213.0,6.8,218.0,3.2,219.0,7.2
4,360011,0,2019-02-05 18:31:00,232.0,9.3,223.0,4.0,221.0,7.5
...,...,...,...,...,...,...,...,...,...
2150608,360011,0,2023-07-31 23:56:00,28.0,4.7,34.0,2.3,44.0,3.2
2150609,360011,0,2023-07-31 23:57:00,28.0,4.9,28.0,2.5,41.0,3.4
2150610,360011,0,2023-07-31 23:58:00,34.0,5.2,28.0,2.5,39.0,3.6
2150611,360011,0,2023-07-31 23:59:00,32.0,5.4,30.0,2.6,37.0,3.8


In [4]:
# Convert the timestamp column from strings (object dtype) to datetime64.
df['momento'] = pd.to_datetime(df.momento)

In [5]:
# Print the frame's schema (dtypes, row count, memory usage) to confirm
# that `momento` is now a datetime column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2150613 entries, 0 to 2150612
Data columns (total 9 columns):
 #   Column          Dtype         
---  ------          -----         
 0   codigoNacional  int64         
 1   idEquipo        int64         
 2   momento         datetime64[ns]
 3   ddInst          float64       
 4   ffInst          float64       
 5   dd02Minutos     float64       
 6   ff02Minutos     float64       
 7   dd10Minutos     float64       
 8   ff10Minutos     float64       
dtypes: datetime64[ns](1), float64(6), int64(2)
memory usage: 147.7 MB


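- `ddInst`: instantaneous wind direction, 0-360.
- `ffInst`: instantaneous wind intensity in knots, 0-31.5.
- `momento`: timestamp of the measurement (one reading per minute in the rows shown above).
- `*02Minutos`, `*10Minutos`: mean over a window of time (2 and 10 minutes, per the column names). Max and Min.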

In [7]:
# Working copy of the frame, kept under its original name `a`.
# NOTE(review): the copy is not needed by this cell itself (nothing here
# mutates it); it is retained in case other cells rely on `a` — confirm.
a = df.copy()

# Weekly ("W") mean of the instantaneous wind intensity. Resampling with
# `on="momento"` and selecting only `ffInst` avoids re-indexing every column
# of the frame just to average one of them.
data = (
    a.resample("W", on="momento")["ffInst"]
    .mean()
    .reset_index()
)

# Title and labelled axes so the figure can be read on its own.
px.line(
    data,
    x="momento",
    y="ffInst",
    title="Weekly mean of ffInst",
    labels={"momento": "Week", "ffInst": "Mean ffInst (knots)"},
)