In [None]:
# install dependencies
! pip install ydata-profiling   # its necessary to restart the kernel

In [3]:
import datetime
import os

import pandas as pd

# read raw_df (downloaded from the project's GitHub repository)
df = pd.read_csv('https://raw.githubusercontent.com/srlucasromulo/PPGCC-DS-DeepLearning/main/data/sjdr_raw.csv', low_memory=False, sep=',')

# Parse the date strings once, vectorized, with the strict 'YYYY-MM-DD' format;
# every date-derived column below reuses this parsed series instead of running
# a separate Python-level .apply per column.
parsed_dates = pd.to_datetime(df['data'], format='%Y-%m-%d')

# The hour is obtained by dividing `hora` by 100 and truncating
# (presumably `hora` is encoded as HHMM, e.g. 1300 -> 13 -- TODO confirm).
hours = (df['hora'] / 100).astype(int)

# rectify date/hour columns: they stay plain datetime.date / datetime.time objects
df['data'] = parsed_dates.dt.date
df['hora'] = hours.map(lambda h: datetime.time(h))

# separate day, month, year
df['dia'] = parsed_dates.dt.day
df['mes'] = parsed_dates.dt.month
df['ano'] = parsed_dates.dt.year

# join date/hour: date at midnight plus the hour offset, computed column-wise
# rather than with a row-by-row df.apply(axis=1)
df['data_hora'] = parsed_dates + pd.to_timedelta(hours, unit='h')

# order chronologically and renumber the index from 0 (no in-place mutation)
df = df.sort_values(by='data_hora', ignore_index=True)

# create time feature (represents time passing) from index:
# after the sort above the index is the chronological position of each row
df['time'] = df.index

# create seasons features
def get_season(d):
	"""Return the season name (in Portuguese) for a calendar date.

	Only the month and day of ``d`` are used; the year is irrelevant. The
	boundaries are fixed calendar dates:

	* before 21 March          -> 'verão'     (summer)
	* 21 March .. 20 June      -> 'outono'    (autumn)
	* 21 June .. 22 September  -> 'inverno'   (winter)
	* 23 September .. 20 Dec.  -> 'primavera' (spring)
	* 21 December onwards      -> 'verão'     (summer)

	Parameters:
		d: any object exposing integer ``month`` and ``day`` attributes,
		   e.g. ``datetime.date`` or ``datetime.datetime``.

	Returns:
		One of 'verão', 'outono', 'inverno', 'primavera'.
	"""
	# Comparing (month, day) tuples avoids building several datetime.date
	# objects per call and also works for datetime.datetime inputs, which
	# cannot be compared against datetime.date directly.
	month_day = (d.month, d.day)
	if month_day < (3, 21):
		return 'verão'
	if month_day < (6, 21):
		return 'outono'
	if month_day < (9, 23):
		return 'inverno'
	if month_day < (12, 21):
		return 'primavera'
	return 'verão'
# label every row with its season; get_season is applied element-wise to the
# date column (no lambda wrapper needed)
df['estacao'] = df['data'].apply(get_season)

# save to file
# DataFrame.to_csv does not create missing parent directories, so make sure
# ./data exists before writing (no-op when it is already there).
os.makedirs('./data', exist_ok=True)
df.to_csv('./data/sjdr.csv', sep=',', index=False)

In [101]:
# profile dataset
from ydata_profiling import ProfileReport
import pandas as pd

# Load the processed dataset from the project's GitHub repository and discard
# the 'data' and 'hora' columns before profiling.
df = pd.read_csv(
    'https://raw.githubusercontent.com/srlucasromulo/PPGCC-DS-DeepLearning/main/data/sjdr.csv',
    sep=',',
    low_memory=False,
).drop(columns=['data', 'hora'])

# Build the profiling report with the explorative and time-series options
# switched on, using the 'time' column as the sort key.
profile = ProfileReport(
    df,
    title="Projeto Mestrado",
    explorative=True,
    tsmode=True,
    sortby='time',
)

# Write the report out as an HTML file in the working directory.
profile.to_file("report_raw.html")