In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import Imputer


In [2]:
# Load the per-macro-region rainfall table. Column 0 ('datahora') becomes the
# index and is parsed as datetimes, so the index repeats once per macro-region.
funceme_df = pd.read_csv("scraping/funceme_media_macrorregiao.csv", index_col=0, parse_dates=['datahora'])

# Replacement values for missing data: the mean of each column over the rows
# stamped 1973-08-01 12:00:00 (Series.mean() skips NaN).
linhas_referencia = funceme_df.loc['1973-08-01 12:00:00']
media_normal_a_substituir = linhas_referencia['Normal(mm)'].mean()
media_observado_a_substituir = linhas_referencia['Observado(mm)'].mean()
media_desvio_a_substituir = linhas_referencia['Desvio(%)'].mean()

# Handle missing values.
# - 'Normal(mm)' is now filled with its own reference mean; it was previously
#   filled with the 'Observado(mm)' mean, which looked like a copy-paste slip.
#   NOTE(review): if 'Normal(mm)' is entirely missing on the reference date this
#   mean is NaN and nothing gets filled -- confirm against the CSV.
# - fillna is applied to the frame and the result rebound, because
#   df[col].fillna(..., inplace=True) is a chained assignment that does not
#   update the frame under pandas Copy-on-Write.
funceme_df = funceme_df.fillna({
    'Normal(mm)': media_normal_a_substituir,
    'Observado(mm)': media_observado_a_substituir,
    'Desvio(%)': media_desvio_a_substituir,
})

# Mean of each rainfall column per timestamp (i.e. across macro-regions).
# sort=False keeps the timestamps in order of first appearance, matching
# funceme_df.index.unique(), so the lists line up with that index.
medias_por_data = funceme_df.groupby(level=0, sort=False)[
    ['Normal(mm)', 'Observado(mm)', 'Desvio(%)']
].mean()
medias = medias_por_data['Normal(mm)'].tolist()
observado = medias_por_data['Observado(mm)'].tolist()
desvio = medias_por_data['Desvio(%)'].tolist()


In [3]:
# Preview the first 20 rows of the per-macro-region table.
funceme_df.head(20)

Unnamed: 0_level_0,Macrorregioes,Normal(mm),Observado(mm),Desvio(%)
datahora,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1973-01-01 12:00:00,Cariri,148.3,103.8,-30.0
1973-01-01 12:00:00,Ibiapaba,108.5,176.7,62.9
1973-01-01 12:00:00,Jaguaribana,83.6,95.0,13.5
1973-01-01 12:00:00,Litoral De Fortaleza,97.2,112.7,15.9
1973-01-01 12:00:00,Litoral De Pecem,88.1,128.9,46.3
1973-01-01 12:00:00,Litoral Norte,111.0,203.0,82.9
1973-01-01 12:00:00,Macico De Baturite,95.9,93.1,-2.9
1973-01-01 12:00:00,Sertao Central E Inhamuns,88.1,73.1,-17.0
1973-02-01 12:00:00,Cariri,166.8,94.7,-43.2
1973-02-01 12:00:00,Ibiapaba,145.7,162.6,11.6


In [4]:
# Build the state-level frame: one row per unique timestamp of funceme_df and
# one column per list of per-timestamp means (medias, observado, desvio).
# The frame is constructed directly from the lists; the previous
# pd.concat(..., join_axes=...) call relied on an argument that was removed in
# pandas 1.0 and raises TypeError there.
# .tolist() keeps the resulting index unnamed, as before.
funceme_media_estadual_df = pd.DataFrame(
    {
        'Normal(mm)': medias,
        'Observado(mm)': observado,
        'Desvio(%)': desvio,
    },
    index=funceme_df.index.unique().tolist(),
)
funceme_media_estadual_df.head()


Unnamed: 0,Normal(mm),Observado(mm),Desvio(%)
1973-01-01 12:00:00,102.5875,123.2875,21.45
1973-02-01 12:00:00,136.9375,160.375,16.6125
1973-03-01 12:00:00,223.3125,261.5875,15.7
1973-04-01 12:00:00,204.15,350.5875,73.3
1973-05-01 12:00:00,105.525,165.175,53.5


## Assign a rainfall class to each year

In [27]:
rainy_seasonal_months = [2, 3, 4]

# Walk the state-level frame once and keep only the (timestamp, row) pairs whose
# month number is listed in rainy_seasonal_months.
kept_pairs = [
    (timestamp, values)
    for timestamp, values in funceme_media_estadual_df.iterrows()
    if timestamp.month in rainy_seasonal_months
]

# Split the pairs into the two parallel lists that the next cell consumes.
indexes = [timestamp for timestamp, _ in kept_pairs]
rows = [values for _, values in kept_pairs]

In [37]:
# Reassemble the kept rows into a frame indexed by their timestamps, with the
# same columns as the state-level frame.
filtrado_df = pd.DataFrame(
    rows,
    index=indexes,
    columns=funceme_media_estadual_df.columns,
)
filtrado_df.head(10)


Unnamed: 0,Normal(mm),Observado(mm),Desvio(%)
1973-02-01 12:00:00,136.9375,160.375,16.6125
1973-03-01 12:00:00,223.3125,261.5875,15.7
1973-04-01 12:00:00,204.15,350.5875,73.3
1974-02-01 12:00:00,127.9375,204.0625,58.5375
1974-03-01 12:00:00,220.5125,359.1125,63.075
1974-04-01 12:00:00,208.05,447.8,116.3875
1975-02-01 12:00:00,127.9375,146.4875,14.275
1975-03-01 12:00:00,220.5125,273.3,24.85
1975-04-01 12:00:00,208.05,195.0625,-6.6375
1976-02-01 12:00:00,127.9375,206.3125,61.525


In [44]:
# Collapse the filtered rows to one row per calendar year by averaging every
# column over the rows that share the same index year.
# NOTE: this rebinds filtrado_df. Afterwards the index holds plain year numbers
# instead of timestamps, so the cell cannot be re-run on its own output; re-run
# the cell that builds filtrado_df first.
year_of_row = filtrado_df.index.year
filtrado_df = filtrado_df.groupby(year_of_row).mean()


In [50]:
# filtrado_df

In [74]:
strong = 'strong'
normal = 'normal'
weak = 'weak'

# Label each row from its 'Observado(mm)' value:
#   below 50  -> weak
#   below 200 -> normal
#   otherwise -> strong (this branch also catches NaN, since both comparisons
#                        are False for NaN)
classes = [
    weak if observado_mm < 50 else normal if observado_mm < 200 else strong
    for observado_mm in filtrado_df['Observado(mm)']
]

# Store the labels as a new column and preview up to 100 rows.
filtrado_df['classes'] = classes
filtrado_df.head(100)

Unnamed: 0,Normal(mm),Observado(mm),Desvio(%),classes
1973,188.133333,257.516667,35.204167,strong
1974,185.5,336.991667,79.333333,strong
1975,185.5,204.95,10.829167,strong
1976,185.5,193.8375,11.3875,normal
1977,185.5,195.433333,8.241667,normal
1978,185.5,158.775,-10.791667,normal
1979,185.5,115.241667,-35.941667,normal
1980,185.5,171.520833,11.9625,normal
1981,185.5,153.483333,-23.516667,normal
1982,185.5,149.208333,-18.4,normal
