# Código relativo à disciplina de Projetos de Engenharia II

- O código tem por objetivo criar um modelo de séries temporais meteorológicas para a cidade de Belém. Para isso, foram utilizados a linguagem de programação Python e os registros disponíveis no site do Instituto Nacional de Meteorologia (INMET).

In [None]:
# Celula relativa à importação de bibliotecas

import pandas as pd
import numpy as np
import requests
import json
from calendar import monthrange
import numpy as np
from itertools import product
from datetime import date
from itertools import chain
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns

## Código relativo à extração dos dados


In [None]:
def allDays(ano, mes):
  '''Return every day of a given month as 'YYYY-MM-DD' strings.

  Args:
    ano: Year as an integer.
    mes: Month as an integer (1-12).

  Returns:
    A list with one zero-padded date string per day of the month, in
    calendar order.
  '''
  # monthrange returns (weekday of the first day, number of days in the month).
  _, dias_no_mes = monthrange(ano, mes)

  datas_do_mes = []
  for dia in range(1, dias_no_mes + 1):
    datas_do_mes.append(f'{ano:04d}-{mes:02d}-{dia:02d}')

  return datas_do_mes

In [None]:
# Date ranges requested from the API: one (first day, last day) pair per
# calendar year, covering 2021 and 2022.

anos = list(range(2021, 2023))

# The years are already ascending, so the lists come out in chronological order.
datas_inicio = [f'{ano}-01-01' for ano in anos]
datas_fim = [f'{ano}-12-31' for ano in anos]

In [None]:
def getData(string):
  '''Parse the body of an INMET API response into a DataFrame.

  Args:
    string: Response-like object whose ``text`` attribute holds a JSON
      document.

  Returns:
    A pandas DataFrame built from the decoded JSON with
    ``DataFrame.from_dict``. No filtering is applied here.
  '''
  conteudo = json.loads(string.text)

  return pd.DataFrame.from_dict(conteudo)

def getInfosAboutClimate(data1, data2):
  '''Fetch the INMET records of station A201 between two dates.

  Requests ``https://apitempo.inmet.gov.br/estacao/{data1}/{data2}/A201``
  and parses the response body with ``getData``.

  Args:
    data1: Start date, inserted into the URL as-is (the notebook passes
      'YYYY-MM-DD' strings).
    data2: End date, inserted into the URL as-is.

  Returns:
    The DataFrame produced by ``getData``, or ``None`` when the request
    fails (connection error, timeout, HTTP error status) or the body cannot
    be decoded/converted. A message describing the failure is printed in
    that case, so a missing period is visible instead of silently dropped.
  '''
  print(data1, data2)

  link = 'https://apitempo.inmet.gov.br/estacao/{}/{}/A201'.format(data1, data2)

  try:
    # A timeout keeps a stalled connection from hanging the notebook forever.
    res = requests.get(link, timeout=60)
    # Turn 4xx/5xx answers into an exception instead of parsing an error page.
    res.raise_for_status()

    return getData(res)
  except (requests.RequestException, ValueError) as erro:
    # ValueError covers invalid JSON (json.JSONDecodeError is a subclass) and
    # payloads that pandas cannot turn into a table. Anything else is a
    # programming error and is allowed to propagate.
    print('Falha ao obter dados de {} a {}: {}'.format(data1, data2, erro))
    return None



In [None]:
# Mount Google Drive so the extracted data can be saved there as a CSV file

from google.colab import drive
drive.mount('drive')

Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


In [None]:
# One API call per (start date, end date) pair; the results stay in request order.
dados = list(map(getInfosAboutClimate, datas_inicio, datas_fim))

2021-01-01 2021-12-31
2022-01-01 2022-12-31


In [None]:
# Stack the per-period frames into a single table. ignore_index=True gives the
# result a fresh 0..n-1 index instead of repeating each frame's own row labels.
dados = pd.concat(dados, ignore_index=True)

In [None]:
# Write the data to a local CSV (without the index column) and copy the file
# to the previously mounted Drive

dados.to_csv('data.csv', index = False)
!cp data.csv "drive/My Drive/"

In [None]:
# Display the collected records (pandas truncates the rendered table).
dados

Unnamed: 0,DC_NOME,PRE_INS,TEM_SEN,VL_LATITUDE,PRE_MAX,UF,RAD_GLO,PTO_INS,TEM_MIN,VL_LONGITUDE,UMD_MIN,PTO_MAX,VEN_DIR,DT_MEDICAO,CHUVA,PRE_MIN,UMD_MAX,VEN_VEL,PTO_MIN,TEM_MAX,VEN_RAJ,TEM_INS,UMD_INS,CD_ESTACAO,HR_MEDICAO
0,BELÉM,1009.4,28,-1.411228,1009.4,PA,-0.275,23.3,25,-48.439512,83,23.4,111,2021-01-01,0,1008.6,90,0,22.6,25.7,2.9,25,90,A201,0000
1,BELÉM,1009.8,27.7,-1.411228,1009.8,PA,-0.472,23,24.8,-48.439512,88,23.3,130,2021-01-01,0,1009.4,90,0.2,22.9,25,1.6,24.9,89,A201,0100
2,BELÉM,1010.8,27.8,-1.411228,1010.8,PA,-1.094,23.4,24.6,-48.439512,89,23.4,138,2021-01-01,0,1009.8,92,0,23,24.9,1.9,24.7,92,A201,0200
3,BELÉM,1010.9,27.8,-1.411228,1011.2,PA,1.783,23.2,24.6,-48.439512,89,23.5,160,2021-01-01,0,1010.8,93,0.2,22.9,24.9,1.8,24.9,90,A201,0300
4,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2021-01-01,,,,,,,,,,A201,0400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2022-12-31,,,,,,,,,,A201,1900
8756,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2022-12-31,,,,,,,,,,A201,2000
8757,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2022-12-31,,,,,,,,,,A201,2100
8758,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2022-12-31,,,,,,,,,,A201,2200


In [None]:
def getFloat(x):
  '''Convert a value to float when possible; otherwise return it unchanged.

  Args:
    x: Any value, typically a string returned by the API.

  Returns:
    ``float(x)`` when the conversion succeeds, else ``x`` itself.
  '''
  try:
    return float(x)
  except (TypeError, ValueError, OverflowError):
    # Only conversion failures fall back to the raw value. Unrelated
    # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
    return x

# The API returns every field as a string, numbers included, so the relevant
# columns are converted to float. Identifier, date and time columns are left
# as they are.
colunas_texto = ['DC_NOME', 'UF', 'DT_MEDICAO', 'CD_ESTACAO', 'HR_MEDICAO']

dados_float = dados.copy()

for coluna in dados.columns.drop(colunas_texto):
  # Missing readings (None) stay None; everything else goes through getFloat.
  dados_float[coluna] = dados[coluna].apply(lambda x: None if x is None else getFloat(x))

## Código relativo à análise exploratória e engenharia de features

In [None]:
# Install pandas-profiling for the exploratory report.
# NOTE(review): this installs the master-branch archive, so the installed
# version is not pinned and can change between runs.
!pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip

In [None]:
# Mount Google Drive and load the CSV written by the extraction section

from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): the mount point above is absolute but this path is relative,
# so the read only succeeds when the working directory is /content — confirm.
dados_float = pd.read_csv("drive/My Drive/data.csv")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas_profiling

# Build an exploratory profiling report of the loaded data, write it to
# profile.html and copy that file to the Drive folder.
profile = pandas_profiling.ProfileReport(dados_float, title="Report de Dados INMET - Belém (Proj. Eng II)", explorative=True)
profile.to_file("profile.html")
!cp "profile.html" "drive/My Drive/"

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
def info(df):
    '''Summarise a DataFrame column by column, including missing values.

    Args:
        df: DataFrame to summarise.

    Returns:
        A DataFrame indexed by column name with the column label, dtype,
        missing-value count ('NA #') and percentage ('NA %'), joined with
        the transposed output of ``df.describe()``.
    '''
    total_linhas = df.shape[0]
    faltantes = df.isna().sum()

    resumo = pd.DataFrame({'Colunas': df.columns, 'types': df.dtypes,
                           'NA #': faltantes,
                           'NA %': (faltantes / total_linhas) * 100})

    # Columns that describe() does not cover end up with NaN statistics.
    estatisticas = df.describe().T

    return resumo.join(estatisticas)


In [None]:
# Per-column summary (dtype, missing values, descriptive statistics) of the loaded data.
info(dados_float)

Unnamed: 0,Colunas,types,NA #,NA %,count,mean,std,min,25%,50%,75%,max
DC_NOME,DC_NOME,object,0,0.0,,,,,,,,
PRE_INS,PRE_INS,float64,1510,1.43547,103682.0,1008.833078,1.956324,1001.2,1007.5,1008.8,1010.2,1015.9
TEM_SEN,TEM_SEN,float64,1831,1.740627,103361.0,29.137919,2.381056,8.3,27.3,28.5,31.0,37.6
VL_LATITUDE,VL_LATITUDE,float64,0,0.0,105192.0,-1.411228,3.341787e-13,-1.411228,-1.411228,-1.411228,-1.411228,-1.411228
PRE_MAX,PRE_MAX,float64,1534,1.458286,103658.0,1009.128568,1.927461,1001.4,1007.8,1009.2,1010.5,1016.0
UF,UF,object,0,0.0,,,,,,,,
RAD_GLO,RAD_GLO,float64,3225,3.065822,101967.0,631.639219,930.1913,-3.6,-2.858,31.136,1147.3815,5230.005
PTO_INS,PTO_INS,float64,1585,1.506769,103607.0,22.982381,0.9894104,6.4,22.4,23.0,23.6,27.9
TEM_MIN,TEM_MIN,float64,1560,1.483003,103632.0,26.284903,2.570285,18.9,24.2,25.4,28.2,35.6
VL_LONGITUDE,VL_LONGITUDE,float64,0,0.0,105192.0,-48.439512,4.153142e-11,-48.439512,-48.439512,-48.439512,-48.439512,-48.439512


In [None]:
# Columns that will be forecast.
# NOTE(review): the names suggest instantaneous temperature, instantaneous
# humidity and wind speed — confirm against INMET's field documentation.
targets = ['TEM_INS','UMD_INS','VEN_VEL']

In [None]:
# Remove the installed scipy without prompting (-y); the next cell installs it again.
# NOTE(review): the reason for the reinstall is not stated in the notebook — confirm it is still needed.
!pip uninstall -y scipy

Found existing installation: scipy 1.7.3
Uninstalling scipy-1.7.3:
  Successfully uninstalled scipy-1.7.3


In [None]:
# Install scipy again, bypassing pip's local cache (--no-cache-dir).
# NOTE(review): the version is not pinned.
!pip --no-cache-dir install scipy

Collecting scipy
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.4 MB/s 
Installing collected packages: scipy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
Successfully installed scipy-1.7.3


In [None]:
# Build a 'YYYY-MM-DD - HHMM' label for every record. HR_MEDICAO is cast to
# text and left-padded to four digits (e.g. 100 -> '0100'). The column-wise
# string operations replace the slower row-by-row apply.
dados_float['DT_HR'] = dados_float['DT_MEDICAO'] + ' - ' + dados_float['HR_MEDICAO'].astype(str).str.zfill(4)

In [None]:
from datetime import datetime

# Parse the 'YYYY-MM-DD' strings into datetimes in one vectorised call.
# pd.to_datetime also accepts a column that is already datetime, so running
# this cell a second time does not fail.
dados_float['DT_MEDICAO'] = pd.to_datetime(dados_float['DT_MEDICAO'], format='%Y-%m-%d')

In [None]:

# Training window for auto-ARIMA: records from 2021-12-01 (inclusive) up to,
# but not including, the day the notebook is run.
# NOTE(review): the upper bound depends on the run date, so the selected rows
# change from one day to the next.
dias_medicao = dados_float['DT_MEDICAO'].dt.date
inicio_janela = datetime.strptime('2021-12-01', '%Y-%m-%d').date()
fim_janela = datetime.today().date()

dados_float_auto_arima = dados_float[(dias_medicao < fim_janela) & (dias_medicao >= inicio_janela)].reset_index(drop=True)

# Fill the gaps in each target column with that column's own mean.
dados_float_auto_arima[targets] = dados_float_auto_arima[targets].fillna(dados_float_auto_arima[targets].mean())

In [None]:
# Display the filtered and imputed training window.
dados_float_auto_arima

Unnamed: 0,DC_NOME,PRE_INS,TEM_SEN,VL_LATITUDE,PRE_MAX,UF,RAD_GLO,PTO_INS,TEM_MIN,VL_LONGITUDE,UMD_MIN,PTO_MAX,VEN_DIR,DT_MEDICAO,CHUVA,PRE_MIN,UMD_MAX,VEN_VEL,PTO_MIN,TEM_MAX,VEN_RAJ,TEM_INS,UMD_INS,CD_ESTACAO,HR_MEDICAO,DT_HR
0,BELÉM,1006.8,29.9,-1.411228,1006.8,PA,-1.058,23.9,26.7,-48.439512,78.0,23.9,46.0,2021-12-01,0.0,1006.2,83.0,0.3,22.9,27.1,3.9,27.0,83.0,A201,0,2021-12-01 - 0000
1,BELÉM,1006.9,30.1,-1.411228,1007.1,PA,-2.266,24.3,27.0,-48.439512,82.0,24.3,33.0,2021-12-01,0.0,1006.8,85.0,0.6,23.8,27.1,4.9,27.0,85.0,A201,100,2021-12-01 - 0100
2,BELÉM,1007.2,30.3,-1.411228,1007.3,PA,-2.067,24.6,26.9,-48.439512,85.0,24.6,53.0,2021-12-01,0.0,1006.9,87.0,0.5,24.3,27.1,4.5,27.0,87.0,A201,200,2021-12-01 - 0200
3,BELÉM,1007.0,29.9,-1.411228,1007.2,PA,-1.991,24.5,26.6,-48.439512,86.0,24.6,51.0,2021-12-01,0.0,1007.0,88.0,0.3,24.4,27.0,4.5,26.6,88.0,A201,300,2021-12-01 - 0300
4,BELÉM,1006.4,29.9,-1.411228,1007.0,PA,-1.754,24.5,26.4,-48.439512,88.0,24.5,46.0,2021-12-01,0.0,1006.4,89.0,0.1,24.4,26.6,3.2,26.6,88.0,A201,400,2021-12-01 - 0400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1147,BELÉM,1006.4,31.9,-1.411228,1008.1,PA,1234.744,25.6,24.4,-48.439512,86.0,26.7,43.0,2022-01-17,10.0,1006.3,98.0,0.0,23.5,28.3,0.0,28.2,86.0,A201,1900,2022-01-17 - 1900
1148,BELÉM,1006.5,32.7,-1.411228,1006.7,PA,1342.897,25.3,27.9,-48.439512,75.0,26.6,30.0,2022-01-17,0.0,1006.3,89.0,0.0,24.6,30.5,0.0,29.4,79.0,A201,2000,2022-01-17 - 2000
1149,BELÉM,1007.1,30.1,-1.411228,1007.1,PA,178.093,24.1,27.1,-48.439512,76.0,25.5,22.0,2022-01-17,0.0,1006.4,85.0,0.0,24.1,29.5,0.0,27.1,84.0,A201,2100,2022-01-17 - 2100
1150,BELÉM,1007.7,29.0,-1.411228,1007.9,PA,14.450,24.2,25.6,-48.439512,84.0,24.7,32.0,2022-01-17,0.6,1007.1,93.0,0.0,24.1,27.1,0.0,25.6,92.0,A201,2200,2022-01-17 - 2200


In [None]:
# pmdarima supplies the auto_arima search used by the modelling cell.
# NOTE(review): the version is not pinned.
!pip install pmdarima

In [None]:
import pmdarima as pm

# Forecasts are collected in the same order as `targets`, one entry per target column.
results = []

for target in targets:

  # Stepwise auto-ARIMA search on the target series with a seasonal period of
  # m=24 observations (the records are hourly, so presumably a daily cycle —
  # TODO confirm). d=0 and D=1 are passed explicitly; the start_*/max_*
  # arguments bound the orders the search may try. trace=True prints every
  # candidate model with its AIC.
  model = pm.auto_arima(dados_float_auto_arima.loc[: ,target], 
                      start_p=1, d = 0, start_q=0,
                      test='adf',
                      max_p=2, max_d = 1, max_q=1, 
                      D = 1, start_Q = 0, max_P = 2, max_D = 1, max_Q = 0,
                      m=24,
                      seasonal=True, trace=True,
                      error_action='warn',  
                      suppress_warnings=True, 
                      stepwise=True, random_state = 1234)

  # Forecast the next 18*24 = 432 steps (18 days of hourly records).
  pred = model.predict(n_periods = 18*24)

  results.append(pred)

  # Blank lines separate the search traces of consecutive targets.
  print('\n')

Performing stepwise search to minimize aic
 ARIMA(1,0,0)(1,1,0)[24] intercept   : AIC=3693.047, Time=7.00 sec
 ARIMA(0,0,0)(0,1,0)[24] intercept   : AIC=4614.260, Time=0.25 sec
 ARIMA(0,0,1)(0,1,0)[24] intercept   : AIC=4145.192, Time=1.48 sec
 ARIMA(0,0,0)(0,1,0)[24]             : AIC=4613.160, Time=0.18 sec
 ARIMA(1,0,0)(0,1,0)[24] intercept   : AIC=3956.634, Time=0.75 sec
 ARIMA(1,0,0)(2,1,0)[24] intercept   : AIC=3572.911, Time=21.12 sec
 ARIMA(0,0,0)(2,1,0)[24] intercept   : AIC=4182.062, Time=15.16 sec
 ARIMA(2,0,0)(2,1,0)[24] intercept   : AIC=3573.351, Time=25.50 sec
 ARIMA(1,0,1)(2,1,0)[24] intercept   : AIC=3572.926, Time=24.75 sec
 ARIMA(0,0,1)(2,1,0)[24] intercept   : AIC=3748.350, Time=16.93 sec
 ARIMA(2,0,1)(2,1,0)[24] intercept   : AIC=3571.911, Time=67.93 sec
 ARIMA(2,0,1)(1,1,0)[24] intercept   : AIC=inf, Time=40.95 sec
 ARIMA(2,0,1)(2,1,0)[24]             : AIC=3570.628, Time=18.37 sec
 ARIMA(2,0,1)(1,1,0)[24]             : AIC=inf, Time=18.07 sec
 ARIMA(1,0,1)(2,1,0)

In [None]:
def allDays(ano, mes):
  '''Return every day of a given month as 'YYYY-MM-DD' strings.

  Args:
    ano: Year as an integer.
    mes: Month as an integer (1-12).

  Returns:
    A list with one zero-padded date string per day of the month, in
    calendar order.
  '''
  # monthrange returns (weekday of the first day, number of days in the month).
  _, dias_no_mes = monthrange(ano, mes)

  datas_do_mes = []
  for dia in range(1, dias_no_mes + 1):
    datas_do_mes.append(f'{ano:04d}-{mes:02d}-{dia:02d}')

  return datas_do_mes

In [None]:
# Timestamps ('YYYY-MM-DD - HHMM') covering January and February of 2022.
anos = [2022]
meses = [1, 2]
horas = [f'{hora:04d}' for hora in range(0, 2400, 100)]

# One list of day strings per (month, year) pair, ordered and then flattened.
datas = sorted(allDays(int(ano), int(mes)) for mes, ano in product(meses, anos))
datas = list(chain.from_iterable(datas))

# Every day combined with every full hour.
datas = sorted(f'{data} - {hora}' for data, hora in product(datas, horas))

# Skip the first 17 days (17*24 hourly slots) and keep the next 18 days
# (18*24 slots), the same number of steps as the forecast horizon.
datas = datas[17*24: 17*24 + 18*24]

In [None]:
# One row per forecast timestamp. results[i] holds the forecast for
# targets[i], so the columns follow the order TEM_INS, UMD_INS, VEN_VEL.
pred_date = pd.DataFrame({'DT_HR': datas, 'TEM_INS': results[0], 'UMD_INS': results[1],'VEN_VEL': results[2] })

In [None]:
# Save only the last 24 forecast rows (the final 24 hourly steps) to CSV,
# without the index column.
pred_date.tail(24).to_csv('predics_lmss.csv', index = False)