# Código relativo à disciplina de Projetos de Engenharia II

- O código tem por objetivo criar um modelo de séries temporais meteorológicas para a cidade de Belém. Para isso, foram utilizados a linguagem de programação Python e os registros disponíveis no site do Instituto Nacional de Meteorologia (INMET).

In [None]:
# Celula relativa à importação de bibliotecas

import pandas as pd
import requests
import json
from calendar import monthrange
import numpy as np
from itertools import product
from datetime import date
from itertools import chain
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns

## Código relativo à extração dos dados


In [None]:
def allDays(ano, mes):
  '''Return every date of the given month as 'YYYY-MM-DD' strings, in calendar order.

  Args:
    ano: Year as an integer.
    mes: Month number as an integer (1-12).
  '''
  # monthrange returns (weekday of the 1st, number of days); only the day count is needed
  ultimo_dia = monthrange(ano, mes)[1]
  return [f'{ano:04d}-{mes:02d}-{dia:02d}' for dia in range(1, ultimo_dia + 1)]

In [None]:
# Yearly request windows: one (first day, last day) pair per year, from 2000 through 2021
anos = list(range(2000, 2022))

datas_inicio = sorted(f'{ano}-01-01' for ano in anos)
datas_fim = sorted(f'{ano}-12-31' for ano in anos)

In [None]:
def getData(string):
  '''Parse the JSON text of a response object into a DataFrame.

  Args:
    string: Object exposing the JSON payload in its `.text` attribute
      (the notebook passes the result of requests.get).

  Returns:
    The DataFrame built from the decoded JSON via pd.DataFrame.from_dict.
  '''
  payload = json.loads(string.text)
  return pd.DataFrame.from_dict(payload)

def getInfosAboutClimate(data1, data2, estacao='A201', timeout=None):
  '''Fetch the INMET station records between two dates as a DataFrame.

  Args:
    data1: Start date, interpolated verbatim into the request URL
      (this notebook passes 'YYYY-MM-DD' strings).
    data2: End date, same format as data1.
    estacao: Station code placed at the end of the URL. Defaults to 'A201',
      the station this notebook uses for Belém.
    timeout: Seconds forwarded to requests.get. None (the default) waits
      indefinitely, which is the behaviour of a plain requests.get(link).

  Returns:
    The DataFrame produced by getData, or None when the request or the
    parsing fails. The failure is printed rather than raised so that one bad
    window does not abort a whole batch of downloads.
  '''
  # Progress trace: shows which window is being requested
  print(data1, data2)

  link = 'https://apitempo.inmet.gov.br/estacao/{}/{}/{}'.format(data1, data2, estacao)

  try:
    return getData(requests.get(link, timeout=timeout))
  except Exception as erro:
    # Best-effort download: report the problem instead of hiding it.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate, so a long download loop can still be interrupted.
    print('Falha ao obter dados de {} a {}: {!r}'.format(data1, data2, erro))
    return None



In [None]:
# Mount Google Drive (Colab) so the extracted data can later be saved there as a CSV file

from google.colab import drive
drive.mount('drive')

Drive already mounted at drive; to attempt to forcibly remount, call drive.mount("drive", force_remount=True).


In [None]:
# Download one result per yearly window; a failed request leaves None in its slot
dados = list(map(getInfosAboutClimate, datas_inicio, datas_fim))

2000-01-01 2000-12-31
2001-01-01 2001-12-31
2002-01-01 2002-12-31
2003-01-01 2003-12-31
2004-01-01 2004-12-31
2005-01-01 2005-12-31
2006-01-01 2006-12-31
2007-01-01 2007-12-31
2008-01-01 2008-12-31
2009-01-01 2009-12-31
2010-01-01 2010-12-31
2011-01-01 2011-12-31
2012-01-01 2012-12-31
2013-01-01 2013-12-31
2014-01-01 2014-12-31
2015-01-01 2015-12-31
2016-01-01 2016-12-31
2017-01-01 2017-12-31
2018-01-01 2018-12-31
2019-01-01 2019-12-31
2020-01-01 2020-12-31
2021-01-01 2021-12-31


In [None]:
# Stack the yearly frames into a single table. ignore_index=True gives the rows one
# continuous 0..N-1 index instead of repeating each year's own 0-based labels.
# pd.concat drops None entries (failed downloads) unless every entry is None.
dados = pd.concat(dados, ignore_index=True)

In [None]:
# Save the data to the previously mounted Drive:
# write the CSV locally without the index column, then copy it into the Drive folder

dados.to_csv('data.csv', index = False)
!cp data.csv "drive/My Drive/"

In [None]:
# Display the concatenated raw frame (pandas truncates the rendering to the first and last rows)
dados

Unnamed: 0,DC_NOME,PRE_INS,TEM_SEN,VL_LATITUDE,PRE_MAX,UF,RAD_GLO,PTO_INS,TEM_MIN,VL_LONGITUDE,UMD_MIN,PTO_MAX,VEN_DIR,DT_MEDICAO,CHUVA,PRE_MIN,UMD_MAX,VEN_VEL,PTO_MIN,TEM_MAX,VEN_RAJ,TEM_INS,UMD_INS,CD_ESTACAO,HR_MEDICAO
0,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2000-01-01,,,,,,,,,,A201,0000
1,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2000-01-01,,,,,,,,,,A201,0100
2,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2000-01-01,,,,,,,,,,A201,0200
3,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2000-01-01,,,,,,,,,,A201,0300
4,BELÉM,,,-1.411228,,PA,,,,-48.439512,,,,2000-01-01,,,,,,,,,,A201,0400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,BELÉM,1006.1,29.4,-1.411228,1006.1,PA,331.026,24.8,26.9,-48.439512,78,25.5,328,2021-12-31,0,1005.7,89,1.8,24.2,28.6,6.5,27,88,A201,1900
8756,BELÉM,1006.3,25.2,-1.411228,1006.5,PA,187.427,23.4,24.1,-48.439512,87,25,68,2021-12-31,4.4,1006.1,95,2.5,23.2,27,9.6,24.2,95,A201,2000
8757,BELÉM,1006.7,27.4,-1.411228,1006.7,PA,103.985,23.5,24.1,-48.439512,94,23.5,352,2021-12-31,2.2,1006.1,96,0.4,23.2,24.4,7.5,24.2,95,A201,2100
8758,BELÉM,1007.6,27.6,-1.411228,1007.6,PA,5.826,23.6,24.2,-48.439512,95,23.6,27,2021-12-31,0,1006.7,96,0,23.4,24.3,2.5,24.3,96,A201,2200


In [None]:
def getFloat(x):
  '''Convert a value to float when possible; otherwise return it unchanged.

  Args:
    x: Any value, typically a string coming from the API.

  Returns:
    float(x) if the conversion succeeds, else x itself (for example
    non-numeric text or None).
  '''
  try:
    return float(x)
  except (TypeError, ValueError, OverflowError):
    # Only the errors float() raises for unconvertible input are absorbed;
    # KeyboardInterrupt and other unrelated exceptions now propagate.
    return x

# The API delivers every value as a string, numbers included, so each measurement
# column is converted to float on a copy of the raw frame.
colunas_texto = ['DC_NOME', 'UF', 'DT_MEDICAO', 'CD_ESTACAO', 'HR_MEDICAO']
colunas_numericas = dados.drop(columns = colunas_texto).columns

dados_float = dados.copy()
for coluna in colunas_numericas:
  # None stays None; every other value goes through getFloat
  dados_float[coluna] = dados[coluna].apply(lambda valor: None if valor is None else getFloat(valor))

## Código relativo à análise exploratória e engenharia de features

In [None]:
# Install pandas-profiling from the master-branch archive of its GitHub repository (version not pinned)
!pip install https://github.com/pandas-profiling/pandas-profiling/archive/master.zip

Collecting https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
  Downloading https://github.com/pandas-profiling/pandas-profiling/archive/master.zip
[K     \ 25.9 MB 768 kB/s
Collecting pydantic>=1.8.1
  Downloading pydantic-1.9.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.9 MB)
[K     |████████████████████████████████| 10.9 MB 4.9 MB/s 
[?25hCollecting PyYAML>=5.0.0
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 40.7 MB/s 
Collecting visions[type_image_path]==0.7.4
  Downloading visions-0.7.4-py3-none-any.whl (102 kB)
[K     |████████████████████████████████| 102 kB 11.2 MB/s 
Collecting htmlmin>=0.1.12
  Downloading htmlmin-0.1.12.tar.gz (19 kB)
Collecting phik>=0.11.1
  Downloading phik-0.12.0-cp37-cp37m-manylinux2010_x86_64.whl (675 kB)
[K     |████████████████████████████████| 675 kB 66.5 MB/s 
[?25hColle

In [None]:
# Mount Google Drive and load the previously saved data from it.
# NOTE(review): the mount point is absolute but the CSV path below is relative, so the read
# relies on the working directory being /content — confirm.

from google.colab import drive
drive.mount('/content/drive')

# Rebinds dados_float to the contents of the saved CSV
dados_float = pd.read_csv("drive/My Drive/data.csv")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas_profiling

# Build an exploratory profiling report of the loaded data, write it to an HTML file
# and copy that file into the Drive folder
profile = pandas_profiling.ProfileReport(dados_float, title="Report de Dados INMET - Belém (Proj. Eng II)", explorative=True)
profile.to_file("profile.html")
!cp "profile.html" "drive/My Drive/"

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
def info(df):
    '''Summarize a DataFrame column by column, including missing values.

    Args:
        df: DataFrame to summarize.

    Returns:
        A DataFrame indexed by column name holding the column name, its dtype,
        the count and percentage of missing values, and the transposed
        df.describe() statistics (NaN for columns describe() does not cover).
    '''
    contagem_na = df.isna().sum()
    resumo = pd.DataFrame({
        'Colunas': df.columns,
        'types': df.dtypes,
        'NA #': contagem_na,
        'NA %': (contagem_na / df.shape[0]) * 100,
    })

    # describe() has one column per variable; transpose so it joins on the column names
    estatisticas = df.describe().T
    return resumo.join(estatisticas)


In [None]:
# Per-column summary of the loaded data: dtype, missing-value count/percentage and descriptive statistics
info(dados_float)

Unnamed: 0,Colunas,types,NA #,NA %,count,mean,std,min,25%,50%,75%,max
DC_NOME,DC_NOME,object,0,0.0,,,,,,,,
PRE_INS,PRE_INS,float64,44389,23.0157,148475.0,1008.775427,1.963659,952.2,1007.4,1008.8,1010.1,1016.1
TEM_SEN,TEM_SEN,float64,44782,23.219471,148082.0,29.076009,2.412646,8.3,27.2,28.5,30.9,66.8
VL_LATITUDE,VL_LATITUDE,float64,0,0.0,192864.0,-1.411228,5.163661e-12,-1.411228,-1.411228,-1.411228,-1.411228,-1.411228
PRE_MAX,PRE_MAX,float64,44450,23.047329,148414.0,1009.082562,1.94583,1001.4,1007.8,1009.1,1010.4,1049.5
UF,UF,object,0,0.0,,,,,,,,
RAD_GLO,RAD_GLO,float64,48175,24.978741,144689.0,2149.945933,5875.049,-43.0,-3.2,48.966,1652.167,43208.0
PTO_INS,PTO_INS,float64,44456,23.05044,148408.0,23.013074,1.065826,6.4,22.4,23.0,23.6,43.5
TEM_MIN,TEM_MIN,float64,44460,23.052514,148404.0,26.281883,2.584555,0.0,24.2,25.4,28.2,45.0
VL_LONGITUDE,VL_LONGITUDE,float64,0,0.0,192864.0,-48.439512,1.181422e-10,-48.439512,-48.439512,-48.439512,-48.439512,-48.439512


In [None]:
# Columns to forecast, one model per column.
# NOTE(review): going by the INMET field names these look like instantaneous temperature,
# instantaneous humidity and wind speed — confirm.
targets = ['TEM_INS','UMD_INS','VEN_VEL']

In [None]:
# Remove the preinstalled scipy so the following cell can install a fresh copy.
# NOTE(review): presumably a workaround for a scipy version conflict in Colab — confirm it is still needed.
!pip uninstall -y scipy

Found existing installation: scipy 1.4.1
Uninstalling scipy-1.4.1:
  Successfully uninstalled scipy-1.4.1


In [None]:
# Reinstall scipy, bypassing pip's cache (version not pinned)
!pip --no-cache-dir install scipy

Collecting scipy
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.2 MB/s 
Installing collected packages: scipy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
Successfully installed scipy-1.7.3


In [None]:
# Build a 'date - HHMM' label per row. HR_MEDICAO is cast to text and left-padded with zeros
# to four characters. Vectorized string operations replace the row-by-row apply(axis=1),
# which called a Python lambda once per row of the frame.
dados_float['DT_HR'] = dados_float['DT_MEDICAO'] + ' - ' + dados_float['HR_MEDICAO'].astype(str).str.zfill(4)

In [None]:

# Imported once for the cell instead of on every loop iteration
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Forecast horizon in hourly steps: 59 days x 24 hours, matching the January-February 2022
# timestamps that this notebook pairs with the predictions
PASSOS_PREVISAO = 59 * 24

results = []

for target in targets:
  # One SARIMAX model with order (2, 2, 1) per target column
  model = SARIMAX(dados_float[target], order=(2,2,1))
  model_fit = model.fit()

  pred = model_fit.get_forecast(steps = PASSOS_PREVISAO)

  # Midpoint of the lower and upper confidence bounds, used as the point forecast
  pred_ci = pred.conf_int().mean(axis = 1)

  results.append(pred_ci)

In [None]:
def allDays(ano, mes):
  '''Return every date of the given month as 'YYYY-MM-DD' strings, in calendar order.

  Args:
    ano: Year as an integer.
    mes: Month number as an integer (1-12).
  '''
  # monthrange returns (weekday of the 1st, number of days); only the day count is needed
  ultimo_dia = monthrange(ano, mes)[1]
  return [f'{ano:04d}-{mes:02d}-{dia:02d}' for dia in range(1, ultimo_dia + 1)]

In [None]:
# Hourly timestamp labels for the forecast window: every day of January and February 2022
# combined with every full hour, formatted as 'YYYY-MM-DD - HHMM'
anos = [2022]
meses = [1, 2]
horas = [f'{hora:04d}' for hora in range(0, 2400, 100)]

# chain.from_iterable flattens the per-month day lists in one pass
# (replaces sum(lists, []), which re-copies the accumulated list at every step)
datas = sorted(chain.from_iterable(allDays(int(ano), int(mes)) for mes, ano in product(meses, anos)))

datas = sorted(f'{data} - {hora}' for data, hora in product(datas, horas))

In [None]:
# Assemble the forecasts into one table: the generated timestamp labels plus one column per target.
# results[0], results[1] and results[2] follow the order of `targets`; `datas` is expected to have
# exactly as many entries as there are forecast steps.
pred_date = pd.DataFrame({'DT_HR': datas, 'TEM_INS': results[0], 'UMD_INS': results[1],'VEN_VEL': results[2] })

In [None]:
# Second uninstall/reinstall of scipy (same commands as the two earlier cells, version not pinned).
# NOTE(review): presumably needed before importing pmdarima — confirm, and consider a single pinned install.
!pip uninstall -y scipy 
!pip --no-cache-dir install scipy 

Found existing installation: scipy 1.7.3
Uninstalling scipy-1.7.3:
  Successfully uninstalled scipy-1.7.3
Collecting scipy
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.2 MB/s 
Installing collected packages: scipy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.[0m
Successfully installed scipy-1.7.3


In [None]:
# Force-reinstall scipy together with its dependencies (unpinned); the recorded output shows
# that this also replaced the installed numpy
!pip install --force-reinstall scipy

Collecting scipy
  Using cached scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
Collecting numpy<1.23.0,>=1.16.5
  Using cached numpy-1.21.5-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
Installing collected packages: numpy, scipy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.5
    Uninstalling numpy-1.19.5:
      Successfully uninstalled numpy-1.19.5
  Attempting uninstall: scipy
    Found existing installation: scipy 1.7.3
    Uninstalling scipy-1.7.3:
      Successfully uninstalled scipy-1.7.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
yellowbrick 1.3.post1 requires numpy<1.20,>=1.16.0, but you have numpy 1.21.5 which is incompatible.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.
albumentations 0.1.12 requires imgaug<0.2.7

In [None]:
import pmdarima as pm

# Seasonal - fit stepwise auto-ARIMA
# Searches ARIMA orders for the temperature series after dropping missing values;
# trace=True prints each candidate model as it is evaluated.
# NOTE(review): m=12 is the seasonal period passed to auto_arima; the records appear to be hourly,
# where a daily cycle would be 24 — confirm the intended period.
# NOTE(review): dropna() removes the gaps, so the remaining observations may no longer be evenly
# spaced in time — confirm this is acceptable for the model.
smodel = pm.auto_arima(dados_float['TEM_INS'].dropna(), start_p=1, start_q=1,
                         test='adf',
                         max_p=3, max_q=3, m=12,
                         start_P=0, seasonal=True,
                         d=None, D=1, trace=True,
                         error_action='ignore',  
                         suppress_warnings=True, 
                         stepwise=True)


Performing stepwise search to minimize aic
