<a href="https://colab.research.google.com/github/pedrohbp00/DengAI/blob/main/dengAI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### **Projeto Prático de Ciência da Computação**


 > Objetivo: desenvolver uma aplicação capaz de prever os casos de dengue nas cidades de San Juan e Iquitos, localizadas em Porto Rico e no Peru, respectivamente.


### **Dataset/Fonte dos Dados**

> Dataset: https://www.drivendata.org/competitions/44/dengai-predicting-disease-spread/data/

> Fonte dos dados: os dados foram coletados por agências governamentais dos EUA e estão disponíveis no seguinte link: http://www.cdc.gov/ 

> Referência (TCC, UnB, 2018): https://bdm.unb.br/bitstream/10483/21569/1/2018_LucasVilelaTaveiraBrilhante_tcc.pdf

In [1]:
## Import pandas, the main library for data analysis
import pandas as pd

In [16]:
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [8]:
## Load the DengAI CSV files into DataFrames, in this order:
## training features, test features, training labels
df, df_test, df_labels_train = (
    pd.read_csv(csv_url)
    for csv_url in (
        'https://raw.githubusercontent.com/pedrohbp00/DengAI/main/dengue_features_train.csv',
        'https://raw.githubusercontent.com/pedrohbp00/DengAI/main/dengue_features_test.csv',
        'https://raw.githubusercontent.com/pedrohbp00/DengAI/main/dengue_labels_train.csv',
    )
)

In [9]:
## Preview the first 5 rows of the training-features DataFrame
df.head(n=5)

Unnamed: 0,city,year,weekofyear,week_start_date,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,...,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm
0,sj,1990,18,1990-04-30,0.1226,0.103725,0.198483,0.177617,12.42,297.572857,...,32.0,73.365714,12.42,14.012857,2.628571,25.442857,6.9,29.4,20.0,16.0
1,sj,1990,19,1990-05-07,0.1699,0.142175,0.162357,0.155486,22.82,298.211429,...,17.94,77.368571,22.82,15.372857,2.371429,26.714286,6.371429,31.7,22.2,8.6
2,sj,1990,20,1990-05-14,0.03225,0.172967,0.1572,0.170843,34.54,298.781429,...,26.1,82.052857,34.54,16.848571,2.3,26.714286,6.485714,32.2,22.8,41.4
3,sj,1990,21,1990-05-21,0.128633,0.245067,0.227557,0.235886,15.36,298.987143,...,13.9,80.337143,15.36,16.672857,2.428571,27.471429,6.771429,33.3,23.3,4.0
4,sj,1990,22,1990-05-28,0.1962,0.2622,0.2512,0.24734,7.52,299.518571,...,12.2,80.46,7.52,17.21,3.014286,28.942857,9.371429,35.0,23.9,5.8


In [12]:
## List the column (attribute) names of the DataFrame
df.columns

Index(['city', 'year', 'weekofyear', 'week_start_date', 'ndvi_ne', 'ndvi_nw',
       'ndvi_se', 'ndvi_sw', 'precipitation_amt_mm', 'reanalysis_air_temp_k',
       'reanalysis_avg_temp_k', 'reanalysis_dew_point_temp_k',
       'reanalysis_max_air_temp_k', 'reanalysis_min_air_temp_k',
       'reanalysis_precip_amt_kg_per_m2',
       'reanalysis_relative_humidity_percent', 'reanalysis_sat_precip_amt_mm',
       'reanalysis_specific_humidity_g_per_kg', 'reanalysis_tdtr_k',
       'station_avg_temp_c', 'station_diur_temp_rng_c', 'station_max_temp_c',
       'station_min_temp_c', 'station_precip_mm'],
      dtype='object')

In [13]:
## Overview of the dataset: column dtypes, non-null counts, etc.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1456 entries, 0 to 1455
Data columns (total 24 columns):
 #   Column                                 Non-Null Count  Dtype  
---  ------                                 --------------  -----  
 0   city                                   1456 non-null   object 
 1   year                                   1456 non-null   int64  
 2   weekofyear                             1456 non-null   int64  
 3   week_start_date                        1456 non-null   object 
 4   ndvi_ne                                1262 non-null   float64
 5   ndvi_nw                                1404 non-null   float64
 6   ndvi_se                                1434 non-null   float64
 7   ndvi_sw                                1434 non-null   float64
 8   precipitation_amt_mm                   1443 non-null   float64
 9   reanalysis_air_temp_k                  1446 non-null   float64
 10  reanalysis_avg_temp_k                  1446 non-null   float64
 11  rean

In [14]:
## Count how many rows carry each distinct value of a given attribute, here 'city'
## City abbreviations: sj = San Juan, iq = Iquitos
df.loc[:, 'city'].value_counts()

sj    936
iq    520
Name: city, dtype: int64

In [None]:
## Summary statistics for every column, numeric and non-numeric alike
df.describe(include='all')

Unnamed: 0,city,year,weekofyear,week_start_date,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,...,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm
count,1456,1456.0,1456.0,1456,1262.0,1404.0,1434.0,1434.0,1443.0,1446.0,...,1446.0,1446.0,1443.0,1446.0,1446.0,1413.0,1413.0,1436.0,1442.0,1434.0
unique,2,,,1049,,,,,,,...,,,,,,,,,,
top,sj,,,2005-06-18,,,,,,,...,,,,,,,,,,
freq,936,,,2,,,,,,,...,,,,,,,,,,
mean,,2001.031593,26.503434,,0.142294,0.130553,0.203783,0.202305,45.760388,298.701852,...,40.151819,82.161959,45.760388,16.746427,4.903754,27.185783,8.059328,32.452437,22.10215,39.32636
std,,5.408314,15.019437,,0.140531,0.119999,0.07386,0.083903,43.715537,1.36242,...,43.434399,7.153897,43.715537,1.542494,3.546445,1.292347,2.128568,1.959318,1.574066,47.455314
min,,1990.0,1.0,,-0.40625,-0.4561,-0.015533,-0.063457,0.0,294.635714,...,0.0,57.787143,0.0,11.715714,1.357143,21.4,4.528571,26.7,14.7,0.0
25%,,1997.0,13.75,,0.04495,0.049217,0.155087,0.144209,9.8,297.658929,...,13.055,77.177143,9.8,15.557143,2.328571,26.3,6.514286,31.1,21.1,8.7
50%,,2002.0,26.5,,0.128817,0.121429,0.19605,0.18945,38.34,298.646429,...,27.245,80.301429,38.34,17.087143,2.857143,27.414286,7.3,32.8,22.2,23.85
75%,,2005.0,39.25,,0.248483,0.2166,0.248846,0.246982,70.235,299.833571,...,52.2,86.357857,70.235,17.978214,7.625,28.157143,9.566667,33.9,23.3,53.9


In [None]:
## Boolean mask with True wherever a value is missing
## NOTE(review): df.isna().sum() would give per-column missing counts, which is
## easier to read than the full mask - consider switching
df.isna()

Unnamed: 0,city,year,weekofyear,week_start_date,ndvi_ne,ndvi_nw,ndvi_se,ndvi_sw,precipitation_amt_mm,reanalysis_air_temp_k,...,reanalysis_precip_amt_kg_per_m2,reanalysis_relative_humidity_percent,reanalysis_sat_precip_amt_mm,reanalysis_specific_humidity_g_per_kg,reanalysis_tdtr_k,station_avg_temp_c,station_diur_temp_rng_c,station_max_temp_c,station_min_temp_c,station_precip_mm
0,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1451,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1452,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1453,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1454,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
