# InfluxDB Challenge

https://www.influxdata.com/influxdb-challenge/

## Climate Weather Surface of Brazil - Hourly

Hourly climate data for the state of São Paulo (SP), March 2021.

https://www.kaggle.com/datasets/PROPPG-PPG/hourly-weather-surface-brazil-southeast-region?select=southeast.csv

In [None]:
import os
import kaggle
from zipfile import ZipFile
import pandas as pd
from datetime import datetime

import influxdb_client, os, time
from influxdb_client import InfluxDBClient, Point, WritePrecision
from influxdb_client.client.write_api import SYNCHRONOUS

In [None]:
"""
---Define variables below using your prefered method---
os.environ['KAGGLE_USERNAME'] = ''
os.environ['KAGGLE_KEY'] = ''
"""

token = os.environ.get('INFLUXDB_TOKEN')
org = os.environ.get('INFLUXDB_ORG')
url = os.environ.get('INFLUXDB_URL')
bucket = os.environ.get('INFLUXDB_BUCKET')

filename ="southeast.csv"

In [None]:
# Download only southeast.csv from the Kaggle dataset using the Kaggle CLI.
# NOTE(review): the next cell opens "southeast.csv.zip", so the download is
# expected to arrive zipped in the working directory - confirm.
!kaggle datasets download -f southeast.csv PROPPG-PPG/hourly-weather-surface-brazil-southeast-region

In [None]:
# Unpack the downloaded archive ("<filename>.zip") into the current working
# directory. ZipFile is already imported in the notebook's import cell, so it
# is not imported again here.
with ZipFile(filename + '.zip', 'r') as archive:
    archive.extractall()

In [None]:
# Load the raw hourly observations from the extracted CSV.
df_weather_brazil_southeast = pd.read_csv(filename)

# Combine the separate date ("Data") and hour ("Hora") columns into a single
# timestamp, and expose the two measurements under short English names.
df_weather_brazil_southeast['datetime'] = pd.to_datetime(
    df_weather_brazil_southeast['Data'].astype(str)
    + ' '
    + df_weather_brazil_southeast['Hora'].astype(str)
)
df_weather_brazil_southeast['temperature(°C)'] = df_weather_brazil_southeast[
    'TEMPERATURA DO AR - BULBO SECO, HORARIA (°C)'
]
df_weather_brazil_southeast['humidity(%)'] = df_weather_brazil_southeast[
    'UMIDADE RELATIVA DO AR, HORARIA (%)'
]

# Keep only the columns that are written to InfluxDB, then keep only the rows
# for São Paulo state (SP) in March 2021 with usable readings.
df_weather_brazil_southeast = (
    df_weather_brazil_southeast
    .loc[:, ['datetime', 'temperature(°C)', 'humidity(%)', 'region', 'state', 'station_code']]
    .loc[lambda obs: (
        # Arbitrary floor: the coldest temperature on record for Southeast
        # Brazil is -9.7 °C, so anything at or below -10.0 is discarded.
        obs['temperature(°C)'].gt(-10.0)
        # NOTE(review): -9999.0 looks like a missing-value sentinel - confirm.
        & obs['temperature(°C)'].ne(-9999.0)
        & obs['humidity(%)'].ne(-9999.0)
        # Both bounds are inclusive.
        & obs['datetime'].between('2021-03-01 00:00:00', '2021-03-31 23:59:59')
        & obs['state'].eq('SP')
    )]
)

In [None]:
# One dict per remaining row, keyed by column name.
measurements = df_weather_brazil_southeast.to_dict(orient='records')

In [None]:
# Create the InfluxDB client from the configured URL, token and organisation,
# then open a write API using the SYNCHRONOUS write options.
client = InfluxDBClient(url=url, token=token, org=org)
write_api = client.write_api(write_options=SYNCHRONOUS)

In [None]:
%%time
for measure in measurements:
    point = (
        Point('weatherBrazil')
        .tag('region', measure['region'])
        .tag('state', measure['state'])
        .tag('station_code', measure['station_code'])
        .field('temperature(°C)', measure['temperature(°C)'])
        .field('humidity(%)', measure['humidity(%)'])
        # (Obs: The year-month was changed to 2022-September because Buckets in Cloud retain 30 days)
        .time(measure['datetime'].replace(year=2022, month=9).strftime('%Y-%m-%dT%H:%M:%SZ'))
    )
    write_api.write(bucket=bucket, org=org, record=point)