# Imports

In [None]:
from zipfile import ZipFile
from gzip import compress
import os
import pandas as pd

# Récupération des données

In [None]:
# Column names are supplied explicitly for the status and weather files
# (presumably because those CSVs carry no header row -- verify against the data).
status_header = ['date', 'Station', 'Status', 'Bikes', 'Slots']
weather_header = ['Timestamp', 'Status', 'Clouds', 'Humidity', 'Pressure', 'Rain', 'WindGust', 'WindVarEnd', 'WindVarBeg', 'WindDeg', 'WindSpeed', 'Snow', 'TemperatureMax', 'TemperatureMin', 'TemperatureTemp']

# Load the three raw tables from the archive. The context managers make sure
# the archive and every member handle are closed once the frames are in memory
# (previously they were left open for the lifetime of the kernel).
# NOTE(review): each CSV is parsed in full before .head() keeps the first
# 10000 rows; read_csv(nrows=...) would avoid that but may change the inferred
# dtypes, so the original behavior is kept here.
with ZipFile('data/brut.zip', 'r') as data:
    # Station summary: read with its own header row.
    with data.open('brut/bicincitta_parma_summary.csv') as member:
        stations = pd.read_csv(member, sep=';')

    # Station status samples, with the 'date' column parsed as datetimes.
    with data.open('brut/status_bicincitta_parma.csv') as member:
        status = pd.read_csv(member, sep=';', names=status_header,
                             parse_dates=["date"]).head(n=10000)

    # Weather samples, with the 'Timestamp' column parsed as datetimes.
    with data.open('brut/weather_bicincitta_parma.csv') as member:
        weather = pd.read_csv(member, sep=';', names=weather_header,
                              parse_dates=["Timestamp"]).head(n=10000)

# Nettoyage / correction des données

In [None]:
# Strip the first four characters of every station name
# (presumably a numbering prefix -- verify against the summary file).
stations['station'] = stations['station'].map(lambda name: name[4:])

# Keep only the samples whose Status equals 1, then discard the now-constant
# Status column.
active = status['Status'] == 1
status = status.loc[active].drop(columns='Status')

# Total = bikes + slots for each sample.
status["Total"] = status["Bikes"].add(status["Slots"])

# Stockage et compression des résultats groupés

In [None]:
# Columns written to each per-station file, in output order.
# 'Timestamp' and 'Status' are taken from the weather table: the station
# frame's own 'Status' column is dropped earlier in this file and its time
# column is named 'date'.
indexes_to_keep = ['Timestamp', 'Station', 'Bikes', 'Slots', 'Total', 'Status', 'Humidity', 'Pressure', 'Rain', 'WindDeg', 'WindSpeed', 'Snow', 'TemperatureTemp']

# The weather series does not depend on the station, so bucket it into
# 10-minute bins once instead of recomputing it on every loop iteration.
# Each bin keeps its last observation and is labelled by its right edge.
# NOTE(review): dropna() discards a bin as soon as ANY weather column is
# missing, including columns that are not exported (e.g. 'Clouds',
# 'WindGust') -- confirm this is intended.
w = weather.set_index('Timestamp').resample('10min', label='right', closed='right').last().dropna().reset_index()

for key, df in status.groupby("Station"):
    # One output directory per station: lower-cased name, spaces -> underscores.
    # str() keeps this working when the Station key is not a string.
    path = f"data/{str(key).lower().replace(' ', '_')}"
    os.makedirs(path, exist_ok=True)

    # Same 10-minute bucketing for this station's samples; dropna() removes
    # the empty bins that resample() creates for gaps in the data.
    s = df.set_index('date').resample('10min', label='right', closed='right').last().dropna().reset_index()

    # Left join: every station bin is kept, weather columns are filled where a
    # matching weather bin exists.
    # NOTE(review): 'date' is not in indexes_to_keep, so station bins without a
    # matching weather bin are exported with an empty 'Timestamp' -- confirm.
    velo = pd.merge(left=s, right=w, left_on='date', right_on='Timestamp', how='left')[indexes_to_keep]

    # Serialize to ';'-separated CSV, gzip it in memory and write it out.
    velo_gz = compress(velo.to_csv(sep=";", index=False).encode('utf-8'))
    with open(f"{path}/station.csv.gz", 'wb') as file:
        file.write(velo_gz)