# **Modeling the Impact of Weather on Water Consumption in Barcelona**

## Data Preparation and Integration - Iteration 1

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import os

In [None]:
# Root folder for every dataset this notebook reads or writes.
data_path = '../data'

# Load the full consumption extract from its parquet file.
consum_file = os.path.join(data_path, 'parquet/full/consum.parquet')
consum = pd.read_parquet(consum_file)

### **Consumption Data**

In [None]:
# Show the first and last rows of the raw consumption frame.
display(consum.head(), consum.tail())

In [None]:
# Print a summary of the consumption frame (columns, dtypes, non-null counts).
consum.info()

In [None]:
# Distinct-value count per column, a blank separator line, then the number
# of missing values per column.
distinct_counts = consum.nunique()
missing_counts = consum.isnull().sum()
print(distinct_counts, "", missing_counts, sep="\n")

In [None]:
# Parse the date column; values that cannot be parsed become NaT instead of
# raising.
consum['FECHA'] = pd.to_datetime(consum['FECHA'], errors='coerce')

# Rows without a usable date would be discarded silently by the later
# groupby('FECHA') (NaT keys are excluded by default). Drop them here
# explicitly and report how many were lost so the data loss is visible.
invalid_dates = int(consum['FECHA'].isna().sum())
if invalid_dates:
    print(f"Dropping {invalid_dates} rows with missing or unparseable FECHA")

# Keep only dated rows with strictly positive consumption (this also removes
# rows where CONSUMO_REAL is missing, since NaN > 0 is False).
consum = consum[consum['FECHA'].notna() & (consum['CONSUMO_REAL'] > 0)]

# NOTE(review): if FECHA carries a time-of-day component, grouping on it later
# will not yield one row per day -- confirm the column holds plain dates.

# Columns not needed downstream; errors='ignore' skips any that are absent.
drop_cols = ['US_AIGUA_GEST','NUM_MUN_SGAB','NUM_DTE_MUNI','NUM_COMPLET',
             'DATA_INST_COMP','MARCA_COMP','CODI_MODEL','DIAM_COMP']
consum = consum.drop(columns=drop_cols, errors='ignore')

display(consum.head())


In [None]:
# Sum CONSUMO_REAL over all rows sharing the same FECHA and expose the
# result as a two-column frame: FECHA, CONSUM_DIARI.
total_per_date = consum.groupby('FECHA')['CONSUMO_REAL'].sum()
consum = total_per_date.rename('CONSUM_DIARI').reset_index()

display(consum.head())

### **Weather Data**

In [None]:
# Load the cleaned 2021 weather file and preview it.
weather_21_file = os.path.join(data_path, 'weather/weather_2021_clean.csv')
weather_21 = pd.read_csv(weather_21_file)
display(weather_21.head())

In [None]:
# Load the remaining yearly weather files and stack all four years into a
# single frame with a fresh 0..n-1 index.
yearly_files = (
    'weather/weather_2022_clean.csv',
    'weather/weather_2023_clean.csv',
    'weather/weather_2024_clean.csv',
)
weather_22, weather_23, weather_24 = (
    pd.read_csv(os.path.join(data_path, rel_path)) for rel_path in yearly_files
)

yearly_frames = [weather_21, weather_22, weather_23, weather_24]
weather = pd.concat(yearly_frames, ignore_index=True)
display(weather.head(), weather.tail())

In [None]:
# Parse the reading date column; values that cannot be parsed become NaT.
reading_dates = pd.to_datetime(weather['DATA_LECTURA'], errors='coerce')
weather['DATA_LECTURA'] = reading_dates

# Average every numeric column per DATA_LECTURA value (non-numeric columns
# are left out by numeric_only=True), then rename the key to FECHA so it
# matches the consumption frame, and round to two decimals.
mean_per_date = weather.groupby('DATA_LECTURA').mean(numeric_only=True)
weather = (
    mean_per_date
    .reset_index()
    .rename(columns={'DATA_LECTURA': 'FECHA'})
    .round(2)
)

display(weather.head())

### **Integration**

In [None]:
# Join consumption and weather on FECHA. The inner join keeps only the dates
# present in both frames.
# validate='one_to_one' makes pandas raise a MergeError if either side has
# duplicate FECHA values (for example, if an aggregation cell was skipped),
# instead of silently multiplying rows.
df = pd.merge(
    consum,
    weather,
    on='FECHA',
    how='inner',
    validate='one_to_one',
)

df.head()

In [None]:
# Write the merged dataset next to the input data, without the row index.
output_file = os.path.join(data_path, 'consumption_weather.csv')
df.to_csv(output_file, index=False)
print("Integrated dataset ready for modeling.")