In [None]:
import os
import pandas as pd
import sys
from pathlib import Path
from dotenv import load_dotenv

def _locate_repo_root(start: Path) -> Path:
    """Return the project root for a kernel whose working directory is ``start``.

    Walks from ``start`` up through its ancestors and returns the first
    directory that contains a ``src`` sub-directory (the package this cell
    imports from). If no such directory exists, falls back to the parent of
    ``start`` when ``start`` is itself named ``notebooks``, otherwise to
    ``start`` unchanged.

    Args:
        start: Absolute directory to begin the upward search from.

    Returns:
        The directory to treat as the repository root.
    """
    for candidate in (start, *start.parents):
        if (candidate / "src").is_dir():
            return candidate
    return start.parent if start.name == "notebooks" else start


# Derived from the kernel's working directory rather than a hard-coded,
# user-specific absolute path, so the notebook runs on any checkout. The search
# gives the same answer whether the kernel starts in notebooks/ or in the repo
# root, which also keeps this cell safe to re-run after the os.chdir below.
REPO_ROOT = _locate_repo_root(Path.cwd())
# `__file__` is only defined when this code runs as a script/module; inside a
# notebook kernel the REPO_ROOT-based fallback is used.
NOTEBOOK_DIR = Path(__file__).resolve().parent if "__file__" in globals() else REPO_ROOT / "notebooks"
PROJECT_ROOT = NOTEBOOK_DIR.parent  # parent of notebooks/, i.e., the repo root

# Make the project-local `src` package importable from this notebook.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

# Run from the project root so relative paths resolve against it.
os.chdir(PROJECT_ROOT)
DATA_PATH = PROJECT_ROOT / "UrbanAirQualityandHealthImpactDataset.csv"

from src.extract import extract_weather_data
from src.transform import transform_data
from src.load import create_database_engine, load_data_into_database_table, read_existing_table

# Load settings from a .env file (via python-dotenv), then fail fast if the
# database connection string is missing or empty.
load_dotenv()

DATABASE_URL = os.environ.get('DATABASE_URL')
if DATABASE_URL is None or DATABASE_URL == "":
    raise ValueError("Database URL is not set in the .env file")

In [2]:
# Extract data from CSV
# NOTE(review): extract_weather_data() is called with no arguments, so the
# DATA_PATH defined in the setup cell is not passed here; presumably
# src.extract resolves the CSV location itself — TODO confirm.
df = extract_weather_data()

In [3]:
# Transform the extracted data. transform_data() is unpacked into exactly two
# values; only the first (the weather frame) is kept and the second is
# discarded into `_`.
# NOTE: assigning to `_` shadows IPython's "last output" variable for the rest
# of the kernel session.
weather_df, _ = transform_data(df)

In [4]:
# Load data into database
# DATABASE_URL is reused from the setup cell, where it was read from the
# environment and checked to be non-empty. Re-reading it here would overwrite
# that validated value without repeating the check.
WEATHER_TABLE = 'weather'  # single source of truth for the target table name

engine = create_database_engine(DATABASE_URL)
load_data_into_database_table(weather_df, WEATHER_TABLE, engine)

# Read the table back (presumably from the database via `engine`) so the
# preview shows what was stored, not just the in-memory frame.
result = read_existing_table(WEATHER_TABLE, engine)
print("Weather data preview: \n", result.head())
print("Weather data shape: ", result.shape)

Weather data preview: 
     datetime  datetimeEpoch  tempmax  tempmin  temp  feelslikemax  \
0 2024-09-07   1.725692e+09    106.1     91.0  98.5         104.0   
1 2024-09-08   1.725779e+09    103.9     87.0  95.4         100.5   
2 2024-09-09   1.725865e+09    105.0     83.9  94.7          99.9   
3 2024-09-10   1.725952e+09    106.1     81.2  93.9         100.6   
4 2024-09-11   1.726038e+09    106.1     82.1  94.0         101.0   

   feelslikemin  feelslike   dew  humidity  ...               icon  \
0          88.1       95.9  51.5      21.0  ...  partly-cloudy-day   
1          84.7       92.3  48.7      21.5  ...          clear-day   
2          81.6       90.6  41.7      16.9  ...          clear-day   
3          79.5       89.8  39.1      15.7  ...          clear-day   
4          80.0       90.0  40.1      15.9  ...          clear-day   

                            stations  source     City  Temp_Range  Heat_Index  \
0  ['KSDL', 'KDVT', 'AZM15', 'KPHX']    comb  Phoenix      