In [38]:
import os
from datetime import datetime
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
import requests
from sqlalchemy import create_engine, text

In [39]:
# ---- 1. Connection setup ----
# Connection settings are read from the environment so that credentials are
# never stored in the notebook. Non-secret settings fall back to the local
# development values this notebook has always used.
# NOTE(review): a real password used to be hardcoded in this cell; treat it as
# leaked (it is in the notebook history) and rotate it.
host_name = os.environ.get("SHUTTLE_DW_HOST_NAME", "localhost")
host_ip = os.environ.get("SHUTTLE_DW_HOST_IP", "127.0.0.1")
port = os.environ.get("SHUTTLE_DW_PORT", "3306")
user_id = os.environ.get("SHUTTLE_DW_USER", "root")
db_name = os.environ.get("SHUTTLE_DW_NAME", "shuttle_dw")

# Prompt interactively when the password is not provided via the environment.
pwd = os.environ.get("SHUTTLE_DW_PASSWORD") or getpass(f"MySQL password for {user_id}: ")

# The password is URL-escaped because characters such as '@', ':' or '/' would
# otherwise be parsed as URL delimiters and corrupt the connection string.
engine_str = f"mysql+pymysql://{user_id}:{quote_plus(pwd)}@{host_ip}:{port}/{db_name}"
engine = create_engine(engine_str)
print("SQLAlchemy engine created successfully.")

SQLAlchemy engine created successfully.


In [40]:
# ---- 2. Create Weather Table ----
# One row per stored daily observation: the city, the aggregated temperature,
# the dominant weather condition, the calendar day it describes, and the time
# the row was loaded.
create_weather_table_sql = """
    CREATE TABLE IF NOT EXISTS weather (
        weather_id INT AUTO_INCREMENT PRIMARY KEY,
        city VARCHAR(50),
        temperature FLOAT,
        weather_condition VARCHAR(50),
        weather_date DATE,
        timestamp DATETIME
    )
    """

# engine.begin() commits on success and rolls back on error, so the statement
# no longer depends on the database implicitly committing DDL.
with engine.begin() as conn:
    conn.execute(text(create_weather_table_sql))

# Report only after the transaction has been committed.
print("Weather table ensured in database.")

Weather table ensured in database.


In [41]:
import requests
from datetime import datetime, timedelta, timezone
from collections import Counter
from sqlalchemy import create_engine, text

CITY = "Charlottesville"
# The API key is read from the environment (with an interactive prompt as a
# fallback) so it is never stored in the notebook.
# NOTE(review): a real key used to be hardcoded here; treat it as leaked and
# rotate it.
API_KEY = os.environ.get("OPENWEATHER_API_KEY") or getpass("OpenWeatherMap API key: ")
lat = 38.0293
lon = -78.4767

HISTORY_URL = "https://history.openweathermap.org/data/2.5/history/city"
REQUEST_TIMEOUT_S = 30           # fail instead of hanging the kernel on a stalled connection
SECONDS_PER_DAY = 24 * 60 * 60

# Start and end dates for October 2025
start_date = datetime(2025, 10, 1)
end_date = datetime(2025, 10, 31)

current_date = start_date
weather_records = []
failed_dates = []   # days that could not be fetched, reported at the end

while current_date <= end_date:
    day = current_date.date()

    # Unix timestamps bounding the day (days are treated as UTC days).
    # NOTE(review): Charlottesville local days differ from UTC days by 4-5 hours;
    # confirm that UTC-day aggregation is what the analysis wants.
    start_ts = int(current_date.replace(tzinfo=timezone.utc).timestamp())
    end_ts = start_ts + SECONDS_PER_DAY - 1

    # Advance up front so every `continue` below moves on to the next day.
    current_date += timedelta(days=1)

    # An explicit `end` keeps the response limited to this one day; with only
    # `start` the API chooses the range itself, which would make the "daily"
    # aggregate below cover more than the day it is stored under.
    # NOTE(review): confirm the default range against the History API docs.
    request_params = {
        "lat": lat,
        "lon": lon,
        "type": "hour",
        "start": start_ts,
        "end": end_ts,
        "appid": API_KEY,
    }

    try:
        response = requests.get(HISTORY_URL, params=request_params, timeout=REQUEST_TIMEOUT_S)
    except requests.RequestException as exc:
        # A transient network problem should not abort the whole month.
        print(f"Failed to fetch data for {day}: {exc}")
        failed_dates.append(day)
        continue

    if response.status_code != 200:
        print(f"Failed to fetch data for {day}: {response.status_code}")
        failed_dates.append(day)
        continue

    hourly_list = response.json().get("list", [])
    if not hourly_list:
        print(f"No data for {day}")
        continue

    # Aggregate daily: mean temperature and the most frequent condition.
    # No `units` parameter is sent, so temperatures are stored exactly as the
    # API returns them; the later conversion cell treats them as Kelvin.
    temps = [hour["main"]["temp"] for hour in hourly_list]
    conditions = [hour["weather"][0]["main"] for hour in hourly_list]
    avg_temp = sum(temps) / len(temps)
    most_common_condition = Counter(conditions).most_common(1)[0][0]

    weather_records.append({
        "city": CITY,
        "temperature": round(avg_temp, 2),
        "weather_condition": most_common_condition,
        "weather_date": day,
        "timestamp": datetime.now(timezone.utc)
    })

    print(f"Processed {day}")

# Insert all fetched days into MySQL in a single transaction.
if weather_records:
    with engine.begin() as conn:
        # Remove rows previously loaded for the same city/day so that re-running
        # this cell replaces them instead of piling up duplicates (duplicates
        # would multiply passenger totals in any join on weather_date).
        conn.execute(
            text("""
                DELETE FROM weather
                WHERE city = :city AND weather_date = :weather_date
            """),
            [
                {"city": record["city"], "weather_date": record["weather_date"]}
                for record in weather_records
            ]
        )
        # A list of parameter dicts runs the INSERT once per record.
        conn.execute(
            text("""
                INSERT INTO weather (city, temperature, weather_condition, weather_date, timestamp)
                VALUES (:city, :temperature, :weather_condition, :weather_date, :timestamp)
            """),
            weather_records
        )

# Report what actually happened rather than claiming the whole month loaded.
print(f"Inserted {len(weather_records)} daily weather records for October 2025.")
if failed_dates:
    print("Days that could not be fetched: " + ", ".join(str(d) for d in failed_dates))


Processed 2025-10-01
Processed 2025-10-02
Processed 2025-10-03
Processed 2025-10-04
Processed 2025-10-05
Processed 2025-10-06
Processed 2025-10-07
Processed 2025-10-08
Processed 2025-10-09
Processed 2025-10-10
Processed 2025-10-11
Processed 2025-10-12
Processed 2025-10-13
Processed 2025-10-14
Processed 2025-10-15
Processed 2025-10-16
Processed 2025-10-17
Processed 2025-10-18
Processed 2025-10-19
Processed 2025-10-20
Processed 2025-10-21
Processed 2025-10-22
Processed 2025-10-23
Processed 2025-10-24
Processed 2025-10-25
Processed 2025-10-26
Processed 2025-10-27
Failed to fetch data for 2025-10-28: 400
Failed to fetch data for 2025-10-29: 400
Failed to fetch data for 2025-10-30: 400
Failed to fetch data for 2025-10-31: 400
All October 2025 weather data inserted successfully!


In [46]:
# ---- Update weather table: convert Kelvin to Fahrenheit ----
# Only rows whose value is still in the Kelvin range are converted, so running
# this cell more than once does not convert already-converted rows again.
# The threshold separates the two scales for real-world air temperatures:
# Kelvin readings are well above 150, Fahrenheit readings well below it.
KELVIN_MIN_THRESHOLD = 150

update_query = """
UPDATE weather
SET temperature = ROUND((temperature - 273.15) * 9/5 + 32, 2)
WHERE temperature > :kelvin_threshold;
"""

# engine.begin() commits on success and rolls back if the UPDATE fails.
with engine.begin() as conn:
    conn.execute(text(update_query), {"kelvin_threshold": KELVIN_MIN_THRESHOLD})

# Report only after the change has been committed.
print("Weather table temperatures converted to Fahrenheit.")


Weather table temperatures converted to Fahrenheit.


In [47]:
from sqlalchemy import text
import pandas as pd

# ---- Query: passengers by day, temperature, weather, and vehicle ----
# Sums passenger counts per (date, vehicle) and attaches the weather row whose
# weather_date matches the date dimension's full_date.
query = """
SELECT
    d.full_date AS day,
    w.temperature,
    w.weather_condition,
    v.vehicle_id,
    v.vehicle_type,
    SUM(f.passengers) AS total_passengers
FROM fact_passenger_counts f
JOIN dim_date d ON f.date_key = d.date_key
JOIN weather w ON d.full_date = w.weather_date
JOIN dim_vehicle v ON f.vehicle_id = v.vehicle_id
GROUP BY d.full_date, w.temperature, w.weather_condition, v.vehicle_id, v.vehicle_type
ORDER BY d.full_date, v.vehicle_id;
"""

# ---- Execute query ----
# Pull the column names and all rows while the connection is open; the frame
# itself is assembled afterwards from the already-fetched data.
with engine.connect() as conn:
    query_result = conn.execute(text(query))
    column_names = query_result.keys()
    fetched_rows = query_result.fetchall()

df_passenger_weather = pd.DataFrame(fetched_rows, columns=column_names)

# ---- Display results ----
preview = df_passenger_weather.head()
print(preview)


          day  temperature weather_condition  vehicle_id vehicle_type  \
0  2025-10-01        65.48             Clear         101          bus   
1  2025-10-01        65.48             Clear         102        ejest   
2  2025-10-02        58.80             Clear         103          van   
3  2025-10-02        58.80             Clear         104          car   
4  2025-10-03        59.29             Clear         105          bus   

  total_passengers  
0               32  
1               18  
2               28  
3                4  
4               36  
