In [10]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Purpose of this cell: pull per-minute vehicle counts for every parkade from
# SQL Server, reduce each table to one value per hour (the latest reading in
# that hour), merge all parkades side by side on the hour, and save the result
# to output.csv.

# Database credentials.
# The password is never stored in the notebook: it is read from the
# PARKING_DB_PASSWORD environment variable, falling back to an interactive
# prompt. The other settings can be overridden through the environment and
# default to the values this notebook has always used.
# NOTE(review): a password was previously committed in this cell; treat it as
# compromised and rotate it.
db_username = os.environ.get('PARKING_DB_USERNAME', 'admin')
db_password = os.environ.get('PARKING_DB_PASSWORD') or getpass('Database password: ')
db_server = os.environ.get(
    'PARKING_DB_SERVER', 'testdb.cdq6s8s6klpd.ca-central-1.rds.amazonaws.com'
)
db_name = os.environ.get('PARKING_DB_NAME', 'Parking')

# SQLAlchemy connection string. Username and password are URL-escaped so that
# characters such as '@', ':' or '/' in them cannot corrupt the URL.
connection_string = (
    f'mssql+pyodbc://{quote_plus(db_username)}:{quote_plus(db_password)}'
    f'@{db_server}/{db_name}?driver=ODBC+Driver+17+for+SQL+Server'
)

# Create the SQLAlchemy engine
engine = create_engine(connection_string)

# Source table -> column name used for that parkade in the merged output
tables = {
    'dbo.NorthParkade_Occupancy': 'North',
    'dbo.WestParkade_Occupancy': 'West',
    'dbo.RoseGardenParkade_Occupancy': 'Rose',
    'dbo.HealthSciencesParkade_Occupancy': 'Health Sciences',
    'dbo.FraserParkade_Occupancy': 'Fraser',
    'dbo.ThunderbirdParkade_Occupancy': 'Thunderbird',
    'dbo.UnivWstBlvdParkade_Occupancy': 'University Lot Blvd'
}

# Only rows at or after this timestamp are fetched
timestamp_cutoff = '2024-03-05 23:00:00'

# Merged result. None means "no table processed yet"; testing `.empty` instead
# would wrongly restart the merge (and drop earlier columns) whenever a table
# happens to return zero rows.
all_data = None

# Query each table and merge the hourly values on the 'Hour' column
with engine.connect() as connection:
    for table, new_name in tables.items():
        print(f"Querying table: {table}")

        # Table names and the cutoff are constants defined in this cell, so
        # they are interpolated directly rather than bound as parameters.
        query = f"""
        SELECT Vehicles, Timestamp
        FROM {table}
        WHERE Timestamp >= '{timestamp_cutoff}'
        """
        df = pd.read_sql(query, connection)

        # Print the raw data retrieved
        print(f"Raw data from {table}:")
        print(df.head())

        # Convert Timestamp to datetime and round down to the nearest hour.
        # Lowercase 'h' is used because the uppercase 'H' alias is deprecated.
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        df['Hour'] = df['Timestamp'].dt.floor('h')

        # The query has no ORDER BY, so rows can arrive in any order. Sort
        # chronologically first; otherwise `.last()` would pick whichever row
        # the server returned last for the hour, not the latest reading.
        df = df.sort_values('Timestamp', kind='stable')

        # Group by the hour and take the latest record for each hour
        df = df.groupby('Hour').last().reset_index()

        # Print the data after grouping by hour
        print(f"Data after grouping by hour from {table}:")
        print(df.head())

        # Rename the Vehicles column to the new parkade name
        df = df.rename(columns={'Vehicles': new_name})
        hourly = df[['Hour', new_name]]

        # Outer merge keeps hours that are missing from some parkades
        if all_data is None:
            all_data = hourly
        else:
            all_data = pd.merge(all_data, hourly, on='Hour', how='outer')

        # Print the merged data so far
        print("Merged data so far:")
        print(all_data.head())

# Only reachable if `tables` is empty; keeps the CSV export below working
if all_data is None:
    all_data = pd.DataFrame()

# Save the results to a CSV file
csv_file_path = 'output.csv'
all_data.to_csv(csv_file_path, index=False)

print(f"Data has been successfully saved to {csv_file_path}")


Querying table: dbo.NorthParkade_Occupancy
Raw data from dbo.NorthParkade_Occupancy:
   Vehicles                    Timestamp
0        84  2024-04-06 18:24:00.0000000
1        83  2024-04-06 18:25:00.0000000
2        81  2024-04-06 18:26:00.0000000
3        81  2024-04-06 18:27:00.0000000
4        80  2024-04-06 18:28:00.0000000
Data after grouping by hour from dbo.NorthParkade_Occupancy:
                 Hour  Vehicles           Timestamp
0 2024-03-05 23:00:00         0 2024-03-05 23:59:00
1 2024-03-06 00:00:00         1 2024-03-06 00:59:00
2 2024-03-06 01:00:00         2 2024-03-06 01:59:00
3 2024-03-06 02:00:00         1 2024-03-06 02:59:00
4 2024-03-06 03:00:00         1 2024-03-06 03:59:00
Merged data so far:
                 Hour  North
0 2024-03-05 23:00:00      0
1 2024-03-06 00:00:00      1
2 2024-03-06 01:00:00      2
3 2024-03-06 02:00:00      1
4 2024-03-06 03:00:00      1
Querying table: dbo.WestParkade_Occupancy


  df['Hour'] = df['Timestamp'].dt.floor('H')


Raw data from dbo.WestParkade_Occupancy:
   Vehicles                    Timestamp
0        41  2024-03-05 23:00:00.0000000
1        40  2024-03-05 23:01:00.0000000
2        40  2024-03-05 23:02:00.0000000
3        40  2024-03-05 23:03:00.0000000
4        40  2024-03-05 23:04:00.0000000
Data after grouping by hour from dbo.WestParkade_Occupancy:
                 Hour  Vehicles           Timestamp
0 2024-03-05 23:00:00        28 2024-03-05 23:59:00
1 2024-03-06 00:00:00        19 2024-03-06 00:59:00
2 2024-03-06 01:00:00        22 2024-03-06 01:59:00
3 2024-03-06 02:00:00        18 2024-03-06 02:59:00
4 2024-03-06 03:00:00        18 2024-03-06 03:59:00
Merged data so far:
                 Hour  North  West
0 2024-03-05 23:00:00      0    28
1 2024-03-06 00:00:00      1    19
2 2024-03-06 01:00:00      2    22
3 2024-03-06 02:00:00      1    18
4 2024-03-06 03:00:00      1    18
Querying table: dbo.RoseGardenParkade_Occupancy


  df['Hour'] = df['Timestamp'].dt.floor('H')


Raw data from dbo.RoseGardenParkade_Occupancy:
   Vehicles                    Timestamp
0         7  2024-03-05 23:00:00.0000000
1         7  2024-03-05 23:01:00.0000000
2         7  2024-03-05 23:02:00.0000000
3         7  2024-03-05 23:03:00.0000000
4         8  2024-03-05 23:04:00.0000000
Data after grouping by hour from dbo.RoseGardenParkade_Occupancy:
                 Hour  Vehicles           Timestamp
0 2024-03-05 23:00:00         5 2024-03-05 23:59:00
1 2024-03-06 00:00:00         3 2024-03-06 00:59:00
2 2024-03-06 01:00:00         3 2024-03-06 01:59:00
3 2024-03-06 02:00:00         3 2024-03-06 02:59:00
4 2024-03-06 03:00:00         3 2024-03-06 03:59:00
Merged data so far:
                 Hour  North  West  Rose
0 2024-03-05 23:00:00      0    28     5
1 2024-03-06 00:00:00      1    19     3
2 2024-03-06 01:00:00      2    22     3
3 2024-03-06 02:00:00      1    18     3
4 2024-03-06 03:00:00      1    18     3
Querying table: dbo.HealthSciencesParkade_Occupancy


  df['Hour'] = df['Timestamp'].dt.floor('H')


Raw data from dbo.HealthSciencesParkade_Occupancy:
   Vehicles                    Timestamp
0        43  2024-03-05 23:00:00.0000000
1        40  2024-03-05 23:01:00.0000000
2        37  2024-03-05 23:02:00.0000000
3        33  2024-03-05 23:03:00.0000000
4        32  2024-03-05 23:04:00.0000000
Data after grouping by hour from dbo.HealthSciencesParkade_Occupancy:
                 Hour  Vehicles           Timestamp
0 2024-03-05 23:00:00        15 2024-03-05 23:59:00
1 2024-03-06 00:00:00        10 2024-03-06 00:59:00
2 2024-03-06 01:00:00        10 2024-03-06 01:59:00
3 2024-03-06 02:00:00         9 2024-03-06 02:59:00
4 2024-03-06 03:00:00         9 2024-03-06 03:59:00
Merged data so far:
                 Hour  North  West  Rose  Health Sciences
0 2024-03-05 23:00:00      0    28     5               15
1 2024-03-06 00:00:00      1    19     3               10
2 2024-03-06 01:00:00      2    22     3               10
3 2024-03-06 02:00:00      1    18     3                9
4 2024-03-0

  df['Hour'] = df['Timestamp'].dt.floor('H')


Raw data from dbo.FraserParkade_Occupancy:
   Vehicles                    Timestamp
0        19  2024-03-05 23:00:00.0000000
1        19  2024-03-05 23:01:00.0000000
2        18  2024-03-05 23:02:00.0000000
3        18  2024-03-05 23:03:00.0000000
4        18  2024-03-05 23:04:00.0000000
Data after grouping by hour from dbo.FraserParkade_Occupancy:
                 Hour  Vehicles           Timestamp
0 2024-03-05 23:00:00        19 2024-03-05 23:59:00
1 2024-03-06 00:00:00        18 2024-03-06 00:59:00
2 2024-03-06 01:00:00        19 2024-03-06 01:59:00
3 2024-03-06 02:00:00        19 2024-03-06 02:59:00
4 2024-03-06 03:00:00        20 2024-03-06 03:59:00
Merged data so far:
                 Hour  North  West  Rose  Health Sciences  Fraser
0 2024-03-05 23:00:00      0    28     5               15      19
1 2024-03-06 00:00:00      1    19     3               10      18
2 2024-03-06 01:00:00      2    22     3               10      19
3 2024-03-06 02:00:00      1    18     3             

  df['Hour'] = df['Timestamp'].dt.floor('H')


Raw data from dbo.ThunderbirdParkade_Occupancy:
   Vehicles                    Timestamp
0       504  2024-04-19 10:30:00.0000000
1       506  2024-04-19 10:31:00.0000000
2       507  2024-04-19 10:32:00.0000000
3       508  2024-04-19 10:33:00.0000000
4       506  2024-04-19 10:34:00.0000000
Data after grouping by hour from dbo.ThunderbirdParkade_Occupancy:
                 Hour  Vehicles           Timestamp
0 2024-03-05 23:00:00         2 2024-03-05 23:59:00
1 2024-03-06 00:00:00         0 2024-03-06 00:59:00
2 2024-03-06 01:00:00         1 2024-03-06 01:59:00
3 2024-03-06 02:00:00         1 2024-03-06 02:59:00
4 2024-03-06 03:00:00         1 2024-03-06 03:59:00
Merged data so far:
                 Hour  North  West  Rose  Health Sciences  Fraser  Thunderbird
0 2024-03-05 23:00:00      0    28     5               15      19            2
1 2024-03-06 00:00:00      1    19     3               10      18            0
2 2024-03-06 01:00:00      2    22     3               10      19     

  df['Hour'] = df['Timestamp'].dt.floor('H')


Raw data from dbo.UnivWstBlvdParkade_Occupancy:
   Vehicles                    Timestamp
0        10  2024-04-20 06:18:00.0000000
1        10  2024-04-20 06:19:00.0000000
2        10  2024-04-20 06:20:00.0000000
3        10  2024-04-20 06:21:00.0000000
4        10  2024-04-20 06:22:00.0000000
Data after grouping by hour from dbo.UnivWstBlvdParkade_Occupancy:
                 Hour  Vehicles           Timestamp
0 2024-03-05 23:00:00        18 2024-03-05 23:59:00
1 2024-03-06 00:00:00        16 2024-03-06 00:59:00
2 2024-03-06 01:00:00        18 2024-03-06 01:59:00
3 2024-03-06 02:00:00        17 2024-03-06 02:59:00
4 2024-03-06 03:00:00        17 2024-03-06 03:59:00
Merged data so far:
                 Hour  North  West  Rose  Health Sciences  Fraser  \
0 2024-03-05 23:00:00      0    28     5               15      19   
1 2024-03-06 00:00:00      1    19     3               10      18   
2 2024-03-06 01:00:00      2    22     3               10      19   
3 2024-03-06 02:00:00      1   

  df['Hour'] = df['Timestamp'].dt.floor('H')


In [7]:
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Purpose of this cell: fetch the observed-weather rows recorded at or after
# the cutoff from dbo.actual_weather, convert their Unix timestamps to
# datetimes, and save the result to weather_output.csv.

# Database credentials.
# The password is never stored in the notebook: it is read from the
# PARKING_DB_PASSWORD environment variable, falling back to an interactive
# prompt. The other settings can be overridden through the environment and
# default to the values this notebook has always used.
# NOTE(review): a password was previously committed in this cell; treat it as
# compromised and rotate it.
db_username = os.environ.get('PARKING_DB_USERNAME', 'admin')
db_password = os.environ.get('PARKING_DB_PASSWORD') or getpass('Database password: ')
db_server = os.environ.get(
    'PARKING_DB_SERVER', 'testdb.cdq6s8s6klpd.ca-central-1.rds.amazonaws.com'
)
db_name = os.environ.get('PARKING_DB_NAME', 'Parking')

# SQLAlchemy connection string. Username and password are URL-escaped so that
# characters such as '@', ':' or '/' in them cannot corrupt the URL.
connection_string = (
    f'mssql+pyodbc://{quote_plus(db_username)}:{quote_plus(db_password)}'
    f'@{db_server}/{db_name}?driver=ODBC+Driver+17+for+SQL+Server'
)

# Create the SQLAlchemy engine
engine = create_engine(connection_string)

# Define the timestamp to filter rows
timestamp_cutoff = '2024-03-05 23:00:00'
# The `dt` column is compared as a Unix epoch (seconds), so convert the cutoff.
# NOTE(review): the cutoff string carries no timezone and pandas appears to
# treat it as UTC here -- confirm this matches the timezone of the parkade
# timestamps before joining the two exports.
timestamp_cutoff_unix = pd.Timestamp(timestamp_cutoff).timestamp()

# Columns to select from the weather table
# NOTE: humidity, clouds_all, rain_1h and snow_1h are not available in the
# database, so they are not selected here.
weather_columns = [
    'temp', 'visibility', 'dew_point', 'feels_like',
    'pressure', 'wind_speed',
    'clouds', 'rain', 'snow'
]

# Query the actual_weather table (newest rows first)
with engine.connect() as connection:
    # Column names and the cutoff are constants defined in this cell, so they
    # are interpolated directly rather than bound as parameters.
    query = f"""
    SELECT dt, {', '.join(weather_columns)}
    FROM dbo.actual_weather
    WHERE dt >= {int(timestamp_cutoff_unix)}
    ORDER BY dt DESC
    """
    weather_df = pd.read_sql(query, connection)

# Convert Unix timestamp (seconds) to datetime; the result is timezone-naive
weather_df['dt'] = pd.to_datetime(weather_df['dt'], unit='s')
weather_df = weather_df.rename(columns={'dt': 'Timestamp'})

# Print the first few rows of the weather data
print("Weather data retrieved:")
print(weather_df.head())

# Save the weather data to a CSV file
weather_csv_file_path = 'weather_output.csv'
weather_df.to_csv(weather_csv_file_path, index=False)

print(f"Weather data has been successfully saved to {weather_csv_file_path}")


Weather data retrieved:
            Timestamp    temp  visibility  dew_point  feels_like  pressure  \
0 2024-06-13 10:00:00  282.31       10000     279.58      281.80    1020.0   
1 2024-06-13 09:00:00  283.07       10000     279.60      282.75    1020.0   
2 2024-06-13 08:00:00  283.88       10000     280.57      283.10    1021.0   
3 2024-06-13 07:00:00  284.54       10000     280.85      283.77    1021.0   
4 2024-06-13 06:00:00  285.34       10000     281.24      284.60    1021.0   

   wind_speed  clouds  rain  snow  
0        1.54      27   0.0   0.0  
1        1.46      33   0.0   0.0  
2        0.45      42   0.0   0.0  
3        1.03       0   0.0   0.0  
4        1.69       2   0.0   0.0  
Weather data has been successfully saved to weather_output.csv
