In [2]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime

# Database credentials
db_username = 'admin'
db_password = 'UBCParking2024'
db_server = 'testdb.cdq6s8s6klpd.ca-central-1.rds.amazonaws.com'
db_name = 'Parking'

# SQLAlchemy connection string
connection_string = f'mssql+pyodbc://{db_username}:{db_password}@{db_server}/{db_name}?driver=ODBC+Driver+17+for+SQL+Server'

# Create the SQLAlchemy engine
engine = create_engine(connection_string)

# List of tables to query with their new names
tables = {
    'dbo.NorthParkade_Occupancy': 'North',
    'dbo.WestParkade_Occupancy': 'West',
    'dbo.RoseGardenParkade_Occupancy': 'Rose',
    'dbo.HealthSciencesParkade_Occupancy': 'Health Sciences',
    'dbo.FraserParkade_Occupancy': 'Fraser',
    'dbo.ThunderbirdParkade_Occupancy': 'Thunderbird',
    'dbo.UnivWstBlvdParkade_Occupancy': 'University Lot Blvd'
}

# Define the timestamp to filter rows
timestamp_cutoff = '2024-03-05 23:00:00'
# Convert timestamp_cutoff to Unix timestamp
timestamp_unix = int(datetime.strptime(timestamp_cutoff, '%Y-%m-%d %H:%M:%S').timestamp())


# Initialize an empty DataFrame to hold all results
all_data = pd.DataFrame()

# Query each table and concatenate results
with engine.connect() as connection:
    for table, new_name in tables.items():
        print(f"Querying table: {table}")
        
        query = f"""
        SELECT Vehicles, TimestampUnix
        FROM {table}
        
        """
        df = pd.read_sql(query, connection)
        
        # Print the raw data retrieved
        print(f"Raw data from {table}:")
        print(df.head())
        
        # Convert TimestampUnix to datetime and round down to the nearest hour
        df['Timestamp'] = pd.to_datetime(df['TimestampUnix'], unit='s')
        df['Hour'] = df['Timestamp'].dt.floor('H')
        
        # Group by the hour and take the latest record for each hour
        df = df.groupby('Hour').last().reset_index()
        
        # Print the data after grouping by hour
        print(f"Data after grouping by hour from {table}:")
        print(df.head())
        
        # Rename the Vehicles column to the new parkade name
        df = df.rename(columns={'Vehicles': new_name})
        
        # Merge the data
        if all_data.empty:
            all_data = df[['Hour', new_name]]
        else:
            all_data = pd.merge(all_data, df[['Hour', new_name]], on='Hour', how='outer')
        
        # Print the merged data so far
        print(f"Merged data so far:")
        print(all_data.head())

# Save the results to a CSV file
csv_file_path = 'output.csv'
all_data.to_csv(csv_file_path, index=False)

print(f"Data has been successfully saved to {csv_file_path}")


Querying table: dbo.NorthParkade_Occupancy
Raw data from dbo.NorthParkade_Occupancy:
   Vehicles  TimestampUnix
0        90     1645901520
1        89     1645901580
2        89     1645901640
3        89     1645901700
4        90     1645901760


  df['Hour'] = df['Timestamp'].dt.floor('H')


Data after grouping by hour from dbo.NorthParkade_Occupancy:
                 Hour  Vehicles  TimestampUnix           Timestamp
0 2018-09-06 09:00:00       950     1536227940 2018-09-06 09:59:00
1 2018-09-06 10:00:00       918     1536231540 2018-09-06 10:59:00
2 2018-09-06 11:00:00       832     1536235140 2018-09-06 11:59:00
3 2018-09-06 12:00:00       850     1536238740 2018-09-06 12:59:00
4 2018-09-06 13:00:00       859     1536242340 2018-09-06 13:59:00
Merged data so far:
                 Hour  North
0 2018-09-06 09:00:00    950
1 2018-09-06 10:00:00    918
2 2018-09-06 11:00:00    832
3 2018-09-06 12:00:00    850
4 2018-09-06 13:00:00    859
Querying table: dbo.WestParkade_Occupancy


KeyboardInterrupt: 

In [5]:
import pandas as pd
from sqlalchemy import create_engine

# Database credentials
db_username = 'admin'
db_password = 'UBCParking2024'
db_server = 'testdb.cdq6s8s6klpd.ca-central-1.rds.amazonaws.com'
db_name = 'Parking'

# SQLAlchemy connection string
connection_string = f'mssql+pyodbc://{db_username}:{db_password}@{db_server}/{db_name}?driver=ODBC+Driver+17+for+SQL+Server'

# Create the SQLAlchemy engine
engine = create_engine(connection_string)

# Define the timestamp to filter rows
timestamp_cutoff = '2024-03-05 23:00:00'
timestamp_cutoff_unix = pd.Timestamp(timestamp_cutoff).timestamp()

# Columns to select from the weather table
weather_columns = [
    'temp', 'visibility', 'dew_point', 'feels_like', 
    'pressure', 'wind_speed', 
    'clouds', 'rain', 'snow'
] # we're missing humidity, clouds_all, rain_1h, snow_1h from the db

# Query the actual_weather table
with engine.connect() as connection:
    query = f"""
    SELECT dt, {', '.join(weather_columns)}
    FROM dbo.actual_weather
    ORDER BY dt ASC
    """
    weather_df = pd.read_sql(query, connection)

# Convert Unix timestamp to datetime
weather_df['dt'] = pd.to_datetime(weather_df['dt'], unit='s')
weather_df = weather_df.rename(columns={'dt': 'Timestamp'})

# Print the first few rows of the weather data
print("Weather data retrieved:")
print(weather_df.head())
print(weather_df.tail())

# Save the weather data to a CSV file
weather_csv_file_path = 'weather_output.csv'
weather_df.to_csv(weather_csv_file_path, index=False)

print(f"Weather data has been successfully saved to {weather_csv_file_path}")


Weather data retrieved:
            Timestamp    temp  visibility  dew_point  feels_like  pressure  \
0 2024-05-26 19:00:00  283.01       10000     282.40      279.58    1019.0   
1 2024-05-26 20:00:00  283.85       10000     283.08      283.46    1019.0   
2 2024-05-26 21:00:00  284.24       10000     283.31      283.86    1020.0   
3 2024-05-26 22:00:00  284.52       10000     283.43      284.14    1020.0   
4 2024-05-26 23:00:00  284.48       10000     283.39      284.10    1020.0   

   wind_speed  clouds  rain  snow  
0        8.23     100   0.0   0.0  
1        7.72     100   0.0   0.0  
2       10.29     100   0.0   0.0  
3       10.29     100   0.0   0.0  
4        9.26     100   0.0   0.0  
              Timestamp    temp  visibility  dew_point  feels_like  pressure  \
872 2024-07-02 03:00:00  292.60       10000     285.62      292.27    1019.0   
873 2024-07-02 04:00:00  291.59       10000     285.36      291.24    1019.0   
874 2024-07-02 05:00:00  290.51       10000     284