In [20]:
import pandas as pd
import glob

base_dir = 'LGBM_days/'  # Adjust this to your actual base directory
parking_lots = ['North', 'West', 'Rose', 'Health Sciences', 'Fraser','Thunderbird','University Lot Blvd']  # Adjust with your actual parking lot names

parking_lot_dfs = {}

for parking_lot in parking_lots:
    parking_lot_df = pd.DataFrame(columns=['Timestamp', 'Occupancy'])
    occupancy_dict = {}  # Dictionary to store occupancy values by timestamp
    
    for days_to_predict in range(7, 0, -1):
        filename = f'LGBM_{parking_lot}_short_{days_to_predict}_day_future_predictions.csv'
        file_path = base_dir + filename
        
        if glob.glob(file_path):  # Check if the file exists
            df = pd.read_csv(file_path)
            df['Timestamp'] = pd.to_datetime(df['Timestamp'])  # Convert Timestamp to datetime if not already
            
            # Update occupancy_dict with new data, replacing existing data for the same timestamps
            for index, row in df.iterrows():
                timestamp = row['Timestamp']
                occupancy = row['Occupancy']
                
                occupancy_dict[timestamp] = occupancy
    
    # Convert occupancy_dict to a DataFrame and store in parking_lot_dfs
    if occupancy_dict:
        parking_lot_df = pd.DataFrame(list(occupancy_dict.items()), columns=['Timestamp', 'Occupancy'])
    
    # Store the parking_lot_df in the dictionary
    parking_lot_dfs[parking_lot] = parking_lot_df

# Print or display the resulting DataFrames
for parking_lot, df in parking_lot_dfs.items():
    print(f"Parking Lot: {parking_lot}")
    print(df)  # Print the first few rows for overview
    print("\n")

Parking Lot: North
              Timestamp  Occupancy
0   2024-06-06 13:00:00        381
1   2024-06-06 14:00:00        331
2   2024-06-06 15:00:00        238
3   2024-06-06 16:00:00        139
4   2024-06-06 17:00:00         74
..                  ...        ...
163 2024-06-13 08:00:00        235
164 2024-06-13 09:00:00        293
165 2024-06-13 10:00:00        342
166 2024-06-13 11:00:00        345
167 2024-06-13 12:00:00        363

[168 rows x 2 columns]


Parking Lot: West
              Timestamp  Occupancy
0   2024-06-06 13:00:00        494
1   2024-06-06 14:00:00        459
2   2024-06-06 15:00:00        293
3   2024-06-06 16:00:00        188
4   2024-06-06 17:00:00        136
..                  ...        ...
163 2024-06-13 08:00:00        290
164 2024-06-13 09:00:00        380
165 2024-06-13 10:00:00        417
166 2024-06-13 11:00:00        441
167 2024-06-13 12:00:00        444

[168 rows x 2 columns]


Parking Lot: Rose
              Timestamp  Occupancy
0   2024-06-06 13:

In [27]:
import pandas as pd
import glob

base_dir_days = 'LGBM_days/'  # Directory for daily predictions
base_dir_hours = 'LGBM_1_hour/'  # Directory for hourly predictions
parking_lots = ['North', 'West', 'Rose', 'Health Sciences', 'Fraser','Thunderbird','University Lot Blvd']  # Adjust with your actual parking lot names

parking_lot_dfs = {}

# Process daily predictions
for parking_lot in parking_lots:
    parking_lot_df = pd.DataFrame(columns=['Timestamp', 'Occupancy'])
    occupancy_dict = {}  # Dictionary to store occupancy values by timestamp
    
    # Process daily predictions (7 days to 1 day)
    for days_to_predict in range(7, 0, -1):
        filename = f'LGBM_{parking_lot}_short_{days_to_predict}_day_future_predictions.csv'
        file_path = base_dir_days + filename
        
        if glob.glob(file_path):  # Check if the file exists
            df = pd.read_csv(file_path)
            df['Timestamp'] = pd.to_datetime(df['Timestamp'])  # Convert Timestamp to datetime if not already
            
            # Update occupancy_dict with new data, replacing existing data for the same timestamps
            for index, row in df.iterrows():
                timestamp = row['Timestamp']
                occupancy = row['Occupancy']
                
                occupancy_dict[timestamp] = occupancy
    
    # Convert occupancy_dict to a DataFrame and store in parking_lot_df
    if occupancy_dict:
        parking_lot_df = pd.DataFrame(list(occupancy_dict.items()), columns=['Timestamp', 'Occupancy'])
    
    # Store the parking_lot_df in the dictionary
    parking_lot_dfs[parking_lot] = parking_lot_df

# Process hourly predictions (1 hour)
for parking_lot in parking_lots:
    filename = f'LGBM_{parking_lot}_short_1_hour_future_predictions.csv'
    file_path = base_dir_hours + filename
    
    if glob.glob(file_path):  # Check if the file exists
        df = pd.read_csv(file_path)
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])  # Convert Timestamp to datetime if not already
        
        # Update parking_lot_df with hourly predictions, replacing existing data for the same timestamps
        for index, row in df.iterrows():
            timestamp = row['Timestamp']
            occupancy = row['Occupancy']
            
            # Check if Timestamp already exists in parking_lot_dfs[parking_lot]
            if parking_lot in parking_lot_dfs and not parking_lot_dfs[parking_lot].empty:
                existing_row_index = parking_lot_dfs[parking_lot][parking_lot_dfs[parking_lot]['Timestamp'] == timestamp].index
                if not existing_row_index.empty:
                    parking_lot_dfs[parking_lot].loc[existing_row_index, 'Occupancy'] = occupancy
                else:
                    parking_lot_dfs[parking_lot] = parking_lot_dfs[parking_lot].append({'Timestamp': timestamp, 'Occupancy': occupancy}, ignore_index=True)
            else:
                parking_lot_dfs[parking_lot] = pd.DataFrame({'Timestamp': [timestamp], 'Occupancy': [occupancy]})
    
    # Sort and reset index if necessary
    if parking_lot in parking_lot_dfs and not parking_lot_dfs[parking_lot].empty:
        parking_lot_dfs[parking_lot] = parking_lot_dfs[parking_lot].sort_values(by='Timestamp').reset_index(drop=True)

# Print or display the resulting DataFrames
for parking_lot, df in parking_lot_dfs.items():
    print(f"Parking Lot: {parking_lot}")
    print(df)  # Print the first few rows for overview
    print("\n")


Parking Lot: North
              Timestamp  Occupancy
0   2024-06-06 13:00:00        353
1   2024-06-06 14:00:00        331
2   2024-06-06 15:00:00        238
3   2024-06-06 16:00:00        139
4   2024-06-06 17:00:00         74
..                  ...        ...
163 2024-06-13 08:00:00        235
164 2024-06-13 09:00:00        293
165 2024-06-13 10:00:00        342
166 2024-06-13 11:00:00        345
167 2024-06-13 12:00:00        363

[168 rows x 2 columns]


Parking Lot: West
              Timestamp  Occupancy
0   2024-06-06 13:00:00        473
1   2024-06-06 14:00:00        459
2   2024-06-06 15:00:00        293
3   2024-06-06 16:00:00        188
4   2024-06-06 17:00:00        136
..                  ...        ...
163 2024-06-13 08:00:00        290
164 2024-06-13 09:00:00        380
165 2024-06-13 10:00:00        417
166 2024-06-13 11:00:00        441
167 2024-06-13 12:00:00        444

[168 rows x 2 columns]


Parking Lot: Rose
              Timestamp  Occupancy
0   2024-06-06 13:

In [29]:
import pandas as pd
import glob

base_dir_days = 'LGBM_days/'      # Directory for daily predictions
base_dir_hours = 'LGBM_1_hour/'   # Directory for hourly predictions
base_dir_longterm = 'LGBM_longterm/'  # Directory for long-term predictions
parking_lots = ['North', 'West', 'Rose', 'Health Sciences', 'Fraser','Thunderbird','University Lot Blvd']  # Adjust with your actual parking lot names

# Append long-term predictions after 7 days
for parking_lot in parking_lots:
    filename = f'LGBM_{parking_lot}_longterm_future_predictions.csv'
    file_path = base_dir_longterm + filename
    
    if glob.glob(file_path):  # Check if the file exists
        df = pd.read_csv(file_path)
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])  # Convert Timestamp to datetime if not already
        
        # Filter long-term predictions that are 7 days or more into the future
        longterm_df = df[df['Timestamp'] >= pd.Timestamp.now() + pd.Timedelta(days=7)]
        
        # Append long-term predictions to parking_lot_dfs[parking_lot]
        if parking_lot in parking_lot_dfs and not parking_lot_dfs[parking_lot].empty:
            parking_lot_dfs[parking_lot] = pd.concat([parking_lot_dfs[parking_lot], longterm_df[['Timestamp', 'Occupancy']]], ignore_index=True)
        else:
            parking_lot_dfs[parking_lot] = longterm_df[['Timestamp', 'Occupancy']].copy()

# Sort and reset index after appending long-term predictions
for parking_lot in parking_lots:
    if parking_lot in parking_lot_dfs and not parking_lot_dfs[parking_lot].empty:
        parking_lot_dfs[parking_lot] = parking_lot_dfs[parking_lot].sort_values(by='Timestamp').reset_index(drop=True)

# Print or display the resulting DataFrames
for parking_lot, df in parking_lot_dfs.items():
    print(f"Parking Lot: {parking_lot}")
    print(df)  # Print the first few rows for overview
    print("\n")


Parking Lot: North
                Timestamp  Occupancy
0     2024-06-06 13:00:00        353
1     2024-06-06 14:00:00        331
2     2024-06-06 15:00:00        238
3     2024-06-06 16:00:00        139
4     2024-06-06 17:00:00         74
...                   ...        ...
17011 2025-06-06 10:00:00        216
17012 2025-06-06 11:00:00        244
17013 2025-06-06 11:00:00        244
17014 2025-06-06 12:00:00        244
17015 2025-06-06 12:00:00        244

[17016 rows x 2 columns]


Parking Lot: West
                Timestamp  Occupancy
0     2024-06-06 13:00:00        473
1     2024-06-06 14:00:00        459
2     2024-06-06 15:00:00        293
3     2024-06-06 16:00:00        188
4     2024-06-06 17:00:00        136
...                   ...        ...
17011 2025-06-06 10:00:00        293
17012 2025-06-06 11:00:00        332
17013 2025-06-06 11:00:00        332
17014 2025-06-06 12:00:00        340
17015 2025-06-06 12:00:00        340

[17016 rows x 2 columns]


Parking Lot: Rose
 

In [32]:
import os
output_folder = 'LGBM_combined_predictions/'  # Folder to save combined predictions
# Export each parking lot's predictions to individual CSV files
os.makedirs(output_folder, exist_ok=True)
for parking_lot, df in parking_lot_dfs.items():
    output_filename = os.path.join(output_folder, f'{parking_lot}_predictions.csv')
    df.to_csv(output_filename, index=False)

In [35]:
from sqlalchemy import create_engine

# Replace with your database connection details
username = 'admin'
password = 'UBCParking2024'
host = 'testdb.cdq6s8s6klpd.ca-central-1.rds.amazonaws.com'  # Usually 'localhost' or IP address
database_name = 'Parking'

# Create SQLAlchemy engine
engine = create_engine(f'mysql+pymysql://{username}:{password}@{host}/{database_name}')

from sqlalchemy import Table, Column, Integer, String, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

# Create a base class for declarative class definitions
Base = declarative_base()

# Define a class for each parking lot's predictions
class ParkingLotPredictions(Base):
    __tablename__ = 'parking_lot_predictions'  # Table name will be dynamically created
    id = Column(Integer, primary_key=True, autoincrement=True)
    Timestamp = Column(DateTime)
    Occupancy = Column(Integer)

# Create all tables in the database (if they don't exist)
Base.metadata.create_all(engine)

# Create a session to interact with the database
Session = sessionmaker(bind=engine)
session = Session()

# Iterate over parking_lot_dfs and insert data into corresponding tables
for parking_lot, df in parking_lot_dfs.items():
    table_name = f'dbo.{parking_lot}_predictions'  # Table name format: dbo.{parking_lot}_predictions

    # Create table if not exists
    if not engine.dialect.has_table(engine, table_name):
        metadata = Base.metadata
        table = Table(table_name, metadata,
                      Column('id', Integer, primary_key=True, autoincrement=True),
                      Column('Timestamp', DateTime),
                      Column('Occupancy', Integer),
                      schema='dbo')
        table.create(engine)

    # Insert data into the table
    df.to_sql(name=table_name, con=engine, if_exists='replace', index=False)

# Commit changes and close the session
session.commit()
session.close()


  Base = declarative_base()


OperationalError: (pymysql.err.OperationalError) (2003, "Can't connect to MySQL server on 'testdb.cdq6s8s6klpd.ca-central-1.rds.amazonaws.com' (timed out)")
(Background on this error at: https://sqlalche.me/e/20/e3q8)