In [None]:
import pandas as pd
import os
from dotenv import load_dotenv
from google.cloud.sql.connector import Connector, IPTypes
import pg8000
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Loading environment variables
load_dotenv()

# Required settings: a missing variable raises KeyError right here.
instance_connection_name, db_user, db_pass, db_name = (
    os.environ["INSTANCE_CONNECTION_NAME"],
    os.environ["DB_USER"],
    os.environ["DB_PASS"],
    os.environ["DB_NAME"],
)

# Any non-empty PRIVATE_IP value selects the private address;
# an unset or empty variable falls back to the public one.
if os.environ.get("PRIVATE_IP"):
    ip_type = IPTypes.PRIVATE
else:
    ip_type = IPTypes.PUBLIC

# Echo the connection target; the password is not printed.
print(f"Connection to: {instance_connection_name}")
print(f"Database: {db_name}")
print(f"User: {db_user}")

In [None]:
# Creating a connection
# Module-level Cloud SQL connector object that get_conn() calls .connect() on.
# NOTE(review): constructed with refresh_strategy="LAZY" — see the connector
# documentation for exactly when credentials are refreshed in this mode.
connector = Connector(refresh_strategy="LAZY")

def get_conn() -> pg8000.dbapi.Connection:
    """Open a database connection through the module-level ``connector``.

    Reads the notebook globals ``instance_connection_name``, ``db_user``,
    ``db_pass``, ``db_name`` and ``ip_type`` and requests the "pg8000" driver.

    Returns:
        Whatever ``connector.connect`` returns (annotated as a pg8000 DBAPI
        connection). The caller is responsible for closing it.
    """
    connect_kwargs = {
        "user": db_user,
        "password": db_pass,
        "db": db_name,
        "ip_type": ip_type,
    }
    return connector.connect(instance_connection_name, "pg8000", **connect_kwargs)

# Open the connection that the later pd.read_sql call uses.
# The message below is only printed if get_conn() did not raise.
conn = get_conn()
print("Connection established successfully!")

In [None]:
# Download all data from the table
# Selects every row of traffic_data, newest date_time first (no WHERE / LIMIT).
# NOTE(review): the column is spelled 'heigth' here and in later cells —
# presumably this matches the table schema; confirm before "fixing" the spelling.
query = """
    SELECT 
        id,
        vehicle_id,
        area,
        x,
        y,
        width,
        heigth,
        date_time,
        frame_id
    FROM traffic_data
    ORDER BY date_time DESC
"""

# NOTE(review): conn is the object returned by get_conn() (a raw DBAPI connection,
# not a SQLAlchemy connectable); pandas documents SQLAlchemy connectables and
# sqlite3 connections for read_sql, so this may emit a UserWarning — confirm.
df = pd.read_sql(query, conn)
print(f"Loaded {len(df)} records")
# Last expression of the cell: show the first rows as the cell output.
df.head()

In [None]:
# Split the timestamp into calendar parts; the .dt accessor requires
# date_time to be a datetime-typed column.
for part in ('year', 'month', 'day', 'hour'):
    df[part] = getattr(df['date_time'].dt, part)

# Pack vehicle_id and the year/month/day/hour into one float key of the form
# vehicle_id.YYYYMMDDHH (ten decimal places), so the same vehicle_id seen in a
# different hour gets a different key.
df['unique_vehicle_id'] = (
    df['vehicle_id']
    + df['year'] / 10_000
    + df['month'] / 1_000_000
    + df['day'] / 100_000_000
    + df['hour'] / 10_000_000_000
).round(10)
df.head()

In [None]:
# Area of each row's box: width times height (the column is named 'heigth' in df).
df['size'] = df['width'].mul(df['heigth'])
df.head()

In [None]:
# One row per unique_vehicle_id with the max and min of 'size' and 'y'.
# The result has two-level columns: (size, max), (size, min), (y, max), (y, min).
min_max_df = (
    df.groupby('unique_vehicle_id')[['size', 'y']]
    .agg(['max', 'min'])
    .sort_index()
)

In [None]:
# Vertical span covered by each vehicle: largest y minus smallest y.
# This reads the two-level ('y', ...) columns, so the cell cannot be re-run
# after the rename below without first rebuilding min_max_df.
min_max_df['way_size'] = min_max_df[('y', 'max')] - min_max_df[('y', 'min')]

# Flatten the two-level column index into plain names (same column order).
min_max_df.columns = [
    'size_max',
    'size_min',
    'y_max',
    'y_min',
    'way_size',
]
min_max_df.head()

In [None]:
# Flag vehicles whose vertical span (way_size) is strictly greater than 240.
min_max_df['full_way'] = min_max_df['way_size'].gt(240)

In [None]:
# Daytime flag: True when the hour encoded in the last two of the ten decimal
# digits of the index (unique_vehicle_id) satisfies 6 <= hour < 18.
# The scaled value uid * 1e10 is a float and can land just below the intended
# integer (e.g. ...05.999998 instead of ...06), so it is rounded to the nearest
# integer before taking the last two digits; plain int() truncation would
# report the hour one too low and misclassify the 6 and 18 boundaries.
min_max_df['day'] = min_max_df.index.map(
    lambda uid: 6 <= int(round(uid * 10_000_000_000)) % 100 < 18
)

In [None]:
# Bare expression: the notebook renders the per-vehicle summary table as this cell's output.
min_max_df

In [None]:
# Close the connection to the database
# conn.close()
# connector.close()
# print("✓ Connection closed")