In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sqlalchemy import create_engine
from PIL import Image
import os

In [None]:
# Database connection
# SQLAlchemy URL for the SQLite file `project.db`; the three-slash form is a
# path relative to the current working directory.
DATABASE_URL = "sqlite:///project.db"
# Module-level engine shared by the query code further down in this notebook.
engine = create_engine(DATABASE_URL)

The `get_data_from_db` function retrieves fare data from two sources:<br>
**Taxi trips**: It queries the taxi_trips table and computes the sum of fare_amount, total_surcharge, mta_tax, and tolls_amount for each month.<br>
**Uber trips**: Similarly, it queries the uber_trips table and computes the sum of base_passenger_fare, total_surcharge, sales_tax, and tolls for each month.<br>
Returns:<br>
A pd.DataFrame object containing the merged fare data from both taxi and Uber trips. It includes the total fare, base fare, surcharges, taxes, and tolls for each month.

In [None]:
def get_data_from_db(con=None) -> pd.DataFrame:
    """
    Query the database for monthly fare totals of taxi and Uber trips.

    Two aggregate queries are run, one against `taxi_trips` and one against
    `uber_trips`, each grouping trips by pickup month ('YYYY-MM') and summing
    the base fare, surcharges, taxes and tolls. Trips are included when their
    pickup timestamp falls in the half-open range
    [2020-01-01, 2024-09-01), i.e. January 2020 through the end of August 2024.

    A half-open range is used on purpose: the pickup columns are compared as
    text, so `BETWEEN ... AND '2024-08-31'` would exclude every trip on
    2024-08-31 that carries a time-of-day component
    ('2024-08-31 18:30:00' sorts after '2024-08-31').

    Args:
        con: Optional connection to query. Anything `pd.read_sql` accepts
            (SQLAlchemy engine/connection or a `sqlite3` connection).
            Defaults to the module-level `engine`.

    Returns:
        pd.DataFrame: One row per month, sorted chronologically, with columns
                      'month', 'total_fare_taxi', 'base_fare_taxi', 'surcharges_taxi',
                      'taxes_taxi', 'tolls_taxi', 'total_fare_uber', 'base_fare_uber',
                      'surcharges_uber', 'taxes_uber', 'tolls_uber'.
                      Months present in only one of the two tables are kept,
                      with the missing side filled with 0.
    """
    if con is None:
        # Fall back to the notebook-wide engine so existing callers are unaffected.
        con = engine

    # NOTE(review): in SQL, `a + b` is NULL when either operand is NULL, so a
    # row with a NULL component is skipped by SUM(total) but still counted in
    # the per-component sums. Confirm the fare columns are NOT NULL.

    # Taxi data query
    taxi_query = """
    SELECT strftime('%Y-%m', tpep_pickup_datetime) AS month,
           SUM(fare_amount + total_surcharge + mta_tax + tolls_amount) AS total_fare,
           SUM(fare_amount) AS base_fare,
           SUM(total_surcharge) AS surcharges,
           SUM(mta_tax) AS taxes,
           SUM(tolls_amount) AS tolls
    FROM taxi_trips
    WHERE tpep_pickup_datetime >= '2020-01-01'
      AND tpep_pickup_datetime < '2024-09-01'
    GROUP BY month
    ORDER BY month;
    """

    # Uber data query
    uber_query = """
    SELECT strftime('%Y-%m', pickup_datetime) AS month,
           SUM(base_passenger_fare + total_surcharge + sales_tax + tolls) AS total_fare,
           SUM(base_passenger_fare) AS base_fare,
           SUM(total_surcharge) AS surcharges,
           SUM(sales_tax) AS taxes,
           SUM(tolls) AS tolls
    FROM uber_trips
    WHERE pickup_datetime >= '2020-01-01'
      AND pickup_datetime < '2024-09-01'
    GROUP BY month
    ORDER BY month;
    """

    # Load data into DataFrames
    taxi_data = pd.read_sql(taxi_query, con)
    uber_data = pd.read_sql(uber_query, con)

    # Outer join keeps months that appear in only one dataset; the missing
    # side becomes NaN and is replaced by 0 so it plots as an empty bar.
    merged_data = pd.merge(
        taxi_data,
        uber_data,
        on='month',
        suffixes=('_taxi', '_uber'),
        how='outer',
    ).fillna(0)

    # 'YYYY-MM' strings sort chronologically; make the ordering explicit.
    merged_data = merged_data.sort_values('month').reset_index(drop=True)

    return merged_data

The `plot_stacked_animation_with_pillow` function creates a stacked bar chart animation that shows the breakdown of fares (base fare, surcharges, taxes, and tolls) for both taxi and Uber trips over time. <br>
It generates individual frames for the animation and saves them as a GIF using the Pillow library.<br>
In detail,<br>
It iterates over the four components (base fare, surcharges, taxes, and tolls) and adds each to the stacked bar chart progressively in each frame.<br>
After generating all the frames, it uses Pillow to compile these frames into an animated GIF, which is saved to disk.<br>
Note: The frames are saved as individual PNG images in a `frames` directory under the current working directory. Once the GIF is created, the individual frame images are deleted (the directory itself is left in place).<br>
<br>
Arguments:<br>
merged_data (pd.DataFrame): This DataFrame contains the fare data for both taxis and Ubers, including columns for base_fare_taxi, surcharges_taxi, taxes_taxi, tolls_taxi, base_fare_uber, surcharges_uber, taxes_uber, tolls_uber, and month.<br>
Returns:<br>
None. This function saves the animated GIF as total_fares_animation.gif in the current directory.<br>
<br>
About GIF Settings:<br>
The GIF is saved with a duration of 1000 ms (1 second) per frame.<br>
The animation loops indefinitely (loop=0).<br>

In [None]:
def plot_stacked_animation_with_pillow(merged_data: pd.DataFrame) -> None:
    """
    Build a 4-frame animated stacked bar chart of monthly taxi and Uber fares
    and save it as a GIF using Pillow.

    Frame 0 shows only the base fare; each following frame adds one more
    component on top of the stack (surcharges, then taxes, then tolls). Taxi
    and Uber bars are drawn side by side for every month.

    The stack offsets are rebuilt from zero for every frame and the input
    columns are never modified. Accumulating with `+=` on the input Series
    would carry offsets over between frames (inflating later frames) and could
    write into the caller's DataFrame.

    Each frame is written as `frames/frame_<i>.png`, the frames are combined
    into `total_fares_animation.gif` (1000 ms per frame, looping forever), and
    the PNG files are removed afterwards, also when an error occurs. The
    `frames` directory itself is left in place. The figure is closed once the
    frames are rendered so repeated calls do not accumulate open figures.

    Args:
        merged_data (pd.DataFrame): A DataFrame containing the fare data for taxis and Ubers, including columns for:
                                     'month', 'base_fare_taxi', 'surcharges_taxi', 'taxes_taxi', 'tolls_taxi',
                                     'base_fare_uber', 'surcharges_uber', 'taxes_uber', 'tolls_uber'.

    Returns:
        None: The function saves a GIF file and does not return any value.
    """
    # (legend label, column stem, taxi color, Uber color), in stacking order.
    components = [
        ('Base Fare', 'base_fare', 'orange', 'lightblue'),
        ('Surcharges', 'surcharges', 'lightcoral', 'skyblue'),
        ('Taxes', 'taxes', 'red', 'deepskyblue'),
        ('Tolls', 'tolls', 'darkred', 'dodgerblue'),
    ]

    months = merged_data['month']
    x = np.arange(len(months))
    width = 0.35

    # Create directory for saving frames
    frame_dir = "frames"
    os.makedirs(frame_dir, exist_ok=True)

    gif_filename = "total_fares_animation.gif"
    frames = []

    fig, ax = plt.subplots(figsize=(14, 7))
    try:
        for frame in range(len(components)):
            ax.clear()  # Start every frame from an empty axes

            # Fresh offsets per frame: nothing leaks from the previous frame.
            taxi_bottom = np.zeros(len(x))
            uber_bottom = np.zeros(len(x))

            # Frame k shows the first k + 1 components stacked on each other.
            for label, stem, taxi_color, uber_color in components[:frame + 1]:
                taxi_values = merged_data[f'{stem}_taxi'].to_numpy(dtype=float)
                uber_values = merged_data[f'{stem}_uber'].to_numpy(dtype=float)

                ax.bar(x - width/2, taxi_values, width, bottom=taxi_bottom,
                       label=f'Taxi {label}', color=taxi_color)
                ax.bar(x + width/2, uber_values, width, bottom=uber_bottom,
                       label=f'Uber {label}', color=uber_color)

                # New arrays instead of in-place `+=`, so inputs stay untouched.
                taxi_bottom = taxi_bottom + taxi_values
                uber_bottom = uber_bottom + uber_values

            # ax.clear() also removes title, labels and ticks, so they have to
            # be applied on every frame rather than once before the loop.
            ax.set_title('Monthly Total Fares Earned (Jan 2020 - Aug 2024) - Stacked Animation')
            ax.set_xlabel('Month')
            ax.set_ylabel('Total Fares ($)')
            ax.set_xticks(x)
            ax.set_xticklabels(months, rotation=45)
            ax.legend()

            # Save the current frame and remember its path
            fig.tight_layout()
            frame_filename = os.path.join(frame_dir, f"frame_{frame}.png")
            fig.savefig(frame_filename)
            frames.append(frame_filename)

        # Load each frame fully into memory and close its file handle before
        # the PNGs are deleted (open handles block deletion on Windows).
        images = []
        for frame_filename in frames:
            with Image.open(frame_filename) as img:
                images.append(img.copy())

        # Use Pillow to create a GIF
        images[0].save(gif_filename, save_all=True, append_images=images[1:], duration=1000, loop=0)
    finally:
        plt.close(fig)
        # Clean up the frame files even if rendering or saving failed
        for frame_filename in frames:
            if os.path.exists(frame_filename):
                os.remove(frame_filename)

    # Display the generated GIF path
    print(f"GIF saved as {gif_filename}")