# IMT.fi Funnel Analysis 2019-2024

Olli Salli, 2025

In [None]:
# Standard data packages
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# For connecting to your warehouse (adjust as needed)
from sqlalchemy import create_engine, text

# Display options: show every DataFrame column, and draw all plots on seaborn's white grid
pd.options.display.max_columns = None
sns.set_theme(style="whitegrid")


In [None]:
import redshift_connector
import pandas as pd
import yaml
from pathlib import Path

# Avoid the error raised when the stock psycopg2 dialect tries to read the "standard_conforming_strings" setting
from sqlalchemy.dialects import registry
from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2
class RedshiftPostgresDialect(PGDialect_psycopg2):
    """psycopg2-based PostgreSQL dialect whose backslash-escape setup is replaced by a constant."""
    def _set_backslash_escapes(self, connection):
        # Assign a fixed value instead of using `connection`, so nothing is sent to the server here.
        # NOTE(review): "off" is a non-empty string and therefore truthy wherever this flag
        # is tested as a boolean — confirm that is the intended setting.
        self._backslash_escapes = "off"
# Register the class under the "redshift_custom" name so it can be used as a URL scheme in create_engine().
registry.register("redshift_custom", __name__, "RedshiftPostgresDialect")

# Load connection settings from the local dbt profile (~/.dbt/profiles.yml)
from urllib.parse import quote

with open(Path.home() / ".dbt" / "profiles.yml", encoding="utf-8") as f:
    profiles = yaml.safe_load(f)

p = profiles["imt_aws_redshift"]["outputs"]["dev"]

# Percent-encode the credentials: characters such as "@", ":", "/" or "%" in a
# user name or password would otherwise be parsed as URL delimiters or escapes.
# safe="" also encodes "/", and spaces become "%20" rather than "+".
db_user = quote(str(p["user"]), safe="")
db_password = quote(str(p["password"]), safe="")

# Honour a port configured in the profile; fall back to Redshift's default 5439.
db_port = p.get("port", 5439)

engine = create_engine(
    f'redshift_custom://{db_user}:{db_password}@{p["host"]}:{db_port}/{p["dbname"]}',
    connect_args={
        "sslmode": "require"
    }
)

# Open and immediately close one connection so that bad settings fail here
# rather than in the first query.
with engine.connect() as conn:
    print("✅ Connected to Redshift")

def run_query(sql: str, params=None):
    """Execute *sql* on the module-level ``engine`` and return the rows as a DataFrame.

    Args:
        sql: SQL text; may contain ``:name`` placeholders.
        params: Mapping of placeholder names to values. A missing or empty
            value is passed on as an empty dict.

    Returns:
        The result of ``pd.read_sql`` for the statement.
    """
    from sqlalchemy import text

    bind_values = params if params else {}
    with engine.connect() as connection:
        statement = text(sql)
        frame = pd.read_sql(statement, connection, params=bind_values)
        return frame


## Overall reservation completion comparison

In [None]:
# Per departure year: the number of reservations and how many of them have
# finalstage = 'Confirmed', restricted to a departure-date window.
query = """
SELECT
        departureyear,
        COUNT(*) AS total_reservations,
        SUM(CASE WHEN finalstage = 'Confirmed' THEN 1 ELSE 0 END) AS confirmed
    FROM dbt_osalli.fct_reservation_funnel
    WHERE departuredate BETWEEN :start AND :end
    GROUP BY departureyear
    ORDER BY departureyear
"""

# :start and :end are bound parameters supplied through the params dict.
# NOTE(review): BETWEEN is inclusive on both ends; if departuredate is a timestamp
# rather than a date, rows after midnight on the end date are excluded — confirm the column type.
df = run_query(query, params={"start": "2019-01-01", "end": "2024-12-31"})

# Visualization: grouped bars per departure year, one series per y-axis
fig, ax1 = plt.subplots(figsize=(12, 6))

# Bar geometry: one slot per year, each series shifted half a bar width from the slot centre
width = 0.35
x = np.arange(len(df['departureyear']))

# Left y-axis carries total_reservations (blue)
bars1 = ax1.bar(
    x - width/2,
    df['total_reservations'],
    width,
    label='Total Reservations',
    color='tab:blue',
    alpha=0.7,
)
ax1.set_xlabel('Departure Year')
ax1.set_ylabel('Total Reservations', color='tab:blue')
ax1.tick_params(axis='y', labelcolor='tab:blue')

# Label each slot with its departure year
ax1.set_xticks(x)
ax1.set_xticklabels(df['departureyear'])

# Fixed left-axis range with a tick every 200k
ax1.set_ylim(0, 1_100_000)
ax1.set_yticks(np.arange(0, 1_000_001, 200_000))

# Format left y-axis with humanized numbers
from matplotlib.ticker import FuncFormatter
def human_format(num, pos):
    """Format a tick value compactly, e.g. 200000 -> '200k', 1000000 -> '1.0M'.

    Args:
        num: The tick value to format.
        pos: The tick position. Unused; it is part of the two-argument
            signature that ``FuncFormatter`` calls.

    Returns:
        The value as a short string: one decimal with an ``M`` suffix for
        millions, a whole number with a ``k`` suffix for thousands, and a
        plain whole number otherwise. Negative values keep their sign.
    """
    # Pick the unit from the magnitude so that negative values are scaled too.
    magnitude = abs(num)
    sign = '-' if num < 0 else ''

    # The thresholds sit where rounding would spill into the next unit:
    # 999_500 would print as '1000k' and 999.5 as '1000' if left in the lower branch.
    if magnitude >= 999_500:
        return f'{sign}{magnitude/1_000_000:.1f}M'
    if magnitude >= 999.5:
        return f'{sign}{magnitude/1_000:.0f}k'
    return f'{sign}{magnitude:.0f}'
ax1.yaxis.set_major_formatter(FuncFormatter(human_format))

# Right y-axis carries confirmed (green) on a twin of ax1, so both series share the x-axis
ax2 = ax1.twinx()
bars2 = ax2.bar(
    x + width/2,
    df['confirmed'],
    width,
    label='Confirmed',
    color='tab:green',
    alpha=0.7,
)
ax2.set_ylabel('Confirmed', color='tab:green')
ax2.tick_params(axis='y', labelcolor='tab:green')

# Fixed right-axis range with a tick every 10k
ax2.set_ylim(0, 55_000)
ax2.set_yticks(np.arange(0, 50_001, 10_000))

# Humanized tick labels on the right axis too (its own formatter instance)
ax2.yaxis.set_major_formatter(FuncFormatter(human_format))

# Title, a single figure-level legend covering both axes, then render
plt.title('Reservations by Departure Year: Total vs Confirmed')
fig.legend(loc='upper right', bbox_to_anchor=(0.9, 0.9))
fig.tight_layout()
plt.show()