# IMT.fi Funnel Analysis 2019-2024

Olli Salli, 2025

In [None]:
# Standard data packages
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# For connecting to your warehouse (adjust as needed)
from sqlalchemy import create_engine, text

# Display options
# max_columns=None removes pandas' column limit, so wide result frames are
# shown with every column instead of being elided.
pd.set_option("display.max_columns", None)
# Apply seaborn's "whitegrid" style to the plots drawn after this point.
sns.set_theme(style="whitegrid")


In [None]:
import redshift_connector
import pandas as pd
import yaml
from pathlib import Path

# Avoid the error from the stock psycopg2 dialect trying to query the "standard_conforming_strings" setting
from sqlalchemy.dialects import registry
from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2
class RedshiftPostgresDialect(PGDialect_psycopg2):
    """psycopg2 PostgreSQL dialect with the backslash-escape hook overridden.

    The only change from ``PGDialect_psycopg2`` is ``_set_backslash_escapes``,
    which assigns a fixed value and never touches the connection.
    """

    def _set_backslash_escapes(self, connection):
        """Assign a constant to ``_backslash_escapes``; ``connection`` is unused.

        NOTE(review): the value is the non-empty string "off", which is truthy
        wherever the flag is tested as a boolean - confirm that is the
        intended meaning.
        """
        self._backslash_escapes = "off"
# Register the class under the dialect name "redshift_custom" so that engine
# URLs using the redshift_custom:// scheme resolve to it. __name__ is passed as
# the module in which the class is looked up.
registry.register("redshift_custom", __name__, "RedshiftPostgresDialect")

# URL.create assembles the connection URL from separate components, so
# credentials never have to be spliced into a URL string by hand.
from sqlalchemy.engine import URL

# Load connection settings from the dbt profiles.yml in the user's home directory.
with open(Path.home() / ".dbt" / "profiles.yml", encoding="utf-8") as f:
    profiles = yaml.safe_load(f)

# Settings of the "dev" output of the "imt_aws_redshift" profile.
p = profiles["imt_aws_redshift"]["outputs"]["dev"]

# Passing the parts individually keeps a password containing characters such
# as "@", ":", "/" or "%" intact; interpolating it into an f-string URL would
# make the URL parse incorrectly.
engine = create_engine(
    URL.create(
        drivername="redshift_custom",
        username=p["user"],
        password=p["password"],
        host=p["host"],
        # Honour a port configured in the profile; 5439 remains the default.
        port=int(p.get("port", 5439)),
        database=p["dbname"],
    ),
    connect_args={"sslmode": "require"},
)

# Open and immediately close one connection so that bad settings fail here
# rather than on the first query.
with engine.connect():
    print("✅ Connected to Redshift")

def run_query(sql: str, params=None):
    """Run a SQL statement on the module-level engine and return a DataFrame.

    Args:
        sql: SQL text; it is wrapped in ``sqlalchemy.text`` before execution,
            so ``:name`` placeholders are treated as bind parameters.
        params: Mapping of bind-parameter values. A falsy value (``None`` or
            an empty mapping) is replaced by an empty dict.

    Returns:
        The result of ``pd.read_sql`` for the statement.
    """
    from sqlalchemy import text

    bind_values = params if params else {}
    # A connection is opened per call and closed when the block exits.
    with engine.connect() as connection:
        frame = pd.read_sql(text(sql), connection, params=bind_values)
    return frame


## Overall reservation completion comparison

In [None]:
# Per departure year: the number of rows in fct_reservation_funnel and how many
# of them have finalstage = 'Confirmed', limited to departure dates inside the
# :start / :end window (BETWEEN includes both bounds).
# NOTE(review): if departuredate is a timestamp rather than a date, rows later
# than midnight on the end date fall outside the window - confirm the column type.
query = """
SELECT
        departureyear,
        COUNT(*) AS total_reservations,
        SUM(CASE WHEN finalstage = 'Confirmed' THEN 1 ELSE 0 END) AS confirmed
    FROM dbt_osalli.fct_reservation_funnel
    WHERE departuredate BETWEEN :start AND :end
    GROUP BY departureyear
    ORDER BY departureyear
"""

# The window bounds are passed as bind parameters, not formatted into the SQL.
df = run_query(query, params={"start": "2019-01-01", "end": "2024-12-31"})
# A bare expression on the last line of a notebook cell is rendered as its output.
df