# IMT.fi Funnel Analysis 2019-2024

Olli Salli, 2025

**Note:** A typical notebook would include commentary and more informative headings; these are intentionally omitted here to avoid leaking business details to readers without access to the source data.

In [None]:
# Inclusive departure-date window; the SQL cells bind these as :start_date and :end_date
start_date, end_date = '2019-01-01', '2024-12-31'

In [None]:
# Standard data packages
import pandas as pd

# For connecting to your warehouse
from sqlalchemy import create_engine
# Registers the %sql / %%sql magics that the query cells in this notebook rely on
%load_ext sql

# Display options: None lifts the column limit so wide result frames are shown in full
pd.set_option('display.max_columns', None)

In [None]:
# Import plotting helpers
from funnel_plots import (
    plot_product_step_comparison,
    plot_completion_comparison,
    plot_abandonment_stages,
)

# Auto-reload modules when they change
%load_ext autoreload
%autoreload 2

In [None]:
import yaml
from pathlib import Path

# Avoid error from stock psycopg2 trying to set 'standard_conforming_strings' setting
from sqlalchemy.dialects import registry
from sqlalchemy.dialects.postgresql.psycopg2 import PGDialect_psycopg2
class RedshiftPostgresDialect(PGDialect_psycopg2):
    """psycopg2 PostgreSQL dialect whose backslash-escape setup is replaced by a fixed value.

    Only ``_set_backslash_escapes`` is overridden; everything else is inherited
    from ``PGDialect_psycopg2``.
    """
    def _set_backslash_escapes(self, connection):
        """Set ``_backslash_escapes`` directly; ``connection`` is not used and no query is issued."""
        # NOTE(review): the value is the string 'off', which is truthy in boolean
        # checks — confirm this is the intended setting for the target warehouse.
        self._backslash_escapes = 'off'
# Make the class reachable through the 'redshift_custom://' URL scheme
registry.register('redshift_custom', __name__, 'RedshiftPostgresDialect')

# Load connection settings from the dbt profiles.yml in the user's home directory
from urllib.parse import quote

# YAML files are UTF-8 by convention; do not depend on the platform default encoding
with open(Path.home() / '.dbt' / 'profiles.yml', encoding='utf-8') as f:
    profiles = yaml.safe_load(f)

p = profiles['imt_aws_redshift']['outputs']['dev']

# Percent-encode the credentials: characters such as '@', ':', '/' or '%' in a
# user name or password would otherwise be parsed as URL delimiters.
db_user = quote(str(p['user']), safe='')
db_password = quote(str(p['password']), safe='')
# Use the port from the profile when present; 5439 stays as the fallback
db_port = p.get('port', 5439)

db_url = f"redshift_custom://{db_user}:{db_password}@{p['host']}:{db_port}/{p['dbname']}"
# sslmode=require is passed through to the driver's connect call
engine = create_engine(db_url, connect_args={'sslmode': 'require'})

# Hand the SQLAlchemy engine to the SQL magic so later %sql / %%sql cells run against it
%sql engine
# Have the magic return query results as pandas DataFrames (the *_df variables below)
%config SqlMagic.autopandas = True
# Enable :name placeholders in queries (used below for the start_date / end_date variables)
%config SqlMagic.named_parameters = 'enabled'

## Overall reservation completion comparison

In [None]:
%%sql --save res_by_year
SELECT
    departureyear,
    -- all funnel rows for the departure year, whatever their final stage
    COUNT(*) AS total_reservations,
    -- only the rows that reached the Confirmed stage
    COUNT(CASE WHEN finalstage = 'Confirmed' THEN 1 END) AS confirmed
FROM dbt_osalli.fct_reservation_funnel
WHERE departuredate BETWEEN :start_date AND :end_date
GROUP BY departureyear
ORDER BY departureyear;

In [None]:
res_by_year_df = %sql SELECT * from res_by_year

plot_completion_comparison(res_by_year_df)

## Abandonment stages 2019 vs 2024
### Whole pipeline

In [None]:
%%sql --save abandonment_stages
SELECT
    departureyear,
    finalstage,
    COUNT(*) AS count
FROM dbt_osalli.fct_reservation_funnel
-- restrict to the two comparison years and to rows that did not end as Confirmed
WHERE departureyear IN (2019, 2024)
    AND finalstage <> 'Confirmed'
    AND departuredate BETWEEN :start_date AND :end_date
GROUP BY departureyear, finalstage
ORDER BY departureyear, count DESC;

In [None]:
abandonment_df = %sql SELECT * from abandonment_stages

plot_abandonment_stages(abandonment_df)

### Within ProductSelection

In [None]:
%%sql --save product_selection_abandonment
SELECT
    departureyear,
    abandonproductsteptype,
    COUNT(*) AS count
FROM dbt_osalli.fct_reservation_funnel
WHERE departuredate BETWEEN :start_date AND :end_date
    AND finalstage = 'ProductSelection'
    AND departureyear IN (2019, 2024)
    -- row-level filter, so it belongs in WHERE and is applied before grouping
    AND abandonproductsteptype IS NOT NULL
GROUP BY departureyear, abandonproductsteptype
ORDER BY departureyear, count DESC;

In [None]:
prod_select_df = %sql SELECT * from product_selection_abandonment

plot_product_step_comparison(prod_select_df, 'ProductSelection Abandonment by Step: 2019 vs 2024')

#### Interpretation

The above difference in the relative importance of ship-related and hotel-related steps could be explained by changes in trip distribution.