In [1]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import sqlalchemy
import utils

In [2]:
# Read the environment variables.
# Loads the `dotenv` IPython extension and then reads the variables defined in
# ../.env (a path relative to the notebook's working directory) so that the
# connection cell can pick them up through os.environ.
%load_ext dotenv
%dotenv ../.env

In [3]:
# Pandas display configuration: never truncate the text shown inside a cell
# (None removes the column-width limit).
pd.options.display.max_colwidth = None

In [4]:
# Connect to the database.
# Every connection setting is read from the environment; a missing variable
# raises KeyError right here instead of failing later at query time.
host = os.environ["LOCAL_HOST"]
port = os.environ["LOCAL_PORT"]
dbname = os.environ["POSTGRES_DB"]
dbuser = os.environ["POSTGRES_USER"]
dbpassword = os.environ["POSTGRES_PASSWORD"]

# Build the URL from its components instead of interpolating them into a
# string: URL.create escapes characters such as "@", ":" or "/" in the user
# name or password, which would otherwise corrupt the connection string.
conn_url = sqlalchemy.engine.URL.create(
    drivername="postgresql",
    username=dbuser,
    password=dbpassword,
    host=host,
    port=int(port),
    database=dbname,
)

# Kept as a plain string for any code that still expects `conn_string`.
# It embeds the (escaped) password, so never display or log it.
conn_string = conn_url.render_as_string(hide_password=False)
engine = sqlalchemy.create_engine(conn_url)

In [5]:
# List the catalog entries of every table in the "marts" schema.
sql = """
SELECT * 
  FROM information_schema.tables
 WHERE table_schema = 'marts'
"""
result = pd.read_sql(sql=sql, con=engine)
result

Unnamed: 0,table_catalog,table_schema,table_name,table_type,self_referencing_column_name,reference_generation,user_defined_type_catalog,user_defined_type_schema,user_defined_type_name,is_insertable_into,is_typed,commit_action
0,db2,marts,interactions,BASE TABLE,,,,,,YES,NO,
1,db2,marts,account_interactions_encoded,BASE TABLE,,,,,,YES,NO,
2,db2,marts,product_holdings,BASE TABLE,,,,,,YES,NO,
3,db2,marts,account_states,BASE TABLE,,,,,,YES,NO,
4,db2,marts,account_churn,BASE TABLE,,,,,,YES,NO,
5,db2,marts,account_product_holdings,BASE TABLE,,,,,,YES,NO,
6,db2,marts,account_info,BASE TABLE,,,,,,YES,NO,
7,db2,marts,dim_dates,BASE TABLE,,,,,,YES,NO,
8,db2,marts,account_interactions,BASE TABLE,,,,,,YES,NO,


In [6]:
# Per-column summary of marts.account_churn, produced by the project helper
# utils.table_summary.
table_schema, table_name = "marts", "account_churn"

output = utils.table_summary(table_schema, table_name, engine)
output

Unnamed: 0,column_name,data_type,total_count,unique_count,is_unique,min_value,max_value,true_count,false_count,null_count,empty_count,unique_values
0,account_id,character varying,8931,8931,True,1000,9999,,,0,0,
1,language,text,8931,4,False,DE,IT,,,1522,0,"[DE, EN, FR, IT, None]"
2,gender,text,8931,3,False,D,M,,,0,0,"[D, F, M]"
3,birthday,date,8931,6115,False,1903-09-22,2020-01-30,,,0,0,
4,zip_code,character varying,8931,9,False,1006,9102,,,0,0,"[1006, 2006, 3005, 4003, 5005, 6006, 7001, 8006, 9102]"
5,payment_method,text,8931,3,False,Kreditkarte,Rechnung,,,0,0,"[Kreditkarte, LSV, Rechnung]"
6,age,integer,8931,90,False,4,120,,,0,0,
7,first_purchase_date,date,8931,951,False,2021-01-01,2023-09-26,,,61,0,
8,last_terminate_date,date,8931,572,False,2021-03-18,9999-12-31,,,61,0,
9,tenure,integer,8931,1058,False,10,1151,,,61,0,


In [7]:
# Per-column summary of marts.account_interactions, produced by the project
# helper utils.table_summary.
table_schema, table_name = "marts", "account_interactions"

output = utils.table_summary(table_schema, table_name, engine)
output

Unnamed: 0,column_name,data_type,total_count,unique_count,is_unique,min_value,max_value,true_count,false_count,null_count,empty_count,unique_values
0,account_id,character varying,8931,8931,True,1000,9999,,,0,0,
1,language,text,8931,4,False,DE,IT,,,1522,0,"[DE, EN, FR, IT, None]"
2,gender,text,8931,3,False,D,M,,,0,0,"[D, F, M]"
3,birthday,date,8931,6115,False,1903-09-22,2020-01-30,,,0,0,
4,zip_code,character varying,8931,9,False,1006,9102,,,0,0,"[1006, 2006, 3005, 4003, 5005, 6006, 7001, 8006, 9102]"
5,payment_method,text,8931,3,False,Kreditkarte,Rechnung,,,0,0,"[Kreditkarte, LSV, Rechnung]"
6,age,integer,8931,90,False,4,120,,,0,0,
7,first_purchase_date,date,8931,951,False,2021-01-01,2023-09-26,,,61,0,
8,last_terminate_date,date,8931,572,False,2021-03-18,9999-12-31,,,61,0,
9,tenure,integer,8931,1058,False,10,1151,,,61,0,


In [8]:
# Per-column summary of marts.account_interactions_encoded, produced by the
# project helper utils.table_summary.
table_schema, table_name = "marts", "account_interactions_encoded"

output = utils.table_summary(table_schema, table_name, engine)
output

Unnamed: 0,column_name,data_type,total_count,unique_count,is_unique,min_value,max_value,true_count,false_count,null_count,empty_count,unique_values
0,churned_date,date,8931,571,False,2021-03-18,2023-07-31,,,6979,0,
1,is_churned,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
2,language_de,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
3,language_en,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
4,language_fr,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
5,language_it,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
6,gender_d,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
7,gender_f,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
8,gender_m,integer,8931,2,False,0,1,,,0,0,"[0, 1]"
9,age_0_25,integer,8931,2,False,0,1,,,0,0,"[0, 1]"


In [9]:
# Load all rows and columns of marts.account_interactions_encoded.
# The following analysis cells read this frame through the name `result`.
sql = """
SELECT *
  FROM marts.account_interactions_encoded
 """
result = pd.read_sql(sql=sql, con=engine)
result

Unnamed: 0,churned_date,is_churned,language_de,language_en,language_fr,language_it,gender_d,gender_f,gender_m,age_0_25,...,time_in_queue_20_40,time_in_queue_40,handling_time_s_0_40,handling_time_s_40_80,handling_time_s_80,customer_satisfaction_after_call_1,customer_satisfaction_after_call_2,customer_satisfaction_after_call_3,customer_satisfaction_after_call_4,customer_satisfaction_after_call_5
0,,0,0,1,0,0,0,1,0,1,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
1,2023-04-11,1,0,0,0,1,0,1,0,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
2,,0,1,0,0,0,0,1,0,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
3,,0,0,0,0,0,0,1,0,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
4,,0,1,0,0,0,0,0,1,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8926,,0,0,0,0,0,0,0,1,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
8927,,0,0,0,0,0,0,0,1,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
8928,2022-11-16,1,0,1,0,0,0,1,0,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
8929,,0,0,1,0,0,1,0,0,0,...,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1


In [10]:
# Pairwise correlations between all numeric columns, then the column for the
# churn flag ordered from the largest coefficient to the smallest.
# NOTE(review): several call-derived columns hold -1 as well as 0/1 in the
# preview of `result`, so their coefficients may mostly reflect whether a call
# exists at all -- confirm that this is the intended reading.
corr_matrix = result.corr(numeric_only=True)
corr_matrix.loc[:, "is_churned"].sort_values(ascending=False)

is_churned                            1.000000
has_call                              0.398603
handling_time_s_0_40                  0.379933
handling_time_s_80                    0.377810
time_in_queue_40                      0.376716
call_reason_churn                     0.376651
time_in_queue_20_40                   0.376045
time_in_queue_0_20                    0.374886
handling_time_s_40_80                 0.374882
call_reason_product                   0.372959
call_reason_billing                   0.369655
call_reason_technical                 0.369258
customer_satisfaction_after_call_1    0.306905
customer_satisfaction_after_call_2    0.303517
customer_satisfaction_after_call_3    0.303115
customer_satisfaction_after_call_4    0.300140
customer_satisfaction_after_call_5    0.299559
tenure_100_200                        0.180675
tenure_200_300                        0.148737
tenure_0_100                          0.131519
tenure_300_400                        0.118365
age_25_50    

In [11]:
# Contingency table: churn flag (rows) against has_call (columns).
pd.crosstab(index=result["is_churned"], columns=result["has_call"])

has_call,0,1
is_churned,Unnamed: 1_level_1,Unnamed: 2_level_1
0,6979,0
1,1572,380


In [12]:
# Contingency table: churn flag (rows) against handling_time_s_0_40 (columns).
pd.crosstab(index=result["is_churned"], columns=result["handling_time_s_0_40"])

handling_time_s_0_40,-1,0,1
is_churned,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6979,0,0
1,1572,323,57


In [13]:
# Contingency table: has_call (rows) against handling_time_s_0_40 (columns),
# to see how the -1 value of the encoded column lines up with has_call.
pd.crosstab(index=result["has_call"], columns=result["handling_time_s_0_40"])

handling_time_s_0_40,-1,0,1
has_call,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,8551,0,0
1,0,323,57


In [14]:
# Contingency table: churn flag (rows) against call_reason_churn (columns).
pd.crosstab(index=result["is_churned"], columns=result["call_reason_churn"])

call_reason_churn,-1,0,1
is_churned,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,6979,0,0
1,1583,326,43
