In [None]:

# QuickStart: Chatbot Conversion Analysis (Synthetic Data)
# Install the packages listed in ../requirements.txt; -q keeps pip's output quiet.
# NOTE(review): the relative path presumably assumes the kernel's working
# directory is the notebook's own folder — confirm before running elsewhere.
%pip install -q -r ../requirements.txt


In [None]:

import os
# Switch the Snowflake flag off before any data is loaded (forces synthetic data).
os.environ.update({'SNOWFLAKE_ENABLED': '0'})


In [None]:

from common.data_loader import load_dataframe
import pandas as pd
import numpy as np
from scipy.stats import pointbiserialr

# Load the dataset, treat +/-inf as missing, drop every row that has any
# missing value, then add integer copies of the two outcome flags.
df = (
    load_dataframe()
    .replace([np.inf, -np.inf], np.nan)
    .dropna()
    .assign(
        has_appt=lambda frame: frame['HAS_APPT_SCHEDULED'].astype(int),
        has_rfi=lambda frame: frame['HAS_RFI_SUBMISSION'].astype(int),
    )
)
df.head()


In [None]:

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Columns used as model inputs for the appointment classifier.
features = [
    'TOTAL_USER_MESSAGES','AVG_USER_WORDS_PER_MSG','MAX_USER_WORDS_IN_MSG',
    'USER_ENGAGEMENT_DURATION','MESSAGE_COUNT',
    'TOTAL_AGENT_MESSAGES','AVG_AGENT_WORDS_PER_MSG','MAX_AGENT_WORDS_IN_MSG'
]

X = df[features]
y_appt = df['has_appt']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/std applied to the training rows (data
# leakage) and make the evaluation below optimistic.
X_tr_raw, X_te_raw, ya_tr, ya_te = train_test_split(
    X, y_appt, test_size=0.2, random_state=42
)

# Learn scaling statistics from the training fold only, then apply them
# unchanged to the test fold.
scaler = StandardScaler()
Xa_tr = scaler.fit_transform(X_tr_raw)
Xa_te = scaler.transform(X_te_raw)

# Kept so any later cell that reads `Xs` still works; it is the full feature
# matrix scaled with the train-only statistics.
Xs = scaler.transform(X)

# class_weight='balanced' reweights classes inversely to their frequency.
mdl = LogisticRegression(max_iter=1000, class_weight='balanced')
mdl.fit(Xa_tr, ya_tr)
print(classification_report(ya_te, mdl.predict(Xa_te)))


In [None]:

from scipy.stats import chi2_contingency
# Keep only rows where EXPLICIT_APPT_REQUEST equals 1.
# `df.get(col, 0) == 1` collapses to the scalar False when the column is
# absent, and `df[False]` then raises KeyError. Build an explicit boolean mask
# so a missing column simply selects no rows.
if 'EXPLICIT_APPT_REQUEST' in df.columns:
    explicit_mask = df['EXPLICIT_APPT_REQUEST'] == 1
else:
    explicit_mask = pd.Series(False, index=df.index)
exp = df[explicit_mask]

# Contingency table: SEQUENCE_PATTERN (rows) x HAS_APPT_SCHEDULED (columns).
ct = pd.crosstab(exp['SEQUENCE_PATTERN'], exp['HAS_APPT_SCHEDULED'])

if ct.size:
    chi2, p, dof, expct = chi2_contingency(ct)
else:
    # chi2_contingency raises on a zero-size table; report "no result"
    # instead of failing the cell when nothing matched the filter.
    chi2, p, dof, expct = np.nan, np.nan, 0, np.empty((0, 0))

# Show the observed table together with the test's p-value.
ct, p


In [None]:

from IPython.display import Image, display

# Render the pre-generated figure files inline, in the order listed.
figure_paths = (
    '../figures/feature_importance_appt_vs_rfi.png',
    '../figures/distributions_grid.png',
    '../figures/pattern_rates_and_volume.png',
)
for figure_path in figure_paths:
    display(Image(filename=figure_path))
