## Load Packages

In [50]:
# Load packages
import os
import snowflake.connector
from snowflake.connector.pandas_tools import write_pandas
import pandas as pd
import numpy as np
from sklearn.preprocessing import RobustScaler
import matplotlib.pyplot as plt
from sklearn_extra.cluster import KMedoids
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
import pickle
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

## Define Function for Running SQL Files

In [51]:
# Get SQL query file as data input 
def get_sql_file_as_text(file_path):
    """Return the full contents of a SQL script file as one string.

    Args:
        file_path: Path of the SQL file to read.

    Returns:
        str: The text of the file, exactly as stored.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the query text does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as handle:
        return handle.read()

## Import Table from XLSX and Add Into Snowflake as TABLE

In [52]:
# Load the contact-owner export (contact_owners.xlsx) into a pandas DataFrame
# and preview the first rows.
df = pd.read_excel(io='contact_owners.xlsx')
df.head(n=5)

Unnamed: 0,Record ID,CardUp Contact Owner Current Value,CardUp Contact Owner Change Date (Asia/Singapore),CardUp Contact Owner Previous Value (1),CardUp Contact Owner Change Date (1) (Asia/Singapore),CardUp Contact Owner Previous Value (2),CardUp Contact Owner Change Date (2) (Asia/Singapore),...,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,56560804227,,NaT,,NaT,,NaT,,NaT,,NaT
1,56559992854,,NaT,,NaT,,NaT,,NaT,,NaT
2,56558354989,,NaT,,NaT,,NaT,,NaT,,NaT
3,56559482221,,NaT,,NaT,,NaT,,NaT,,NaT
4,56490172812,,NaT,,NaT,,NaT,,NaT,,NaT


In [53]:
# Replace the spreadsheet headers with upper-case identifiers that match the
# column names of the Snowflake table created further down.
df = df.rename(
    columns={
        "Record ID": "RECORD_ID",
        # Current owner and the time of the latest change
        "CardUp Contact Owner Current Value": "CURRENT_CONTACT_OWNER",
        "CardUp Contact Owner Change Date (Asia/Singapore)": "CURRENT_CHANGE_DATE",
        # Previous owner / change-date pairs 1 and 2
        "CardUp Contact Owner Previous Value (1)": "PREV_CONTACT_OWNER_1",
        "CardUp Contact Owner Change Date (1) (Asia/Singapore)": "PREV_CHANGE_DATE_1",
        "CardUp Contact Owner Previous Value (2)": "PREV_CONTACT_OWNER_2",
        "CardUp Contact Owner Change Date (2) (Asia/Singapore)": "PREV_CHANGE_DATE_2",
        # The remaining columns carry placeholder headers in the sheet
        "...": "PREV_CONTACT_OWNER_3",
        "Unnamed: 8": "PREV_CHANGE_DATE_3",
        "Unnamed: 9": "PREV_CONTACT_OWNER_4",
        "Unnamed: 10": "PREV_CHANGE_DATE_4",
    }
)

df.head(n=5)

Unnamed: 0,RECORD_ID,CURRENT_CONTACT_OWNER,CURRENT_CHANGE_DATE,PREV_CONTACT_OWNER_1,PREV_CHANGE_DATE_1,PREV_CONTACT_OWNER_2,PREV_CHANGE_DATE_2,PREV_CONTACT_OWNER_3,PREV_CHANGE_DATE_3,PREV_CONTACT_OWNER_4,PREV_CHANGE_DATE_4
0,56560804227,,NaT,,NaT,,NaT,,NaT,,NaT
1,56559992854,,NaT,,NaT,,NaT,,NaT,,NaT
2,56558354989,,NaT,,NaT,,NaT,,NaT,,NaT
3,56559482221,,NaT,,NaT,,NaT,,NaT,,NaT
4,56490172812,,NaT,,NaT,,NaT,,NaT,,NaT


In [54]:
# Keep only the rows that have a value in 'CURRENT_CONTACT_OWNER',
# then report how many rows are left.
df = df.dropna(subset=["CURRENT_CONTACT_OWNER"])
len(df)

17351

In [55]:
# Preview the first rows that remain after the rows without a current owner were dropped
df.head(n=5)

Unnamed: 0,RECORD_ID,CURRENT_CONTACT_OWNER,CURRENT_CHANGE_DATE,PREV_CONTACT_OWNER_1,PREV_CHANGE_DATE_1,PREV_CONTACT_OWNER_2,PREV_CHANGE_DATE_2,PREV_CONTACT_OWNER_3,PREV_CHANGE_DATE_3,PREV_CONTACT_OWNER_4,PREV_CHANGE_DATE_4
5,56498899716,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,,NaT,,NaT,,NaT,,NaT
13,56577450520,Leia Ng,2024-09-25 15:59:00,,NaT,,NaT,,NaT,,NaT
20,56498377789,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,,NaT,,NaT,,NaT,,NaT
24,56550115092,Terence Chua,2024-09-25 15:58:00,,NaT,,NaT,,NaT,,NaT
25,56527492928,Ashley Woo,2024-09-25 15:58:00,,NaT,,NaT,,NaT,,NaT


In [56]:
# Change date format for exporting to SQL
import datetime  # not used in this cell; kept in case cells further down rely on the name

# Every change-date column is converted to the same 'YYYY-MM-DD HH:MM:SS' text form.
date_columns = [
    'CURRENT_CHANGE_DATE',
    'PREV_CHANGE_DATE_1',
    'PREV_CHANGE_DATE_2',
    'PREV_CHANGE_DATE_3',
    'PREV_CHANGE_DATE_4',
]

for date_column in date_columns:
    # pd.to_datetime leaves datetime columns as they are and parses the
    # already-formatted strings back when this cell is run a second time, so
    # re-running no longer fails on the `.dt` accessor. Missing dates stay missing.
    df[date_column] = pd.to_datetime(df[date_column]).dt.strftime('%Y-%m-%d %H:%M:%S')

df.head()

Unnamed: 0,RECORD_ID,CURRENT_CONTACT_OWNER,CURRENT_CHANGE_DATE,PREV_CONTACT_OWNER_1,PREV_CHANGE_DATE_1,PREV_CONTACT_OWNER_2,PREV_CHANGE_DATE_2,PREV_CONTACT_OWNER_3,PREV_CHANGE_DATE_3,PREV_CONTACT_OWNER_4,PREV_CHANGE_DATE_4
5,56498899716,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,,,,,,,,
13,56577450520,Leia Ng,2024-09-25 15:59:00,,,,,,,,
20,56498377789,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,,,,,,,,
24,56550115092,Terence Chua,2024-09-25 15:58:00,,,,,,,,
25,56527492928,Ashley Woo,2024-09-25 15:58:00,,,,,,,,


In [57]:
# Open the Snowflake connection identified by `connection_name`, then create
# the cursor that the following cells use to execute statements.
conn = snowflake.connector.connect(
    connection_name="fundingsociety.ap-southeast-1.privatelink",
)
cursor = conn.cursor()

In [58]:
# Export to SQL
# (Re)create the target table with explicit column types before loading the data.
cursor.execute("""
CREATE OR REPLACE TABLE DEV.SBOX_SHILTON.CONTACT_OWNERS_HISTORICAL
               (
    RECORD_ID INT,
    CURRENT_CONTACT_OWNER VARCHAR(255),
    CURRENT_CHANGE_DATE TIMESTAMP,
    PREV_CONTACT_OWNER_1 VARCHAR(255),
    PREV_CHANGE_DATE_1 TIMESTAMP,
    PREV_CONTACT_OWNER_2 VARCHAR(255),
    PREV_CHANGE_DATE_2 TIMESTAMP,
    PREV_CONTACT_OWNER_3 VARCHAR(255),
    PREV_CHANGE_DATE_3 TIMESTAMP,
    PREV_CONTACT_OWNER_4 VARCHAR(255),
    PREV_CHANGE_DATE_4 TIMESTAMP
);
""")

# Commit the changes
conn.commit()

# After dropna() the frame no longer has a default 0..n-1 index. write_pandas
# does not write the index and warns when it is not a plain RangeIndex
# (presumably the UserWarning captured in this cell's output - confirm on the
# next run). Passing a re-indexed copy avoids the warning; `df` itself is
# left untouched.
write_pandas(
    conn,
    df.reset_index(drop=True),
    table_name='CONTACT_OWNERS_HISTORICAL',
    database='DEV',
    schema='SBOX_SHILTON',
    overwrite=True,
    auto_create_table=False)

  write_pandas(


(True,
 1,
 17351,
 [('wlijwgvfyg/file0.txt',
   'LOADED',
   17351,
   17351,
   1,
   0,
   None,
   None,
   None,
   None)])

## Process Table

### Unpivot Table

In [59]:
# Read the unpivot script from disk
sql_file = '01 Unpivot Contact Owners Table.sql'
sql_text = get_sql_file_as_text(file_path=sql_file)

# Run the script text on the shared cursor
cursor.execute(sql_text)


<snowflake.connector.cursor.SnowflakeCursor at 0x30c7e2950>

In [60]:
# Get Sample Data
# Only ten rows are displayed, so ask Snowflake for ten rows instead of
# downloading the whole table. The notebook's shared cursor is reused rather
# than opening an extra cursor that is never closed.
testdata = cursor.execute('''
select * from DEV.SBOX_SHILTON.CONTACT_OWNERS_HISTORICAL_UNPIVOT
limit 10
''').fetch_pandas_all()

testdata.head(10)

Unnamed: 0,RECORD_ID,CONTACT_OWNER_VALUE,CONTACT_OWNER_DATE,ORDER_OWNER
0,56580328842,Ashley Woo,2024-10-08 02:43:00,1
1,56558162015,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,1
2,56558684437,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,1
3,56560579446,Keegan Loh,2024-09-25 15:59:00,1
4,56558935981,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,1
5,56500113251,Terence Chua,2024-09-25 15:58:00,1
6,75129203304,Keegan Loh,2024-11-05 15:24:00,1
7,56549505918,Ashley Woo,2024-09-25 15:58:00,1
8,56576758676,Juliani Sarah (Deactivated User),2024-10-08 02:43:00,1
9,56582364000,Keegan Loh,2024-09-25 16:24:00,1


## Categorize Inbound, Outbound, Self-Serve, and Legacy

Inbound: Contact goes to Ashley first and is then assigned to a salesperson <br>
Outbound: Contact goes to a salesperson first <br>
Self-serve: Contact goes to Ashley and is not assigned to a salesperson <br>
Legacy: Everyone else (mostly contacts handled by legacy salespeople)

In [61]:
# Read the inbound/outbound categorisation script from disk
sql_file = '02 Inbound Outbound.sql'
sql_text = get_sql_file_as_text(file_path=sql_file)

# Run the script text on the shared cursor
cursor.execute(sql_text)


<snowflake.connector.cursor.SnowflakeCursor at 0x30c7e2950>

In [62]:
# Get Sample Data
# Only ten rows are displayed, so ask Snowflake for ten rows instead of
# downloading the whole table. The notebook's shared cursor is reused rather
# than opening an extra cursor that is never closed.
testdata = cursor.execute('''
select * from DEV.SBOX_SHILTON.CONTACT_OWNERS_HISTORICAL_UNPIVOT_INBOUND_OUTBOUND
limit 10
''').fetch_pandas_all()

testdata.head(10)

Unnamed: 0,RECORD_ID,OUTBOUND_INBOUND_LEAD
0,56577450520,Outbound
1,56558345010,Legacy
2,56549521292,Outbound
3,56558596927,Legacy
4,56574533781,Legacy
5,56501959092,Outbound
6,56508203117,Legacy
7,56561404962,Outbound
8,56549521294,Outbound
9,56558323281,Legacy


## Create F30D Tx Post-onboarding Value Table

In [63]:
# Read the first-30-days transaction value script from disk
sql_file = '03 F30D Tx Value After Onboarding.sql'
sql_text = get_sql_file_as_text(file_path=sql_file)

# Run the script text on the shared cursor
cursor.execute(sql_text)


<snowflake.connector.cursor.SnowflakeCursor at 0x30c7e2950>

In [64]:
# Get Sample Data
# Only ten rows are displayed, so ask Snowflake for ten rows instead of
# downloading the whole table. The notebook's shared cursor is reused rather
# than opening an extra cursor that is never closed.
testdata = cursor.execute('''
select * from DEV.SBOX_SHILTON.CARDUP_B2B_SG_USERID_F30DMAKE_F30DCOLLECT_AMOUNT
limit 10
''').fetch_pandas_all()

testdata.head(10)

Unnamed: 0,USER_ID,CARDUP_PAYMENT_PAYMENT_TYPE_MAKE,CARDUP_PAYMENT_PAYMENT_TYPE_COLLECT,FIRST_30D_PAYMENT_AMOUNT_MAKE,FIRST_30D_PAYMENT_AMOUNT_COLLECT
0,103507,Rent,,10074.31992,
1,227695,Supplier,,31709.119127,
2,226678,Supplier,,16626.4880222,
3,174772,Rent,,13650.84,
4,19459,Supplier,,45347.8098794,
5,78838,Insurance,,138.7607886,
6,175328,Insurance,,33649.055167,
7,77231,Supplier,,15318.0777596,
8,232452,Supplier,,23714.504681,
9,130416,Payroll,,41355.21978,


## Main Table for Analysis

In [65]:
# Read the main analysis-table script from disk
sql_file = '04 Main Table For Salespeople Perf Analysis.sql'
sql_text = get_sql_file_as_text(file_path=sql_file)

# Run the script text on the shared cursor
cursor.execute(sql_text)

<snowflake.connector.cursor.SnowflakeCursor at 0x30c7e2950>

In [66]:
# Fetch the funnel table with historical tagging into a DataFrame and show
# its first ten rows.
testdata = (
    conn.cursor()
    .execute("""
select * from DEV.SBOX_SHILTON.CARDUP_B2B_SG_FUNNEL_WITH_HISTORICAL_TAGGING                                
""")
    .fetch_pandas_all()
)

testdata.head(n=10)

Unnamed: 0,RECORD_ID,USER_ID,CONTACT_OWNER,CONTACT_OWNER_HISTORICAL,HS_CREATE_DATE,SIGN_UP_DATE,IIC_DATE,INDUSTRY,OUTBOUND_INBOUND_LEAD,MAKE_ONBOARDED_DATE,...,FIRST_PAYMENT_DATE_MAKE,FIRST_PAYMENT_DATE_COLLECT,FIRST_30D_PAYMENT_AMOUNT_MAKE,FIRST_30D_PAYMENT_AMOUNT_COLLECT,DAYS_LEAD_TO_SIGN_UP,DAYS_SIGN_UP_TO_ONBOARDED_MAKE,DAYS_ONBOARDED_MAKE_TO_FIRST_PAYMENT_MAKE,DAYS_IIC_TO_CSS,DAYS_CSS_TO_ONBOARDED_COLLECT,DAYS_ONBOARDED_COLLECT_TO_FIRST_PAYMENT_COLLECT
0,56511491257,26847,Ashley Woo,Keegan Loh,2024-09-10,NaT,,,,,...,,,,,,,,,,
1,75129208155,147826,Jonathan Ng,,2024-11-05,NaT,,,,,...,,,,,,,,,,
2,56557884463,72253,Keegan Loh,,2024-09-10,NaT,,,,,...,,2022-05-04 04:00:10,,,,,,,,
3,56549605484,45457,Ashley Woo,,2024-09-10,NaT,2019-02-20 00:00:00.000,,,,...,,2021-03-16 11:36:17,,,,,,,,
4,56579196983,114857,Yogesh Parthasarathy,,2024-09-10,NaT,,,,,...,,,,,,,,,,
5,56561389996,97187,Leia Ng,,2024-09-10,NaT,,,,,...,,2023-01-01 02:12:42,,,,,,,,
6,56559898948,88317,Juliani Sarah (Deactivated User),,2024-09-10,NaT,,,,,...,,,,,,,,,,
7,56582404368,133973,Ashley Woo,,2024-09-10,NaT,2024-07-15 00:00:00.000,,,,...,,2023-07-10 03:06:12,,,,,,,,
8,56559474542,85888,Ashley Woo,Xavier Tang,2024-09-10,NaT,,,,,...,,2022-10-08 03:30:20,,,,,,,,
9,56558139757,77270,Ashley Woo,,2024-09-10,NaT,,,,,...,,2022-06-27 00:30:15,,,,,,,,
