In [65]:
import gspread
import numpy as np
import pandas as pd
import janitor

gc = gspread.oauth()

sht1 = gc.open_by_key('1xy2bw5ckuUod-5In2iZZuWxuIxvMqzI-fF_jODddGVg')

worksheet = sht1.worksheet("[all time] Marketing MEL/MQL Report")

all_time_lead = worksheet.get("A2:AL")

df_all_time_lead = pd.DataFrame(all_time_lead[1:], columns=all_time_lead[0])

df_all_time_lead = df_all_time_lead.clean_names()

df_all_time_lead.replace('', np.nan, inplace=True)

In [66]:
import re
import pandas as pd

def clean_phone_number(phone):

    if pd.isna(phone):

        return 'NA'  

    return re.sub(r'[^\d]', '', str(phone))

df_all_time_lead["mobile_primary"] = df_all_time_lead["mobile_primary"].apply(clean_phone_number)
df_all_time_lead["business_phone"] = df_all_time_lead["business_phone"].apply(clean_phone_number)

In [67]:
sht2 = gc.open_by_key('1-WkE7rGsIxYMLjVCZNEKYYliqXau71wNwwCXBjq_b0c')

sms_sam_list_sheet = sht2.worksheet("SMS List")

sms_sam_list_gsheet = sms_sam_list_sheet.get("A1:AH")

sms_sam_list = pd.DataFrame(sms_sam_list_gsheet[1:], columns=sms_sam_list_gsheet[0]).clean_names()

phone_list_source = sms_sam_list[["phone_clean","source"]]

In [68]:
def determine_metrics(row):
    if row['stage'] == 'Onboarded':
        return 'Onboarded'
    elif pd.notna(row['opportunity_id']):
        return 'SQL'
    elif pd.notna(row['latest_mql_timestamp']) and row['unqualified_reason'] not in ['Current Client', 'Duplicate', 'Not a Restaurant', 'Incorrect Phone Number']:
        return 'MQL'
    elif pd.notna(row['first_mel_timestamp']) and row['unqualified_reason'] not in ['Current Client', 'Duplicate']:
        return 'MEL'
    else:
        return 'Dead Lead' 
 

df_all_time_lead['metrics'] = df_all_time_lead.apply(determine_metrics, axis=1)

In [69]:
df_merged = df_all_time_lead.merge(
    phone_list_source,
    how='left',
    left_on='mobile_primary',
    right_on='phone_clean',
    suffixes=('', '_mobile')
)

df_merged.rename(columns={'source': 'source_mobile', 'phone_clean':'primary_sms'}, inplace=True)

df_merged = df_merged.merge(
    phone_list_source,
    how='left',
    left_on='business_phone',
    right_on='phone_clean',
    suffixes=('', '_business')
)

df_merged.rename(columns={'source': 'source_business', 'phone_clean':'business_sms'}, inplace=True)

df_merged['final_source'] = df_merged['source_mobile'].combine_first(df_merged['source_business'])

df_merged['final_source'] = df_merged['final_source'].fillna('No Match')


In [70]:
sht3 = gc.open_by_key('1IYMZ0a-yL7LQUgCsy6EZnSjHLBKXC48P9QRE9Kh8Jzo')
worksheet = sht3.worksheet("campaign_list")
campaign_list_sheet = worksheet.get("A:B")
campaign_list = pd.DataFrame(campaign_list_sheet[1:], columns=campaign_list_sheet[0])
campaign_list = campaign_list.clean_names()

campaign_map_dict = dict(zip(campaign_list['sub_campaign_tags'], campaign_list['list']))

# Display the created mapping dictionary (optional)
print("\nCampaign Map Dictionary:")
for key, value in list(campaign_map_dict.items())[:5]:  # Display first 5 mappings for brevity
    print(f"{key}: {value}")

# Update 'final_source' in df_merged using the mapping dictionary
df_merged['final_source'] = df_merged['latest_campaign'].map(campaign_map_dict).fillna(df_merged['final_source'])
df_merged.loc[df_merged['latest_campaign'].str.contains('brizo', case=False, na=False), 'final_source'] = 'Brizo'






Campaign Map Dictionary:
Mkt_DM_Snowflake_OctoberV1: Brizo
Mkt_DM_Snowflake_OctoberV2: Brizo
Mkt_DM_Snowflake_AirfryerV1: Brizo
Mkt_DM_Snowflake_AirfryerV3: Brizo
Mkt_DM_Snowflake_AirfryerV2: Brizo


In [71]:

# can add more columns if needed
df_result = df_merged[['first_mel_timestamp','latest_mql_timestamp','lead_id',
                       'mobile_primary', 'business_phone', 'lead_channel',
                       'final_source','metrics','latest_campaign','unqualified_reason','rejected_reason']]

In [72]:
import numpy as np

df_result.replace(to_replace=[np.nan, 'NA'], value='', inplace=True)

df_result.to_csv("/Users/yukachen/Documents/ad-hoc/sms/sms_sam_list.csv")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_result.replace(to_replace=[np.nan, 'NA'], value='', inplace=True)


In [73]:
from gspread_dataframe import set_with_dataframe

spreadsheet_key = '1HDKnaLuPzP_-GWX0gGNsl72w5rd0H-YroC8g7RmVZtA'

gc = gspread.oauth()

quality_check = gc.open_by_key(spreadsheet_key)


try:
    worksheet = quality_check.worksheet("SMS SAM List <> Leads")
    quality_check.del_worksheet(worksheet)
    print("existing worksheet 'SMS SAM List <> Leads' deleted.")
except gspread.exceptions.WorksheetNotFound:
    print("worksheet 'SMS SAM List <> Leads' not found, creating a new one.")

worksheet = quality_check.add_worksheet(title="SMS SAM List <> Leads", rows=df_result.shape[0], cols=df_result.shape[1])
print("sms lead list has been added")

set_with_dataframe(worksheet, df_result)

existing worksheet 'SMS SAM List <> Leads' deleted.
sms lead list has been added


In [74]:


try:
    worksheet = quality_check.worksheet("Raw Data")
    quality_check.del_worksheet(worksheet)
    print("existing worksheet 'Raw Data' deleted.")
except gspread.exceptions.WorksheetNotFound:
    print("worksheet 'Raw Data' not found, creating a new one.")

worksheet = quality_check.add_worksheet(title="Raw Data", rows=df_merged.shape[0], cols=df_merged.shape[1])
print("raw data list has been added")

set_with_dataframe(worksheet, df_merged)

existing worksheet 'Raw Data' deleted.
raw data list has been added
