In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re

In [10]:
# Load the Salesforce lead export.
lead = pd.read_csv("data/SFDC Lead.csv")

# Strip every non-digit character from both phone columns so the numbers
# are plain digit strings.
for phone_col in ("Business Phone", "Mobile - Primary"):
    lead[phone_col] = lead[phone_col].str.replace(r"[^\d]", "", regex=True)

# Keep only leads whose channel contains "DM" (case-insensitive); rows with
# a missing channel are treated as non-matching and dropped.
is_dm_channel = lead["Lead Channel"].str.contains("DM", case=False, na=False)
filtered_lead = lead[is_dm_channel]

In [11]:
# Load the cleaned QR-scan export and discard its last four columns.
clean_scan = pd.read_csv("data/Clean - QR Scan.csv").iloc[:, :-4]

# Reduce the scan phone numbers to digits only.
scan_phone_digits = clean_scan["Business Phone"].str.replace(
    r"[^\d]", "", regex=True
)
clean_scan["Business Phone"] = scan_phone_digits

In [12]:
form_submit = pd.read_csv("data/formsubmit.csv")

# Flag each scan whose "Snowball Map" value appears in the form-submission
# file. The flag is stored as the strings "True"/"False", not as booleans.
has_submitted_form = clean_scan["Snowball Map"].isin(form_submit["Snowball"])
clean_scan["submit_form"] = has_submitted_form.astype(str)

# Independent snapshot of the scans including the new flag.
scan_with_form_submition = clean_scan.copy()

In [13]:
inbound_call = pd.read_csv("data/Oct_Inbound_Call.csv")

# Keep a call only if it has a phone number AND its extension channel
# contains "DM" (case-insensitive; a missing channel counts as no match).
# NOTE(review): "Phone" is used as-is, without the digit-only clean-up the
# other phone columns receive - confirm the export is already normalised.
call_has_phone = inbound_call["Phone"].notna()
call_is_dm = inbound_call["Extension Channel"].str.contains(
    "DM", case=False, na=False
)
inbound_call = inbound_call[call_has_phone & call_is_dm]

In [14]:
# Phone numbers seen in QR scans. Missing and empty values are removed
# first: pandas `isin` treats NaN as equal to NaN, so leaving them in would
# tag every lead without a Business Phone as a scan match whenever any scan
# row also lacks a phone (and "" would match "" in the same way).
scan_phones = clean_scan["Business Phone"].dropna()
phone_to_match = scan_phones[scan_phones != ""].tolist()

# True for leads whose Business Phone was seen in a scan.
is_scan_match = filtered_lead["Business Phone"].isin(phone_to_match)

# Split the DM leads into scan matches and the remainder.
lead_scan = filtered_lead[is_scan_match].copy()
lead_scan_no = filtered_lead[~is_scan_match].copy()

# Record how the match was made and which customer action it represents.
lead_scan["Match By"] = "Business Phone"
lead_scan["Action"] = "Scan"

In [15]:
# Scan attributes to carry over onto each scan-matched lead.
scan_columns = [
    "Cuisine Type",
    "State",
    "Session Date",
    "Business Phone",
    "Snowball Map",
    "submit_form",
]

# Left join on the phone number: every lead_scan row is kept, and a lead is
# repeated once per scan record that shares its Business Phone.
add_lead_scan = lead_scan.merge(
    clean_scan[scan_columns],
    how="left",
    on="Business Phone",
)

In [16]:
# Second attempt at scan matching, this time on the lead's mobile number.
# Original note: no matching (match = 0).
mobile_seen_in_scan = filtered_lead["Mobile - Primary"].isin(phone_to_match)

lead_scan2 = filtered_lead.loc[mobile_seen_in_scan].assign(
    **{"Match By": "Mobile - Primary", "Action": "Scan"}
)

In [17]:
# From here on phone_to_match holds the inbound callers' phone numbers
# (it previously held the scan phone numbers).
phone_to_match = inbound_call["Phone"].tolist()

# True for leads whose mobile number placed an inbound call.
mobile_called_in = filtered_lead["Mobile - Primary"].isin(phone_to_match)

# Leads that called in, tagged with the action and the matching column.
lead_call = filtered_lead[mobile_called_in].assign(
    **{"Action": "Inbound Call", "Match By": "Mobile - Primary"}
)
# Leads whose mobile number never called in.
lead_call_no = filtered_lead[~mobile_called_in].copy()

In [18]:
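## No matches (match = 0) when comparing the lead 'Business Phone' with inbound-call phone numbers, so this step is left disabled.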

# lead_call_2 = filtered_lead[filtered_lead['Business Phone'].isin(phone_to_match)].copy()


# lead_call_2['Action'] = "Inbound Call"
# lead_call_2['Match By'] = "Business Phone"

In [19]:
# All matched leads: scan matches followed by inbound-call matches.
result = pd.concat([lead_scan, lead_call], ignore_index=True)
phone_to_match = result["Business Phone"].tolist()
phone_to_match2 = result["Mobile - Primary"].tolist()

# A lead is a no-match candidate when its Business Phone (no_match1) or its
# Mobile - Primary (no_match2) is absent from the matched leads.
no_match1 = filtered_lead[
    ~filtered_lead["Business Phone"].isin(phone_to_match)
].copy()
no_match2 = filtered_lead[
    ~filtered_lead["Mobile - Primary"].isin(phone_to_match2)
].copy()

# Both frames still carry filtered_lead's row labels, so a lead that passes
# both checks would appear twice after stacking. Keep only the first
# occurrence of each row label so every unmatched lead is listed once, then
# renumber the rows as the original ignore_index=True did.
no_match_stacked = pd.concat([no_match1, no_match2])
no_match_total = no_match_stacked[
    ~no_match_stacked.index.duplicated(keep="first")
].reset_index(drop=True)

no_match_total["Action"] = "No Match"
no_match_total["Match By"] = "No Match"

In [20]:
# Stack the unmatched leads on top of the matched ones and export the result.
# With to_csv defaults the row index is written as an unnamed first column.
final_SFDC_LEAD = pd.concat((no_match_total, result), ignore_index=True)
final_SFDC_LEAD.to_csv(path_or_buf="data/matched_SFDC_LEAD.csv")

In [21]:
# --- SAM lists (US + Canada) -------------------------------------------
sam_us = pd.read_csv("data/October_ SAM_US_All.csv")
sam_canada = pd.read_csv("data/October_SAM_CAN_All.csv")

sam_list = pd.concat([sam_us, sam_canada], ignore_index=True)

# Digits-only phone numbers.
sam_list["Business Phone"] = sam_list["Business Phone"].str.replace(
    r"[^\d]", "", regex=True
)
# Turn empty strings (in any column) into NaN, drop rows left without a
# Business Phone, and expose the state/province code under the name "State".
sam_list = (
    sam_list.replace("", np.nan)
    .dropna(subset=["Business Phone"])
    .rename(columns={"State/Province Code": "State"})
)


# --- American Chinese restaurant list ----------------------------------
american_chinese = pd.read_csv("data/American_Chinese.csv").rename(
    columns={"Phone": "Business Phone"}
)
# Stringify each value, strip non-digits and keep at most the last 10 digits.
american_chinese["Business Phone"] = american_chinese["Business Phone"].apply(
    lambda raw_phone: re.sub(r"\D", "", str(raw_phone))[-10:]
)
# Values with no digits at all (including missing ones, which stringify to
# "nan") end up as "" and are dropped here.
american_chinese = american_chinese.replace("", np.nan).dropna(
    subset=["Business Phone"]
)

  sam_us = pd.read_csv("data/October_ SAM_US_All.csv")


In [22]:
# Display the column labels of the combined SAM list.
sam_list.columns

Index(['List Version', 'Snowball Map', 'Name', 'Street', 'City/Town', 'State',
       'Zip/Postal Code', 'Country', 'First Name', 'Last Name', 'Title',
       'Role', 'Contact Email', 'Contact Phone', 'Established',
       'Estimated Employees', 'Business Phone', 'All Phone (Print Shop)',
       'Address Check', 'Name + City Check', 'Contact Count',
       'Most Common Email', 'Cuisines (Regional)', 'Price Range',
       'Website Technology Vendors', 'Unnamed: 25'],
      dtype='object')

In [23]:
# 'Business Phone' is the common column between sam_list and
# american_chinese. The phone list is de-duplicated first so that a number
# listed more than once cannot multiply SAM rows in the left join.
american_chinese_phones = american_chinese[["Business Phone"]].drop_duplicates()

# indicator=True adds a "_merge" column ("both" / "left_only") that records
# whether each SAM row found its phone in the American Chinese list.
merged_df = pd.merge(
    sam_list,
    american_chinese_phones,
    on="Business Phone",
    how="left",
    indicator=True,
)

# Create the 'Cuisine Type' column: "American Chinese" for rows that matched
# the list, otherwise the SAM regional cuisine. The test must use the merge
# indicator; sam_list has already dropped rows without a Business Phone, so
# checking that column for non-null would label every row "American Chinese".
merged_df["Cuisine Type"] = np.where(
    merged_df["_merge"] == "both",
    "American Chinese",
    merged_df["Cuisines (Regional)"],
)

# The indicator is only a helper; keep the frame's columns as before.
merged_df = merged_df.drop(columns="_merge")

In [24]:
# SAM attributes keyed by phone number. The SAM phone column is renamed to
# the lead column it is matched against: joining with left_on/right_on while
# both frames own a "Business Phone" column would yield "Business Phone_x" /
# "Business Phone_y", which do not line up with the plain "Business Phone"
# column of the other lead frames when they are concatenated.
sam_by_mobile = merged_df[
    ["Cuisine Type", "State", "Business Phone", "Snowball Map"]
].rename(columns={"Business Phone": "Mobile - Primary"})

# Left join: every inbound-call lead is kept; the lead's own
# "Business Phone" column is left untouched.
add_lead_call = pd.merge(
    lead_call,
    sam_by_mobile,
    on="Mobile - Primary",
    how="left",
)

In [25]:
# SAM attributes to attach to the unmatched leads.
sam_attributes = merged_df[["Cuisine Type", "State", "Business Phone"]]

# Left join on the business phone number: every unmatched lead is kept.
add_lead_no_match = no_match_total.merge(
    sam_attributes,
    how="left",
    left_on="Business Phone",
    right_on="Business Phone",
)

In [26]:
# Matched leads first: inbound-call matches, then scan matches.
matched_frames = (add_lead_call, add_lead_scan)
final_SFDC_LEAD_2 = pd.concat(matched_frames, ignore_index=True)

# Append the unmatched leads to obtain the complete enriched lead table.
all_lead_frames = (final_SFDC_LEAD_2, add_lead_no_match)
final_SFDC_LEAD_finalfinal = pd.concat(all_lead_frames, ignore_index=True)

# Export; with to_csv defaults the row index is written as well.
final_SFDC_LEAD_finalfinal.to_csv(
    path_or_buf="data/matched_SFDC_LEAD_final.csv"
)

In [27]:
# SAM attributes used to annotate each inbound call.
sam_call_attributes = merged_df[["Cuisine Type", "State", "Business Phone"]]

# Left join: every inbound call is kept and matched on caller phone number
# against the SAM business phone.
inbound_call_notes = inbound_call.merge(
    sam_call_attributes,
    how="left",
    left_on="Phone",
    right_on="Business Phone",
)

inbound_call_notes.to_csv(path_or_buf="data/inbound_call_notes.csv")