Loading the packages

In [1]:
import numpy as np  # NumPy: For numerical and array operations.
import pandas as pd  # Pandas: For data manipulation and analysis.
import matplotlib.pyplot as plt  # Matplotlib: For creating various types of plots and charts.
import seaborn as sns  # Seaborn: For making data visualizations more attractive and informative.
import re
import glob

Loading data

In [15]:
# Read the raw Google Analytics export of QR-scan page visits.
site_visit = pd.read_csv("../data/RAW/google_analytic_qr_scan.csv")

# Re-express the compact YYYYMMDD dates as "YYYY/MM/DD" strings and cast the
# page URL column to str, both in a single pass over the frame.
site_visit = site_visit.assign(
    **{
        "Date": pd.to_datetime(site_visit["Date"], format="%Y%m%d").dt.strftime(
            "%Y/%m/%d"
        ),
        "Page location": site_visit["Page location"].astype(str),
    }
)

Identify which group they are from

- DM1:
- List A + List C = Trifold = airfryer / airfryer3 / airfryer5
- List B + List D = Envelope = airfryer2 / airfryer4

- DM2: List A = Postcard = airfryer6

In [3]:
def categorize_drop(row):
    """Map a landing-page URL to the direct-mail piece it was printed on.

    Matching is case-insensitive and is done on substrings of the URL:

    - ``airfryer2`` / ``airfryer4``  -> ``"DM1_envelope"``
    - ``airfryer3`` / ``airfryer5``  -> ``"DM1_trifold"``
    - ``airfryer6``                  -> ``"DM2_postcard"``
    - any other ``airfryer``         -> ``"DM1_trifold"``

    Parameters
    ----------
    row : str or any
        The URL text of a single row (e.g. a "Page location" or "Referrer"
        value). Non-string values such as NaN or None are accepted.

    Returns
    -------
    str or None
        The category label, or None when the value is not a string or does
        not mention "airfryer" at all.
    """
    # Missing values arrive as float NaN / None when applied over a pandas
    # column; they have no .lower(), so treat them as "no category".
    if not isinstance(row, str):
        return None

    lowered_row = row.lower()

    # Order matters: the numbered slugs must be tested before the bare
    # "airfryer" fallback, because every numbered slug also contains it.
    rules = (
        (("airfryer2", "airfryer4"), "DM1_envelope"),
        (("airfryer3", "airfryer5"), "DM1_trifold"),
        (("airfryer6",), "DM2_postcard"),
        (("airfryer",), "DM1_trifold"),
    )
    for markers, category in rules:
        if any(marker in lowered_row for marker in markers):
            return category
    return None


# Label every visit with its mail-piece category, derived from the page URL.
site_visit["Category"] = site_visit["Page location"].apply(categorize_drop)

# Keep the matched landing-page slug: "airfryer" followed by at most one
# digit, matched in any letter case (NaN when the URL has no such slug).
site_visit["Airfryer_Type"] = site_visit["Page location"].str.extract(
    pat=r"(airfryer\d?)", flags=re.IGNORECASE, expand=False
)

In [4]:
# Number of distinct (non-null) page URLs in the export.
print(len(site_visit["Page location"].dropna().unique()))

119


Get the ID for each site visit (scanned the QR code, submitted the form)

In [25]:
## Get the ID for each site visit (scanned the QR code, submitted the form)

# Pull the digits that follow "utm_id=" out of the page URL; rows whose URL
# has no utm_id get NaN.
site_visit["ID"] = site_visit["Page location"].str.extract(
    r"utm_id=(\d+)", expand=False
)

# Keep only rows that carry an ID other than "00000".
# NOTE(review): "00000" is presumably a placeholder/test ID — confirm.
# .copy() makes the filtered result an independent frame, so the column added
# below is not a write onto a slice (which triggers SettingWithCopyWarning).
has_real_id = site_visit["ID"].notna() & (site_visit["ID"] != "00000")
site_visit = site_visit.loc[has_real_id].copy()

# True on every row whose ID occurs on more than one row.
site_visit["visit_again"] = site_visit["ID"].duplicated(keep=False)

print(site_visit["ID"].nunique())

site_visit.to_csv("../data/1site_visit_draft.csv")

110


In [6]:
# One row per ID, keeping each ID's last occurrence in site_visit.
# .copy() yields an independent frame so later cells can add columns to it
# without pandas treating the write as an assignment onto a slice.
site_visit_unique_ids = site_visit.drop_duplicates(subset=["ID"], keep="last").copy()

In [7]:
# Load the form-submission export.
form_submit = pd.read_csv("../data/RAW/submissions.csv")

# utm_id as it appears in the referrer URL (digit string; NaN when absent).
form_submit["ID"] = form_submit["Referrer"].str.extract(
    r"utm_id=(\d+)", expand=False
)

# Back-fill missing utm_id values from the referrer-derived ID. The result is
# assigned back to the column: calling fillna(inplace=True) on the column
# selection is chained assignment and is not guaranteed to update form_submit.
form_submit["utm_id"] = form_submit["utm_id"].fillna(
    form_submit["ID"].astype(float)
)

# Give the two Chinese-named columns English names.
form_submit = form_submit.rename(columns={"姓氏": "Name", "手机号码": "Phone"})

# Same categorisation and slug extraction as for the site visits, but read
# from the referrer URL.
form_submit["Category"] = form_submit["Referrer"].apply(categorize_drop)

form_submit["Airfryer_Type"] = form_submit["Referrer"].str.extract(
    r"(airfryer\d?)", flags=re.IGNORECASE
)

# Remove dashes from the phone numbers.
form_submit["Phone"] = form_submit["Phone"].str.replace("-", "", regex=True)

form_submit.to_csv("../data/1form_submission.csv")

In [19]:
# Form submissions that carry a utm_id, and those utm_id values as a plain list.
form_submit2 = form_submit[form_submit["utm_id"].notna()]

extracted_ids = form_submit2["utm_id"].tolist()

# Numeric IDs become zero-padded 5-character strings; anything that is not a
# number is passed through unchanged.
formatted_ids = [
    raw_id if not isinstance(raw_id, (int, float)) else str(int(raw_id)).zfill(5)
    for raw_id in extracted_ids
]

# Flag (as the strings "True"/"False") whether each unique visitor ID also
# appears among the form-submission IDs. Written through .loc to avoid the
# SettingWithCopyWarning.
site_visit_unique_ids.loc[:, "form_submission"] = (
    site_visit_unique_ids["ID"].isin(formatted_ids).astype(str)
)

Compile all Brizo List

In [11]:
# Cast the visit IDs to integers so they can be joined against "Snowball Map".
# The column is replaced through .assign(): writing it with
# `.loc[:, "ID"] = ...` sets the values into the existing column and can leave
# it as object dtype instead of a real integer column.
# NOTE(review): assumes every ID is a digit string and that "Snowball Map"
# holds numeric IDs — confirm against the Brizo list.
site_visit_unique_ids = site_visit_unique_ids.assign(
    ID=site_visit_unique_ids["ID"].astype(int)
)

## change Snowball format (00000)
# NOTE(review): all_data_brizo is not created in the visible part of this
# notebook; it must already exist in the kernel when this cell runs.
all_data_brizo = all_data_brizo.dropna(subset=["Snowball Map"])
extracted_ids_brizo = all_data_brizo["Snowball Map"].tolist()
# Zero-padded 5-character string form of the Brizo IDs. The merge below does
# not use it; it joins on the integer ID instead.
formatted_ids_brizo = [
    str(int(raw_id)).zfill(5) if isinstance(raw_id, (int, float)) else raw_id
    for raw_id in extracted_ids_brizo
]

# Attach the Brizo record to every unique visitor ...
site_visit_brizo = pd.merge(
    site_visit_unique_ids,
    all_data_brizo,
    left_on="ID",
    right_on="Snowball Map",
    how="left",
)

# ... and keep only the visitors that matched one. .copy() makes the result an
# independent frame so later cells can modify its columns safely.
site_visit_brizo = site_visit_brizo[
    site_visit_brizo["Snowball Map"].notnull()
].copy()

In [12]:
# Printshop rows that have a "Snowball Map" value.
# NOTE(review): all_data_printshop is not created in the visible part of this
# notebook; it must already exist in the kernel when this cell runs.
all_data_printshop = all_data_printshop[all_data_printshop["Snowball Map"].notna()]
extracted_ids_printshop = all_data_printshop["Snowball Map"].tolist()
# Zero-padded 5-character string form of the numeric IDs; non-numeric values
# are passed through unchanged.
formatted_ids_printshop = [
    raw_id if not isinstance(raw_id, (int, float)) else str(int(raw_id)).zfill(5)
    for raw_id in extracted_ids_printshop
]


# Left-join the printshop record onto every unique visitor by ID ...
site_visit_printshop = site_visit_unique_ids.merge(
    all_data_printshop,
    how="left",
    left_on="ID",
    right_on="Snowball Map",
)

# ... then drop the visitors that found no printshop match.
site_visit_printshop = site_visit_printshop.loc[
    site_visit_printshop["Snowball Map"].notna()
]

# 2023 12/01/2023 FROM HERE

We already have site_visit_brizo and site_visit_printshop. Remaining steps:

1. Create a list from both lists
2. Combine them
3. Clean the phone numbers and join on phone number
4. Join with the form submissions
5. Join with the November leads

In [13]:
# Reduce each business phone number to its digits by deleting every
# non-digit character.
site_visit_brizo["Business Phone"] = (
    site_visit_brizo["Business Phone"]
    .str.replace(pat=r"\D", repl="", regex=True)
)

In [24]:
# Inbound-call export; rows without a "Date" value are discarded.
IB_Call_Nov_Dec = pd.read_csv("../data/RAW/Ibcall.csv")
IB_Call_Nov_Dec = IB_Call_Nov_Dec.dropna(subset=["Date"])

Check whether they scanned the QR code more than once.

If the call is empty, fill in the phone number from the submitted form.

Use the Inbound Call Tracker to match visitors who scanned and also called.

In [14]:
# Load the inbound-call notes.
# NOTE(review): the recorded run of this cell raised FileNotFoundError for
# this relative path, while the other inputs of this notebook are read from
# "../data/RAW/" — confirm where the file lives before re-running.
inbound_call = pd.read_csv("data/Inbound-Call-Notes.csv")

# The raw dates carry no year ("<weekday> <month>/<day>"), so prefix 2023
# before parsing them into datetimes.
inbound_call["Date"] = pd.to_datetime(
    "2023 " + inbound_call["Date"], format="%Y %a %m/%d"
)

# October calls only, as an independent copy, with the date rendered as a
# "YYYY-MM-DD" string.
october_calls = inbound_call[inbound_call["Date"].dt.month == 10].copy()
october_calls["Date"] = october_calls["Date"].dt.strftime("%Y-%m-%d")

october_dm_calls = october_calls[october_calls["Extension Channel"] == "DM"]

# Flag every row of pivoted_df whose phone number appears among the October
# DM calls.
# NOTE(review): pivoted_df is not created in the visible part of this
# notebook; it must already exist in the kernel when this cell runs.
extracted_phone = october_dm_calls["Phone"].tolist()
pivoted_df["inbound_call"] = pivoted_df["Phone"].isin(extracted_phone)

# The 17th column (position 16) is renamed to "Campaign".
# NOTE(review): positional lookup — verify the column order of the export.
october_dm_calls = october_dm_calls.rename(
    columns={october_dm_calls.columns[16]: "Campaign"}
)

october_ft_calls = october_dm_calls[
    october_dm_calls["Campaign"] == "Mkt_DM_Snowflake_OctoberFreeTrial"
]

df = october_ft_calls.copy()

df["Date"] = pd.to_datetime(df["Date"])

# Define the date ranges for 'drop1' and 'drop2'
start_date_drop1 = pd.to_datetime("2023-10-03")
end_date_drop1 = pd.to_datetime("2023-10-05")
start_date_drop2 = pd.to_datetime("2023-10-18")
end_date_drop2 = pd.to_datetime("2023-10-20")

# Label calls dated inside the drop1 window (both ends inclusive) as "drop1"
# and every other call as "drop2".
# NOTE(review): the drop2 window above is not consulted, so calls dated
# outside both windows are also labelled "drop2" — confirm this is intended.
in_drop1 = df["Date"].between(start_date_drop1, end_date_drop1)
df["Period"] = np.where(in_drop1, "drop1", "drop2")

# Move "Date" and "Period" to the front. The remaining columns must exclude
# both of them, otherwise "Date" is selected twice and duplicated in the file.
leading_cols = ["Date", "Period"]
new_order = leading_cols + [col for col in df.columns if col not in leading_cols]
df = df[new_order]

df.to_csv("data/october_dm_called.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'data/Inbound-Call-Notes.csv'

adding missing rows

In [None]:
# Manually add two visitors that are missing from pivoted_df.
# NOTE(review): pivoted_df is not created in the visible part of this
# notebook; it must already exist in the kernel when this cell runs.
# Re-running this cell appends the same two rows again.

# Define the data for the two new rows
new_rows_data = [
    {
        "ID": "31533",
        "Date": "10/12/23",
        "drop": "drop1",
        "submit_form": True,
        "AnyFormSubmit": 1,
        "click": 0,
        "cta_click": 0,
        "first_visit": 0,
        "form_start": 0,
        "page_view": 0,
        "scroll": 0,
        "session_start": 0,
        "user_engagement": 0,
        "visit_again": False,
        "Phone": "7854919016",
        "inbound_call": True,
    },
    {
        "ID": "18238",
        "Date": "10/11/23",
        "drop": "drop1",
        "submit_form": True,
        "AnyFormSubmit": 1,
        "click": 0,
        "cta_click": 0,
        "first_visit": 0,
        "form_start": 0,
        "page_view": 0,
        "scroll": 0,
        "session_start": 0,
        "user_engagement": 0,
        "visit_again": False,
        "Phone": "201-523-6952",
        "inbound_call": False,
    },
]

# Convert the list of dictionaries into a DataFrame
new_rows_df = pd.DataFrame(new_rows_data)

# Store the phone numbers as digits only so both rows use one format
# ("201-523-6952" becomes "2015236952").
# NOTE(review): assumes pivoted_df["Phone"] is digits-only as well — confirm.
new_rows_df["Phone"] = new_rows_df["Phone"].str.replace(r"\D", "", regex=True)

# Append the new rows to the 'pivoted_df' DataFrame;
# ignore_index=True renumbers the index of the combined frame from 0.
pivoted_df = pd.concat([pivoted_df, new_rows_df], ignore_index=True)

In [None]:
# Write the combined table (index included) to disk.
pivoted_df.to_csv(path_or_buf="data/clean_data.csv")