In [1]:
import numpy as np  # NumPy: For numerical and array operations.
import pandas as pd  # Pandas: For data manipulation and analysis.
import matplotlib.pyplot as plt  # Matplotlib: For creating various types of plots and charts.
import seaborn as sns  # Seaborn: For making data visualizations more attractive and informative.
import re
import glob

In [3]:
def categorize_drop(row):
    """Map a referrer string to a mail-drop category label.

    The match is a case-insensitive substring test, checked in this order:

    * contains "airfryer2" or "airfryer4" -> "DM1_envelope"
    * contains "airfryer3" or "airfryer5" -> "DM1_trifold"
    * contains "airfryer6"                -> "DM2_postcard"
    * contains "airfryer" (any other)     -> "DM1_trifold"

    Parameters
    ----------
    row : object
        A single referrer value. Anything that is not a ``str`` (for example
        the float NaN pandas uses for a blank cell, or ``None``) is treated as
        "no match".

    Returns
    -------
    str or None
        The category label, or ``None`` when nothing matches.
    """
    # Blank cells reach this function as float NaN when it is used with
    # Series.apply; calling .lower() on them would raise AttributeError.
    if not isinstance(row, str):
        return None

    lowered_row = row.lower()
    if "airfryer2" in lowered_row or "airfryer4" in lowered_row:
        return "DM1_envelope"
    if "airfryer3" in lowered_row or "airfryer5" in lowered_row:
        return "DM1_trifold"
    if "airfryer6" in lowered_row:
        return "DM2_postcard"
    if "airfryer" in lowered_row:
        # TODO: update with the appropriate category for general "airfryer"
        return "DM1_trifold"
    return None

In [4]:
# Load the raw form submissions and derive the lookup columns from the
# "Referrer" URL.
form_submit = pd.read_csv("../data/RAW/submissions.csv")

# Digits following "utm_id=" in the referrer (NaN where the pattern is absent).
# Extracted once and reused for both the "ID" column and the utm_id back-fill.
referrer_utm_id = form_submit["Referrer"].str.extract(
    r"utm_id=(\d+)", expand=False
)
form_submit["ID"] = referrer_utm_id

# Back-fill missing utm_id values from the referrer. The result is assigned
# back to the column: calling fillna(..., inplace=True) on a column selection
# is chained assignment, which warns on recent pandas and leaves the frame
# untouched once Copy-on-Write is enabled.
form_submit["utm_id"] = form_submit["utm_id"].fillna(
    referrer_utm_id.astype(float)
)

form_submit = form_submit.rename(columns={"姓氏": "Name", "手机号码": "Phone"})


form_submit["Category"] = form_submit["Referrer"].apply(categorize_drop)

# First "airfryer" token (optionally followed by one digit), case-insensitive.
form_submit["Airfryer_Type"] = form_submit["Referrer"].str.extract(
    r"(airfryer\d?)", flags=re.IGNORECASE, expand=False
)
# Strip dashes from phone numbers; "-" is a literal, so no regex is needed.
form_submit["Phone"] = form_submit["Phone"].str.replace("-", "", regex=False)

In [9]:
# Stringify both identifier columns, labelling missing values "Unknown".
# The missing-value mask is taken from the values BEFORE the cast: on pandas
# versions where astype(str) renders NaN as the text "nan", a fillna() chained
# after the cast never finds anything to fill.
utm_id_raw = form_submit["utm_id"]
utm_id_raw_values = utm_id_raw.tolist()
form_submit["utm_id"] = utm_id_raw.astype(str).where(utm_id_raw.notna(), "Unknown")

referrer_id_raw = form_submit["ID"]
form_submit["ID"] = referrer_id_raw.astype(str).where(
    referrer_id_raw.notna(), "Unknown"
)

# Extract IDs from form_submit as a list
extracted_ids = form_submit["utm_id"].tolist()

# Zero-pad numeric IDs to five characters. The numeric test is made on the
# pre-cast values; testing the already-stringified list would never match, so
# no ID would be padded. Non-numeric and missing entries keep their text form.
formatted_ids = [
    str(int(raw_value)).zfill(5)
    if isinstance(raw_value, (int, float)) and not pd.isna(raw_value)
    else as_text
    for raw_value, as_text in zip(utm_id_raw_values, extracted_ids)
]

In [10]:
# Sanity check: list the column labels of form_submit before selecting a subset.
print(form_submit.columns)

Index(['Name', 'resturantname', 'Phone', 'Terms and Conditions',
       'utm_campaign', 'utm_id', 'utm_source', 'utm_medium', 'Unnamed: 8',
       'Form Name (ID)', 'Submission ID', 'Created At', 'User ID',
       'User Agent', 'User IP', 'Referrer', 'ID', 'Category', 'Airfryer_Type'],
      dtype='object')


In [25]:
# Keep only the columns needed downstream, as an independent copy of
# form_submit, and make sure the "ID" join key is held as strings.
kept_columns = [
    "Name",
    "Phone",
    "utm_id",
    "Created At",
    "Referrer",
    "ID",
    "Category",
    "Airfryer_Type",
]
final_form_submit = form_submit.loc[:, kept_columns].copy()
final_form_submit = final_form_submit.astype({"ID": str})

In [18]:
# Load the cleaned Brizo table and discard rows that have no "Snowball Map"
# value.
brizo = pd.read_csv("../data/Clean/all_brizo.csv", low_memory=False).dropna(
    subset=["Snowball Map"]
)

# Normalise "Snowball Map": int/float values become five-character,
# zero-padded strings; every other value is kept exactly as it was.
extracted_ids_brizo = brizo["Snowball Map"].tolist()
padded_snowball_ids = []
for raw_value in extracted_ids_brizo:
    if isinstance(raw_value, (int, float)):
        padded_snowball_ids.append(str(int(raw_value)).zfill(5))
    else:
        padded_snowball_ids.append(raw_value)
brizo["Snowball Map"] = padded_snowball_ids

In [26]:
# Left join: every form submission row is kept, and Brizo columns are attached
# wherever the submission "ID" equals a Brizo "Snowball Map" value.
form_submit_combine_brizo_info = final_form_submit.merge(
    brizo,
    how="left",
    left_on="ID",
    right_on="Snowball Map",
)

In [28]:
# Write the merged table to the Clean data folder. No index option is passed,
# so pandas' default handling of the row index applies.
form_submit_combine_brizo_info.to_csv(
    path_or_buf="../data/Clean/form_submit_w_info.csv"
)