# Hanukkah of Data: Day 07

We are looking for the phone number of the ex-boyfriend of Emily Randolph (the customer identified in puzzle 06).

## Setup

In [1]:
import pandas as pd

from hanukkah_of_data.utils import load_data

## Load data

In [2]:
# Load the puzzle tables via the project helper; the result is indexed by table
# name in the next cell.
dfs = load_data()

In [3]:
# Pull the four tables used below out of the loaded collection.
customers_df, orders_df, items_df, products_df = (
    dfs[table] for table in ("customers", "orders", "orders_items", "products")
)

## Solve puzzle

### Get products ordered by Emily

In [4]:
# Show Emily's customer record (her customerid is needed to find her orders).
customers_df.loc[customers_df["name"].eq("Emily Randolph")]

Unnamed: 0,customerid,name,address,citystatezip,birthdate,phone
7341,8342,Emily Randolph,1055A E 3rd St,"Brooklyn, NY 11230",1988-10-30,914-868-0316


In [5]:
# Look Emily's customer id up by name instead of hard-coding the value read off
# the previous cell's output, so the cell does not silently depend on that output.
emily_ids = customers_df.loc[customers_df["name"] == "Emily Randolph", "customerid"]

# One row per ordered item, enriched with the matching product details.
emily_orders_df = (
    orders_df.loc[orders_df["customerid"].isin(emily_ids)]
    .merge(items_df, how="left", on="orderid")
    .merge(products_df, how="left", on="sku")
)

In [6]:
# Preview the first rows of Emily's order items.
emily_orders_df.head()

Unnamed: 0,orderid,customerid,ordered,shipped,items,total,sku,qty,unit_price,desc,wholesale_cost
0,12624,8342,2017-05-28 09:15:06,2017-05-28 11:00:00,,17.56,HOM7105,1,17.56,Handmade Toaster (blue),33.65
1,33563,8342,2017-12-24 17:05:45,2017-12-24 17:05:45,,1.72,KIT0100,1,1.72,Super Coconut Fork,2.99
2,75257,8342,2019-02-11 18:52:45,2019-02-11 18:52:45,,5.82,TOY9308,1,5.82,Disney Dreidel,10.22
3,86371,8342,2019-06-01 12:50:16,2019-06-01 12:50:16,,15.9,HOM5139,1,15.9,Electric Machine (amber),31.81
4,87599,8342,2019-06-13 15:56:36,2019-06-13 15:56:36,,3.51,PET6786,1,0.99,Senior Mouse Food,1.4


In [7]:
# Keep only the items whose description ends with ")", i.e. a "(colour)" suffix.
has_color_suffix = emily_orders_df["desc"].str.endswith(")")
emily_products_df = emily_orders_df.loc[has_color_suffix]

In [90]:
# Display the coloured items Emily ordered.
emily_products_df

Unnamed: 0,orderid,customerid,ordered,shipped,items,total,sku,qty,unit_price,desc,wholesale_cost
0,12624,8342,2017-05-28 09:15:06,2017-05-28 11:00:00,,17.56,HOM7105,1,17.56,Handmade Toaster (blue),33.65
3,86371,8342,2019-06-01 12:50:16,2019-06-01 12:50:16,,15.9,HOM5139,1,15.9,Electric Machine (amber),31.81
6,94040,8342,2019-08-18 14:20:28,2019-08-18 14:20:28,,78.95,HOM1216,1,53.81,Manual Mixer (orange),94.72
8,94040,8342,2019-08-18 14:20:28,2019-08-18 14:20:28,,78.95,HOM5127,1,12.34,Electric Toaster (blue),21.72
9,96220,8342,2019-09-09 16:30:05,2019-09-09 17:45:00,,9.18,COL4384,1,4.26,Noah's Poster (white),7.52
20,154876,8342,2021-04-17 19:52:28,2021-04-17 19:52:28,,76.21,COL4001,1,29.39,Noah's Jewelry (yellow),53.74
21,154876,8342,2021-04-17 19:52:28,2021-04-17 19:52:28,,76.21,HOM5838,1,39.02,Electric Crockpot (purple),71.36
25,200947,8342,2022-07-26 08:18:05,2022-07-26 16:00:00,,28.1,COL3831,1,28.1,Noah's Jewelry (orange),49.8


In [130]:
# Calendar dates (time of day dropped) on which Emily ordered a coloured item.
emily_dates = emily_products_df["ordered"].dt.date
emily_dates

0     2017-05-28
3     2019-06-01
6     2019-08-18
8     2019-08-18
9     2019-09-09
20    2021-04-17
21    2021-04-17
25    2022-07-26
Name: ordered, dtype: object

In [153]:
# Strip the "(colour)" suffix to get the bare product name. The space that
# preceded the parenthesis is kept, so every name ends with a trailing blank.
# Raw string: "\(" and "\w" are regex escapes, not valid Python string escapes.
emily_products = emily_products_df["desc"].str.replace(r"\(\w+\)", "", regex=True)
emily_products

0      Handmade Toaster 
3      Electric Machine 
6          Manual Mixer 
8      Electric Toaster 
9         Noah's Poster 
20       Noah's Jewelry 
21    Electric Crockpot 
25       Noah's Jewelry 
Name: desc, dtype: string

In [154]:
# Capture the colour written between parentheses in each description.
# Raw string: "\(" and "\w" are regex escapes, not valid Python string escapes.
# expand=False returns a Series instead of a one-column DataFrame.
emily_colors = emily_products_df["desc"].str.extract(r"\((\w+)\)", expand=False)
emily_colors

0       blue
3      amber
6     orange
8       blue
9      white
20    yellow
21    purple
25    orange
Name: desc, dtype: string

In [155]:
# Map each bare product name to the colour Emily bought. When a product name
# occurs more than once, the colour from the last occurrence is the one kept.
emily_product_color_map = dict(zip(emily_products, emily_colors))

### Filter for customers who ordered the same products as Emily on the same dates

In [156]:
# Build one wide table: customer -> their orders -> order items -> product details.
# Customers without orders are dropped (inner join); items/products are left-joined.
merged_df = customers_df.merge(orders_df, on="customerid", how="inner")
merged_df = merged_df.merge(items_df, on="orderid", how="left")
merged_df = merged_df.merge(products_df, on="sku", how="left")

In [157]:
# Product name without its "(colour)" suffix, produced with the same pattern as
# emily_products so the two can be compared with isin().
# Raw string: "\(" and "\w" are regex escapes, not valid Python string escapes.
merged_df["clean_desc"] = merged_df["desc"].str.replace(r"\(\w+\)", "", regex=True)

In [158]:
# Colour written between parentheses in the description (missing when there is none).
# Raw string avoids invalid Python escapes; expand=False yields a Series, matching
# how emily_colors is extracted.
merged_df["color"] = merged_df["desc"].str.extract(r"\((\w+)\)", expand=False)

In [174]:
# Spell out each condition on its own so the selection reads as a checklist.
on_emily_date = merged_df["ordered"].dt.date.isin(emily_dates)
is_emily_product = merged_df["clean_desc"].isin(emily_products)
not_emily = merged_df["name"].ne("Emily Randolph")
# Orders whose ordered and shipped timestamps are identical.
shipped_at_order_time = merged_df["ordered"].eq(merged_df["shipped"])

candidates_df = merged_df.loc[
    on_emily_date & is_emily_product & not_emily & shipped_at_order_time,
    ["name", "phone", "ordered", "clean_desc", "color"],
]

In [175]:
# Display the remaining candidates.
candidates_df

Unnamed: 0,name,phone,ordered,clean_desc,color
29146,Julie Tina Hernandez,516-713-5712,2019-06-01 16:31:10,Noah's Jewelry,orange
48574,Frank Roach,516-520-9716,2021-04-17 12:22:55,Noah's Jewelry,yellow
77349,Maria Barbara Sanchez,838-599-2765,2021-04-17 13:50:31,Noah's Poster,orange
116192,Anita Ford,516-645-7010,2019-06-01 11:08:59,Noah's Poster,red
126536,Melvin Rodriguez III,914-698-1257,2019-08-18 12:47:14,Electric Toaster,mauve
128680,Timothy Jones,716-691-6727,2021-04-17 11:14:30,Noah's Jewelry,green
205603,Amy Watson,585-876-8529,2022-07-26 12:57:17,Electric Machine,amber
214369,Jessica Claudia Smith,838-703-5339,2017-05-28 18:01:52,Noah's Jewelry,magenta
283916,Kerri Black,838-589-7675,2017-05-28 11:56:51,Noah's Jewelry,blue
286836,Joe Conway,607-601-8847,2022-07-26 12:07:50,Manual Mixer,orange


In [215]:
# Largest allowed gap between Emily's purchase and the candidate's purchase.
MAX_GAP = pd.Timedelta(minutes=5)

for row in candidates_df.itertuples():
    # Compare the full, absolute time difference against each of Emily's orders.
    # `.dt.seconds` must not be used here: it is only the seconds *component* of
    # the timedelta (0-86399), so it ignores whole days and wraps around for
    # negative gaps, which lets orders placed on different dates match.
    close_in_time = (row.ordered - emily_products_df["ordered"]).abs() <= MAX_GAP
    # Check product and colour against the very same Emily order instead of a
    # name -> colour lookup: Emily bought some products in more than one colour,
    # and a dict keyed by product name can only remember one of them.
    same_product = emily_products == row.clean_desc
    other_color = emily_colors != row.color
    if (close_in_time & same_product & other_color).any():
        print(row.name, row.phone, row.clean_desc, row.color)

Jonathan Adams 315-618-5263 Electric Machine  purple
