# 2. The Contractor

Target
- the phone number of a contractor whose initials are JP
- The claim ticket said ‘2017 JP’
- they stopped outsourcing a few years ago

---

- ## Setup: all the CSV filenames w/o extensions

In [1]:
import pandas as pd
import os
def csv_stems(directory='./'):
    """Return the names of the CSV files in `directory`, minus the extension.

    Only entries whose name ends in '.csv' are kept, so 'data.csv.bak' is
    skipped. Only that trailing suffix is removed, so 'data.v2.csv' yields
    'data.v2' and appending '.csv' again gives back the real file name.
    Entries are returned in os.listdir() order, which is arbitrary.
    """
    suffix = '.csv'
    return [
        entry[:-len(suffix)]
        for entry in os.listdir(directory)
        if entry.endswith(suffix)
    ]

filenames = csv_stems()
filenames

['noahs-orders', 'noahs-products', 'noahs-orders_items', 'noahs-customers']

- ## Setup: Should be 4 as there are 4 files in total

In [2]:
# Load every CSV into its own DataFrame, in the same order as `filenames`.
all_csvs = []
for stem in filenames:
    all_csvs.append(pd.read_csv(stem + '.csv'))
len(all_csvs)

4

- ## Final setup: we need all 4 DataFrames

In [3]:
# os.listdir() returns entries in arbitrary order, so bind each DataFrame by
# the name of the file it was read from rather than by its position in
# `all_csvs` (which mirrors the order of `filenames`).
frames = dict(zip(filenames, all_csvs))
NO = frames['noahs-orders']          # orders
NP = frames['noahs-products']        # products
NOIT = frames['noahs-orders_items']  # order line items
NC = frames['noahs-customers']       # customers
[len(_) for _ in [NO, NP, NOIT, NC]]

[213232, 1278, 426541, 8260]

- ## Get all contractors whose initials == JP

In [4]:
def abbrev(name, initials='JP'):
    """Return True when the initials of `name` equal `initials`.

    The initials are the first character of every whitespace-separated word
    in `name`, upper-cased; `initials` is compared case-insensitively and
    defaults to 'JP'.

    A non-string `name` (for example the NaN pandas uses for a missing
    value) never matches, instead of raising AttributeError.
    """
    if not isinstance(name, str):
        return False
    return ''.join(word[0] for word in name.split()).upper() == initials.upper()

# Keep the customers for whom abbrev() reports matching initials.
has_jp_initials = NC['name'].apply(abbrev)
JP = NC[has_jp_initials]
JP.head()

Unnamed: 0,customerid,name,address,citystatezip,birthdate,phone,timezone,lat,long
165,1166,John Phillips,806A E 230th St,"Bronx, NY 10466",1968-04-26,716-900-2422,America/New_York,40.88879,-73.85127
194,1195,Julie Page,462 Rear E New York Ave,"Brooklyn, NY 11225",1990-01-25,332-864-1901,America/New_York,40.66201,-73.94547
263,1264,Justin Perez,402-1 Willow Rd E,"Staten Island, NY 10314",1988-08-04,585-300-8492,America/New_York,40.61668,-74.15085
297,1298,Jacqueline Patel,3657 NE 13th Rd,"Houston, TX 77004",1969-09-07,430-877-4645,America/Chicago,29.786,-95.3885
311,1312,Judy Page,974 E 149th St,"Bronx, NY 10455",1973-05-16,516-779-4396,America/New_York,40.81625,-73.90358


- ## List of orders made back in 2017

In [5]:
# 'ordered' holds text timestamps, so a prefix test on the string selects
# the orders placed in 2017.
placed_in_2017 = NO['ordered'].str.startswith('2017')
orders17 = NO.loc[placed_in_2017]
orders17.head()

Unnamed: 0,orderid,customerid,ordered,shipped,items,total
0,1001,6878,2017-01-31 02:56:45,2017-01-31 09:00:00,,0.99
1,1002,6375,2017-01-31 04:13:35,2017-01-31 12:15:00,,13.59
2,1003,8045,2017-01-31 04:45:12,2017-01-31 10:45:00,,1.23
3,1004,5385,2017-01-31 05:49:19,2017-01-31 09:00:00,,2.1
4,1005,9235,2017-01-31 06:28:11,2017-01-31 16:00:00,,13.23


- ## List of orders made back in 2017 where the contractor is JP

In [6]:
# Default (inner) join on customerid: keeps the 2017 orders whose customer
# is in the JP table.
JP17 = orders17.merge(JP, on='customerid')
JP17.head(3), JP17['name'].value_counts().head(10)

(   orderid  customerid              ordered              shipped  items  \
 0     1007        4285  2017-01-31 08:01:48  2017-01-31 19:15:00    NaN   
 1     9172        4285  2017-04-22 15:01:45  2017-04-22 15:01:45    NaN   
 2     1102        1195  2017-02-01 07:49:16  2017-02-01 10:45:00    NaN   
 
    total           name                  address               citystatezip  \
 0   3.58  Joshua Peters            34-49 85th St  Jackson Heights, NY 11372   
 1  57.91  Joshua Peters            34-49 85th St  Jackson Heights, NY 11372   
 2  57.44     Julie Page  462 Rear E New York Ave         Brooklyn, NY 11225   
 
     birthdate         phone          timezone       lat      long  
 0  1964-10-17  914-322-7233  America/New_York  40.75464 -73.88258  
 1  1964-10-17  914-322-7233  America/New_York  40.75464 -73.88258  
 2  1990-01-25  332-864-1901  America/New_York  40.66201 -73.94547  ,
 name
 Judy Potter          29
 Justin Perez         28
 Jonathan Phillips    27
 James Perez  

- ## Latest 2017 order timestamp for each customer (orders sorted newest-first)

In [7]:
# Sort newest-first so that first() on each customer's group yields that
# customer's latest 2017 'ordered' value.
newest_first = orders17.sort_values('ordered', ascending=False)
orders17last = newest_first.groupby('customerid').first()['ordered']

orders17last[83:88]

customerid
1188    2017-04-06 16:28:55
1194    2017-11-23 10:06:36
1195    2017-12-25 10:32:50
1198    2017-03-10 13:10:45
1202    2017-11-17 17:24:20
Name: ordered, dtype: object

In [8]:
# Attach each customer's latest 2017 order time to every one of their rows in
# JP17. Both sides carry an 'ordered' field, so the result has ordered_x
# (from orders17last) and ordered_y (from JP17).
JP17last = pd.merge(left=orders17last, right=JP17, on='customerid')
JP17last.head(3)

Unnamed: 0,customerid,ordered_x,orderid,ordered_y,shipped,items,total,name,address,citystatezip,birthdate,phone,timezone,lat,long
0,1195,2017-12-25 10:32:50,1102,2017-02-01 07:49:16,2017-02-01 10:45:00,,57.44,Julie Page,462 Rear E New York Ave,"Brooklyn, NY 11225",1990-01-25,332-864-1901,America/New_York,40.66201,-73.94547
1,1195,2017-12-25 10:32:50,2371,2017-02-13 10:54:48,2017-02-13 14:45:00,,1.18,Julie Page,462 Rear E New York Ave,"Brooklyn, NY 11225",1990-01-25,332-864-1901,America/New_York,40.66201,-73.94547
2,1195,2017-12-25 10:32:50,3344,2017-02-22 10:46:16,2017-02-22 15:00:00,,4.12,Julie Page,462 Rear E New York Ave,"Brooklyn, NY 11225",1990-01-25,332-864-1901,America/New_York,40.66201,-73.94547


In [9]:
# Case-insensitive search of the product descriptions for "coffee".
mentions_coffee = NP['desc'].str.contains('coffee', case=False)
Coffee = NP.loc[mentions_coffee]
Coffee

Unnamed: 0,sku,desc,wholesale_cost,dims_cm
1112,DLI8820,"Coffee, Drip",1.44,9.6|7.8|0.7


In [10]:
# Join the coffee products to the order line items that share their sku.
Coffee_order = Coffee.merge(NOIT, on='sku')
Coffee_order.head()

Unnamed: 0,sku,desc,wholesale_cost,dims_cm,orderid,qty,unit_price
0,DLI8820,"Coffee, Drip",1.44,9.6|7.8|0.7,1289,1,1.5
1,DLI8820,"Coffee, Drip",1.44,9.6|7.8|0.7,2012,1,2.23
2,DLI8820,"Coffee, Drip",1.44,9.6|7.8|0.7,2503,1,1.98
3,DLI8820,"Coffee, Drip",1.44,9.6|7.8|0.7,2534,1,1.61
4,DLI8820,"Coffee, Drip",1.44,9.6|7.8|0.7,4976,1,1.75


In [11]:
# Keep the coffee order lines whose orderid also appears in JP17last.
Coffee_order_details = Coffee_order.merge(JP17last, on='orderid')
Coffee_order_details.head()

Unnamed: 0,sku,desc,wholesale_cost,dims_cm,orderid,qty,unit_price,customerid,ordered_x,ordered_y,...,items,total,name,address,citystatezip,birthdate,phone,timezone,lat,long
0,DLI8820,"Coffee, Drip",1.44,9.6|7.8|0.7,7459,1,1.87,1475,2017-04-17 12:12:21,2017-04-05 11:42:15,...,,4.96,Joshua Peterson,100-75 148th St,"Jamaica, NY 11435",1947-02-05,332-274-4185,America/New_York,40.70895,-73.80856
