In [20]:
import requests
import pandas as pd
import json
import numpy as np
import statistics
from tqdm import tqdm
import time
import re

In [43]:
## reading in the csv of Detroit addresses (with median taskrabbit cleaning rates and median household income included)

gigwork = pd.read_csv("senior_pet_special_line_cleaning_detroit.csv")

gigwork['zip'] = (
    gigwork['zip']
    .astype(str)
    .str.replace(".0", "")
)

gigwork.head()

Unnamed: 0,address_full,number,street,city,state,zip,longitude,latitude,statefp,countyfp,...,blkgrpce,median_cleaning_price,median_household_income_17-21,seniorcare_median_price,median_line_price,petcare_median_price,spclneeds_median_price,single_child_living_wage,single_nochild_living_wage,couple_child_living_wage
0,"748 DUNKIRK DETROIT, MI 48217",748,DUNKIRK,DETROIT,MI,48217,-83.150601,42.286192,26,163,...,2,27.87,18750.0,20.5,25.8,17.5,20.0,37.22,20.42,20.89
1,"1558 CALVERT DETROIT, MI 48206",1558,CALVERT,DETROIT,MI,48206,-83.100993,42.384252,26,163,...,1,30.97,27197.0,23.0,28.38,17.5,20.0,37.22,20.42,20.89
2,"8083 NORMILE DETROIT, MI 48204",8083,NORMILE,DETROIT,MI,48204,-83.158999,42.35295,26,163,...,4,27.87,36571.0,22.0,30.97,17.5,20.0,37.22,20.42,20.89
3,"4054 RICHTON DETROIT, MI 48204",4054,RICHTON,DETROIT,MI,48204,-83.130003,42.381745,26,163,...,2,30.97,53333.0,22.0,28.9,17.5,20.0,37.22,20.42,20.89
4,"8317 PENROD DETROIT, MI 48228",8317,PENROD,DETROIT,MI,48228,-83.220193,42.354021,26,163,...,2,27.87,23315.0,22.5,28.9,17.5,19.25,37.22,20.42,20.89


In [44]:
## converting the address dataframe to a list

gigwork_list = gigwork.to_dict('records')
gigwork_list

[{'address_full': '748 DUNKIRK DETROIT, MI 48217 ',
  'number': 748,
  'street': 'DUNKIRK',
  'city': 'DETROIT',
  'state': 'MI',
  'zip': '48217',
  'longitude': -83.150601,
  'latitude': 42.286192,
  'statefp': 26,
  'countyfp': 163,
  'tractce': 524500,
  'blkgrpce': 2,
  'median_cleaning_price': 27.87,
  'median_household_income_17-21': 18750.0,
  'seniorcare_median_price': 20.5,
  'median_line_price': 25.8,
  'petcare_median_price': 17.5,
  'spclneeds_median_price': 20.0,
  'single_child_living_wage': 37.22,
  'single_nochild_living_wage': 20.42,
  'couple_child_living_wage': 20.89},
 {'address_full': '1558 CALVERT DETROIT, MI 48206 ',
  'number': 1558,
  'street': 'CALVERT',
  'city': 'DETROIT',
  'state': 'MI',
  'zip': '48206',
  'longitude': -83.100993,
  'latitude': 42.384252,
  'statefp': 26,
  'countyfp': 163,
  'tractce': 531900,
  'blkgrpce': 1,
  'median_cleaning_price': 30.97,
  'median_household_income_17-21': 27197.0,
  'seniorcare_median_price': 23.0,
  'median_line_

In [45]:
## defining a function to call the Care.com api and retrieve the hourly senior care rates and number of hours worked per week for the first 100 caregivers working in a given zip code 

def seniorcare_rates(address):
    headers = {
        'accept': 'application/json, text/javascript, */*; q=0.01',
        'accept-language': 'en-US,en;q=0.9',
        # 'cookie': 'JSESSIONID=9EBA7F44BF3067C97971469F83181948; n_vis=dom-gold-prod-webapp-b13.use.dom.carezen.net1722273897345; care_did=e5adc5ac-4ed8-4691-8db4-2096dfb4c7db; FPC=fb31416b-a8df-438c-956d-61c0d392c372; _ga=GA1.1.446140601.1722274031; FPID=FPID2.2.z6MQUgRYtM9hnFqdFq%2FNNQH0j6X8QOCZo4wU5pHUbtM%3D.1722274031; FPAU=1.2.888495742.1722274031; enrollment-session-id=f93ebf6a-3b96-495d-aac0-262ec8f7310d; JSESSIONID=70729B89E4DD1D33C582E1979B226AA6; csc=dom-gold-prod-webapp-b14.use.dom.carezen.net1722530561457; vc=aa112d1a-4a70-483a-97c6-c00c9b0ed338; ROUTE2=d; bm_mi=4DF25BADE412B5D911BA12224C07D4B3~YAAQrJUzuIIXgv6QAQAAaZOEExiVhKI6hop69w5Bznc9+aUO9ZUwEPaGfyCNVoh5cOXAV6afLUHr0QR64IQhDrOVRCmLzbnK5hjxu4lh4QlTR+L3nSqi2wr74GI0Hw2Av0rocG4e6Z5XT8ozKr5UbQW5f+L9obftkyKMHNXQn8dlbhwlwsRjUH88RX6CofbVCLWCiVH9lCgjZRH/CGOsxj/3XScmj40dhIs2l3kFHPr0poQ6ARH/rdqtdSh4ua+gsxSfHwRfWyaZxqFtbisvaM/qo8VQ1i/Zk/qKUgMmAq3cLJ51RkaHStThdB6IvFOhXuRv2KXFdjA=~1; ak_bmsc=52DED96B27C90908E889FE17AAC30339~000000000000000000000000000000~YAAQrJUzuIkXgv6QAQAAG5iEExg+cFljlx9RFPWYlcvnAkJLIo7L4JGsmWhdDJYxrIbw8HC0kzR08+R1xviHT6TZV21gP5hsin/suk7IYFxLkHs+1gJ3DmoORSt0oaQq9c0VS+midrBtHFXu6FL7uvwnYUxSscjHhotsFXEVNqXZJUk5AzcKHKXxi/ryjVOB90ypYPYdHdPEyz3aG9ZPX9VFPlvXwWEUhNPBFKiGWEKpukXqAdMhwjS8fgPPmj+xCmnANmORMHTlA4F6AKcoArg5u+FX+dUobTpo6tvCraePAQN/4rIIs8Z4xqvohbj6dw8BQfbHxCyEjPUfvA1qhPUoC0XNoxkPSSX9upHYTwerHcxsxvvxXUk5plOIUImtDPUmP7aiz+GwGGISjf0XuNvfXFk91BdzMvjFechB5WDMw7llCDjUcVQz/0oGKaFZQwizNsJNWMeoI70rgfL2EnRN7zj0UX/PBuglQjvnKQo/5+IKov3qR35G; FPLC=5d52N1m1ktvqeYG6SwfcP%2FWOmjneSlTQvE137qKHhxJbsP3UTBtB%2BE7vCZc%2F2o%2BTo2eSFqVN59ZNDAHnXRbxJQ3OZHiOrFTBEYfGVYbjpg7sz%2FvCMGM1D4sqs60Icg%3D%3D; frsc=RgO_unCVesmFsT1eYk3YVHnKypObmmEoXmacCCUK_oZFeZRZfxHcS1TnRQx4BBXo-Atsgh0hBFZ2IDOaGlSoQ9A..; owaspToken=9EVD-AZTP-Q6CY-8CCU-FXTY-Z3I0-EEEG-WNO1; cr_wp=eyJ6aXAiOiIxMTIxNiIsImhtIjp0cnVlLCJ2Ijoic2MiLCJtdCI6InMiLCJpc3AiOmZhbHNlLCJsYyI6ImVuLXVzIiwibWkiOjY1MDE2MDkxLCJ1dWlkIjoiNzIzZTZlYzMtYTBkZC00NjY2LTg3NjctMGI3Mzc3YTMxNjhkIiwibiI6Ik1yayBTLiJ9; wordpress_logged_in=1; mt=seeker; oidc-session=MTcyMjYwOTQ2N3xEWDhFQVFMX2dBQUJFQUVRQUFEXzBQLUFBQVFHYzNSeWFXNW5EQTRBREhCeVpWOXlaV2RwYzNSbGNnWnpkSEpwYm1jTUJ3QUZabUZzYzJVR2MzUnlhVzVuREEwQUMyWnZjbmRoY21SZmRYSnBCbk4wY21sdVp3d0NBQUFHYzNSeWFXNW5EQTRBREdOaGJHeGlZV05yWDNWeWFRWnpkSEpwYm1jTUtRQW5hSFIwY0hNNkx5OTNkM2N1WTJGeVpTNWpiMjB2ZG1sekwyRjFkR2d2WTJGc2JHSmhZMnN6Qm5OMGNtbHVad3dIQUFWemRHRjBaUVp6ZEhKcGJtY01JZ0FnVXpSUlFuVXhkVFZ6VDA1V1FXMTVWSGxOZW0xNFdVdGtkV2xUY0hVM1JqUT18GyD5muiOFWtuFuIcX1JsoxGEcJfekHPJh1Jzzp8hOmk=; eps="error standard error"; amp_8bbcdd=e5adc5ac-4ed8-4691-8db4-2096dfb4c7db...1i49ob6e7.1i49oea4h.5.5.a; iterableEmailCampaignId=6246681; iterableTemplateId=8388458; iterableMessageId=39ab2e6ae0624dab9045b14d70980cc0; iterableEndUserId=mrkstat%40gmail.com; lad=ZG9tLWdvbGQtcHJvZC13ZWJhcHAtYjE0LnVzZS5kb20uY2FyZXplbi5uZXQxNzIyNTMwNTYxNDU3; mc=35M7p7y13hPnXdH46fjIpAVJAfvsKRzFKsz73XEzN6SF70FjRpscwnbO1xg5LEdZfouAHUeQFk8yf0%2Bd%2Ferbhg%3D%3D; care_mid=723e6ec3-a0dd-4666-8767-0b7377a3168d; mc_verify=35M7p7y13hPnXdH46fjIpAVJAfvsKRzFKsz73XEzN6SF70FjRpscwnbO1xg5LEdZfouAHUeQFk8yf0*d_erbhg..; sc=qCD2sGJxKGBz2vI*ItIuR5wOjWyhdK93Z6dkZtHHGL0.; csc-session=csc.1.CiBhPnsjKJm5WnSR9JZqISI14l8bzFWIqo_FZwBp2IMXuA.7N4nB74IWVaY9eaZX0voYo_AwByGdtXlC56RVpWjUMY; amp_49ee77=7ZvZ2QTIqPShl-hqk7syXi.NjUwMTYwOTE=..1i49oavgt.1i49of96l.90.n.9n; n_tc=8201%7C2416%7C7579; csc-session-ttl=1726497568; isc=IODv*hpR9Rn4*cpYFDdNqw..; bm_sv=F3BEFE5DB691F59CB988261501E3A1F7~YAAQrJUzuDwjgv6QAQAA5SWIExiCg/IolSeN4nnSjnW/y9o7E7DysruTnbz8G5macsOgq9cQaLJCHnX4FsZCyi8bXmgwEKieV2fXG3hwEZGOjiRHQInHVX6XayJKFOSzMV6el9Y739hY7izd9qgr0GeY66OT14ldZRDgvLYF4Hz0h1OLhCmk83jXgR6VExLM2TMg/RBkdiQiZr83yG5tJSY1NJuHg/enwZ+Kkl8LLI6iwqH40p8PoldgK4DDS/g=~1; _ga_XMJQJVFLTW=GS1.1.1722609342.2.1.1722609575.0.0.350001791; amplitude_id_49ee77491a2b775cf48ecf4a55ce04c5care.com=eyJkZXZpY2VJZCI6IjBiMWMxN2I1LWU5M2ItNGJjNy1hOGM4LTY2ZjcwMjlmMjg2ZVIiLCJ1c2VySWQiOiI2NTAxNjA5MSIsIm9wdE91dCI6ZmFsc2UsInNlc3Npb25JZCI6MTcyMjYwOTM0MjU5NCwibGFzdEV2ZW50VGltZSI6MTcyMjYwOTU3NjU4OSwiZXZlbnRJZCI6MjcsImlkZW50aWZ5SWQiOjEzLCJzZXF1ZW5jZU51bWJlciI6NDB9',
        'owasptoken': 'undefined',
        'priority': 'u=1, i',
        'referer': 'https://www.care.com/visitor/captureSearchBar.do?searchPerformed=true&sitterService=seniorCare&zipCode=11216&milesFromZipCode=1',
        'sec-ch-ua': '"Not)A;Brand";v="99", "Brave";v="127", "Chromium";v="127"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-origin',
        'sec-gpc': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
        'x-care.com-apikey': 'cmtux62opFFZ8Aov8J0aoJ1zRonczZyqP60pTTURdfIx',
        'x-care.com-os': 'Desktop',
        'x-care.com-visitid': 'aa112d1a-4a70-483a-97c6-c00c9b0ed338',
        'x-requested-with': 'XMLHttpRequest',
    }
    
    params = {
        'serviceId': 'SENIRCARE',
        'sitterService': 'seniorCare',
        'zipCode': json.dumps(address),
        'radius': '10',
        'start': '0',
        'max': '100',
        'sortByColumn': 'relevance desc',
        'pageNumber': '1',
    }
    
    response = requests.get('https://www.care.com/platform/spi/search/v2/provider', params=params, headers=headers)
    seniorcare = response.json()
    providers = seniorcare['data']['providerList']
    

    provider_rates = []
    provider_hours = []
    for provider in providers:
        provider_rates.append(provider['hourlyRate'])
        provider_hours.append(provider['weeklyHours'])

    return provider_rates, provider_hours


In [112]:
def clean_hours(hours):
    hour_range = '(\d+)'
    match = re.findall(hour_range, hours)
    if len(match) > 1:
        cleaned_hours = ((float(match[1]) - float(match[0]))/2) + float(match[0])
    else:
        cleaned_hours = float(match[0])
    return cleaned_hours   
  

In [47]:
## creating a function to clean the hourly rates. In instances where the rates are presented as a range (for example: $15-25/hr), the midpoint in that range (20) is used.

def clean_rate(rate):
    semiclean = rate.strip('$').strip("/hr").split('-')
    if len(semiclean) > 1:
        cleaned_rate = ((float(semiclean[1]) - float(semiclean[0]))/2) + float(semiclean[0])
    else:
        cleaned_rate = float(semiclean[0])
    return cleaned_rate

In [113]:
## looping through the Detroit addresses, retreiving the first 100 senior caregivers' hourly rates and weekly hours worked for each address, cleaning the results, and appending them to the address list

for place in tqdm(gigwork_list):
    zip_code = int(place['zip'])
    rate_list = seniorcare_rates(zip_code)[0] ## retrieve the provider_rate list from seniorcare_rates function
    if "N/A" in rate_list:
        rate_list.remove("N/A") ## remove all "N/A" values from the provider rate list
    dirty_hour_list = seniorcare_rates(zip_code)[1] ## retrieve the provider_hours list from seniorcare_rates function
    semiclean_hours = [i for i in dirty_hour_list if i] ## remove all missing values from the list
    pretty_rate = []
    pretty_hours = []
    for rate in rate_list:
        pretty_rate.append(clean_rate(rate)) ## clean the hourly rates, turning them into floats
    for hours in semiclean_hours:
        pretty_hours.append(clean_hours(hours)) ## clean the weekly hours, turning them into floats
    median_price = statistics.median(pretty_rate) ## calculate the median rate for providers in the given zip code
    median_hours = statistics.median(pretty_hours) ## calculate the median weekly hours worked by providers in the given zip code 
    place['seniorcare_median_price'] = median_price
    place['seniorcare_median_hours'] = median_hours
    time.sleep(1)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [22:25<00:00,  6.73s/it]


In [116]:
## converting the list with the additional information back into a dataframe

gig_prices = pd.DataFrame(gigwork_list)
gig_prices

Unnamed: 0,address_full,number,street,city,state,zip,longitude,latitude,statefp,countyfp,...,median_cleaning_price,median_household_income_17-21,seniorcare_median_price,median_line_price,petcare_median_price,spclneeds_median_price,single_child_living_wage,single_nochild_living_wage,couple_child_living_wage,seniorcare_median_hours
0,"748 DUNKIRK DETROIT, MI 48217",748,DUNKIRK,DETROIT,MI,48217,-83.150601,42.286192,26,163,...,27.87,18750.0,21.75,25.80,17.5,20.00,37.22,20.42,20.89,30.00
1,"1558 CALVERT DETROIT, MI 48206",1558,CALVERT,DETROIT,MI,48206,-83.100993,42.384252,26,163,...,30.97,27197.0,23.00,28.38,17.5,20.00,37.22,20.42,20.89,30.00
2,"8083 NORMILE DETROIT, MI 48204",8083,NORMILE,DETROIT,MI,48204,-83.158999,42.352950,26,163,...,27.87,36571.0,22.50,30.97,17.5,20.00,37.22,20.42,20.89,29.25
3,"4054 RICHTON DETROIT, MI 48204",4054,RICHTON,DETROIT,MI,48204,-83.130003,42.381745,26,163,...,30.97,53333.0,22.50,28.90,17.5,20.00,37.22,20.42,20.89,29.25
4,"8317 PENROD DETROIT, MI 48228",8317,PENROD,DETROIT,MI,48228,-83.220193,42.354021,26,163,...,27.87,23315.0,22.50,28.90,17.5,19.25,37.22,20.42,20.89,30.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,"3145 CHARLEVOIX DETROIT, MI 48207",3145,CHARLEVOIX,DETROIT,MI,48207,-83.026192,42.354579,26,163,...,29.42,29500.0,22.75,28.90,17.5,20.00,37.22,20.42,20.89,30.00
196,"2305 BLAINE DETROIT, MI 48206",2305,BLAINE,DETROIT,MI,48206,-83.102370,42.370948,26,163,...,30.97,24479.0,23.00,28.90,17.5,20.00,37.22,20.42,20.89,30.00
197,"4187 COPLIN DETROIT, MI 48215",4187,COPLIN,DETROIT,MI,48215,-82.960567,42.387589,26,163,...,35.61,,21.50,30.97,17.5,20.00,37.22,20.42,20.89,30.00
198,"15810 HARTWELL DETROIT, MI 48235",15810,HARTWELL,DETROIT,MI,48235,-83.177977,42.407778,26,163,...,30.45,54375.0,24.00,28.90,18.0,20.00,37.22,20.42,20.89,28.00


In [40]:
## dropping the unneccessary index column

gig_prices = gig_prices.drop(columns=['Unnamed: 0'])

In [41]:
gig_prices.head()

Unnamed: 0,address_full,number,street,city,state,zip,longitude,latitude,statefp,countyfp,tractce,blkgrpce,median_cleaning_price,median_household_income_17-21,seniorcare_median_price
0,"748 DUNKIRK DETROIT, MI 48217",748,DUNKIRK,DETROIT,MI,48217,-83.150601,42.286192,26,163,524500,2,27.87,18750.0,20.5
1,"1558 CALVERT DETROIT, MI 48206",1558,CALVERT,DETROIT,MI,48206,-83.100993,42.384252,26,163,531900,1,30.97,27197.0,23.0
2,"8083 NORMILE DETROIT, MI 48204",8083,NORMILE,DETROIT,MI,48204,-83.158999,42.35295,26,163,535600,4,27.87,36571.0,22.0
3,"4054 RICHTON DETROIT, MI 48204",4054,RICHTON,DETROIT,MI,48204,-83.130003,42.381745,26,163,530900,2,30.97,53333.0,22.0
4,"8317 PENROD DETROIT, MI 48228",8317,PENROD,DETROIT,MI,48228,-83.220193,42.354021,26,163,546000,2,27.87,23315.0,22.5


In [115]:
gig_prices.to_csv("gigwork_rates_seniorhours.csv", index=False)