# Web scraping: headers, the networks tab and parsing an API URL
## Helpful links and resources
- [urllib](https://docs.python.org/3/library/urllib.parse.html#) is a Python library that will pick apart URLs
- [Sessions object - request library](https://docs.python-requests.org/en/master/user/advanced/#session-objects)

In [247]:
#import libraries
import pandas as pd
import requests
import json

## The networks tab
### Static data files
[Covid cases in the US - New York Times](https://www.nytimes.com/interactive/2021/us/covid-cases.html)

In [248]:
# get static data file
covid_cases_r = requests.get('https://static01.nyt.com/newsgraphics/2021/coronavirus-tracking/data/pages/usa/data.json')

In [249]:
covid_cases = covid_cases_r.json()

In [250]:
covid_cases
#when it's a dictionary...take a look at keys to see what's best to call

{'updated': 'July 11',
 'updated_datetime': '2021-07-12T00:24:12.695Z',
 'location': {'metadata': {'geoid': 'USA',
   'hierarchy': ['NYT-World'],
   'population': 331811257,
   'slug': 'us/covid-cases',
   'href': 'https://www.nytimes.com/interactive/2021/us/covid-cases.html',
   'display_name': 'United States',
   'long_name': 'United States',
   'nyt_abbr': '',
   'country': 'United States',
   'region': '',
   'subregion': '',
   'region_type': 'country'},
  'anomalies': {'date_based': [{'id': 38,
     'date': '2020-06-25',
     'end_date': '',
     'geoid': 'USA',
     'name': 'United States',
     'type': 'deaths',
     'category': 'added_probables',
     'omit_from_rolling_average': True,
     'omit_from_rolling_average_on_subgeographies': False,
     'short_note': '',
     'long_note': 'New Jersey began reporting probable deaths, including those from earlier in the pandemic, causing a jump in the number of total deaths.'},
    {'id': 46,
     'date': '2020-06-30',
     'end_date

In [251]:
covid_cases.keys()

dict_keys(['updated', 'updated_datetime', 'location', 'counties', 'states', 'clusters', 'page_notes', 'headline_override'])

In [252]:
len(covid_cases['states'])

56

In [253]:
covid_cases['states']

[{'country': 'United States',
  'display_name': 'Alabama',
  'nyt_abbr': 'Ala.',
  'geoid': 'USA-01',
  'href': 'https://www.nytimes.com/interactive/2021/us/alabama-covid-cases.html',
  'latest': {'total': {'cases': 554605, 'deaths': 11402},
   'average': {'cases': 472.42857142857144,
    'deaths': 6.285714285714286,
    'hospitalized': 319.4,
    'tests': 2862.714285714286},
   'vaccination': {'date': '2021-07-09',
    'people_vaccinated_pct_of_pop': 40.49576,
    'people_vaccinated_pct_of_pop_display': '40%',
    'people_fully_vaccinated_pct_of_pop': 33.16871,
    'people_fully_vaccinated_pct_of_pop_display': '33%',
    'people_vaccinated_12plus_pct_of_pop': 47.4,
    'people_vaccinated_12plus_pct_of_pop_display': '47%',
    'people_vaccinated_18plus_pct_of_pop': 50.7,
    'people_vaccinated_18plus_pct_of_pop_display': '51%',
    'people_vaccinated_65plus_pct_of_pop': 79.3,
    'people_vaccinated_65plus_pct_of_pop_display': '79%',
    'people_fully_vaccinated_12plus_pct_of_pop': 38.8

In [254]:
#turn this into a dataframe so it's easier to read

covid_cases_df = pd.DataFrame(covid_cases['states'])

In [255]:
covid_cases_df
#would need to parse dictionary more to get the latest numbers (see dictionaries in latest, percent change, etc...)
#This is a static datafile...all data is just THERE...APIs need a search parameter; static files are more often in news sites

Unnamed: 0,country,display_name,nyt_abbr,geoid,href,latest,long_name,percent_change_14day,population,hospital_area_population,region,region_type,slug,state,subregion,date,population_adjustment
0,United States,Alabama,Ala.,USA-01,https://www.nytimes.com/interactive/2021/us/al...,"{'total': {'cases': 554605, 'deaths': 11402}, ...",Alabama,"{'raw': {'cases': 158.96632732967896, 'deaths'...",4903185,4903185,Alabama,state,us/alabama-covid-cases,"{'us_state_fips': '01', 'name': 'Alabama', 'sh...",,2021-07-11,
1,United States,Alaska,Alaska,USA-02,https://www.nytimes.com/interactive/2021/us/al...,"{'total': {'cases': 71118, 'deaths': 362}, 'av...",Alaska,"{'raw': {'cases': 72, 'deaths': 100, 'hospital...",731545,731545,Alaska,state,us/alaska-covid-cases,"{'us_state_fips': '02', 'name': 'Alaska', 'sho...",,2021-07-11,
2,United States,Arizona,Ariz.,USA-04,https://www.nytimes.com/interactive/2021/us/ar...,"{'total': {'cases': 901439, 'deaths': 18033}, ...",Arizona,"{'raw': {'cases': 30.964912280701775, 'deaths'...",7278717,7278717,Arizona,state,us/arizona-covid-cases,"{'us_state_fips': '04', 'name': 'Arizona', 'sh...",,2021-07-11,
3,United States,Arkansas,Ark.,USA-05,https://www.nytimes.com/interactive/2021/us/ar...,"{'total': {'cases': 355460, 'deaths': 5948}, '...",Arkansas,"{'raw': {'cases': 195.99757428744695, 'deaths'...",3017804,3017804,Arkansas,state,us/arkansas-covid-cases,"{'us_state_fips': '05', 'name': 'Arkansas', 's...",,2021-07-11,
4,United States,California,Calif.,USA-06,https://www.nytimes.com/interactive/2021/us/ca...,"{'total': {'cases': 3834396, 'deaths': 63485},...",California,"{'raw': {'cases': 77.01039387308532, 'deaths':...",39512223,39512223,California,state,us/california-covid-cases,"{'us_state_fips': '06', 'name': 'California', ...",,2021-07-11,
5,United States,Colorado,Colo.,USA-08,https://www.nytimes.com/interactive/2021/us/co...,"{'total': {'cases': 565731, 'deaths': 7000}, '...",Colorado,"{'raw': {'cases': 10.782918149466191, 'deaths'...",5758736,5758736,Colorado,state,us/colorado-covid-cases,"{'us_state_fips': '08', 'name': 'Colorado', 's...",,2021-07-11,
6,United States,Connecticut,Conn.,USA-09,https://www.nytimes.com/interactive/2021/us/co...,"{'total': {'cases': 349969, 'deaths': 8279}, '...",Connecticut,"{'raw': {'cases': 8.351648351648343, 'deaths':...",3565287,3565287,Connecticut,state,us/connecticut-covid-cases,"{'us_state_fips': '09', 'name': 'Connecticut',...",,2021-07-11,
7,United States,Delaware,Del.,USA-10,https://www.nytimes.com/interactive/2021/us/de...,"{'total': {'cases': 110074, 'deaths': 1695}, '...",Delaware,"{'raw': {'cases': 48.50746268656716, 'deaths':...",973764,973764,Delaware,state,us/delaware-covid-cases,"{'us_state_fips': '10', 'name': 'Delaware', 's...",,2021-07-11,
8,United States,"Washington, D.C.",D.C.,USA-11,,"{'total': {'cases': 49455, 'deaths': 1143}, 'a...","Washington, D.C.","{'raw': {'cases': 24.193548387096776, 'deaths'...",705749,705749,District of Columbia,state,us/washington-district-of-columbia-covid-cases,"{'us_state_fips': '11', 'name': 'District of C...",,2021-07-11,
9,United States,Florida,Fla.,USA-12,https://www.nytimes.com/interactive/2021/us/fl...,"{'total': {'cases': 2361360, 'deaths': 38157},...",Florida,"{'raw': {'cases': 114.9438812454743, 'deaths':...",21477737,21477737,Florida,state,us/florida-covid-cases,"{'us_state_fips': '12', 'name': 'Florida', 'sh...",,2021-07-11,


In [256]:
#WashPo Scavenger Hunt

In [257]:
df_washpo = pd.read_csv("https://www.washingtonpost.com/business/interactive/2021/dejoy-usps-delays-by-zip-code-map/usps.csv")

In [258]:
df_washpo.head(10)

Unnamed: 0,zip3,avg_days_cur,avg_days_new,avg_days_diff
0,5,2.665517,3.103653,-0.438135
1,10,2.420418,2.778212,-0.357794
2,11,2.411635,2.745268,-0.333633
3,12,2.439943,2.809153,-0.36921
4,13,2.422443,2.835077,-0.412634
5,14,2.437847,2.854191,-0.416344
6,15,2.421166,2.815743,-0.394577
7,16,2.446946,2.885994,-0.439048
8,17,2.426084,2.863679,-0.437595
9,18,2.450887,2.838465,-0.387578


### "Secret" APIs
Shopping websites are good candidates for secret APIs, such as [Target](www.target.com)

What can we do with this? WSJ wanted to track items that were popular for holiday sales...tracked items using APIs to see when it dipped below a certain point; MarkUp used Amazon API - wanted to see what Amazon was promoting first...are different locations getting different prices, etc.

Plan: 
Goal: Identify prices and rating of the first 24 results that appear when searching for paper cups on today's date
1. Get these results with the first endpoint (search, plp_search_v1)
2. Take product IDs from search endpoint results and plug them into a new endpoint

In general - you're probably not going to get the data you want in one place
Relational databases - data lives in different tables but are related to each other based on an attribute

Secret APIs just mean that there's no documentation

#### Target's Search API

How to find Secret APIs:
- open the networks tab (right-click -> inspect -> networks -> xhr tab)
- look for clues in the name: v1, v2 - common in API because site changes the way API is set up; ? - signals parameters (separates endpoint and parameters)
- work backwards, go to the preview tab to find the content you're looking for first

In [259]:
# search for an item with the networks tab open to ID which APIs you can use
#paper towels
from urllib.parse import urlparse, parse_qs

In [260]:
# parse the URL so it's easier to read
search_url = "https://redsky.target.com/redsky_aggregations/v1/web/plp_search_v1?key=ff457966e64d5e877fdbad070f276d18ecec4a01&channel=WEB&count=24&default_purchasability_filter=true&include_sponsored=true&keyword=paper plates&offset=0&page=/s/paper plates&platform=desktop&pricing_store_id=3249&scheduled_delivery_store_id=3249&store_ids=3249,3284,3321,3277,3229&useragent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0&visitor_id=017A792FCE4C0201B5B1AFEBB3D29979"

In [261]:
parsed_url = urlparse(search_url)

In [262]:
# check the parsed URL
parsed_url

ParseResult(scheme='https', netloc='redsky.target.com', path='/redsky_aggregations/v1/web/plp_search_v1', params='', query='key=ff457966e64d5e877fdbad070f276d18ecec4a01&channel=WEB&count=24&default_purchasability_filter=true&include_sponsored=true&keyword=paper plates&offset=0&page=/s/paper plates&platform=desktop&pricing_store_id=3249&scheduled_delivery_store_id=3249&store_ids=3249,3284,3321,3277,3229&useragent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0&visitor_id=017A792FCE4C0201B5B1AFEBB3D29979', fragment='')

scheme, netloc, path - are all endpoints
for an API you need an endpoint and parameters

In [263]:
# format the endpoint and parameters
target_search_endpoint = parsed_url[0]+'://'+ parsed_url[1] + parsed_url[2]
target_search_endpoint

'https://redsky.target.com/redsky_aggregations/v1/web/plp_search_v1'

In [264]:
# key:value pairs are separated by &
parsed_url[4]

'key=ff457966e64d5e877fdbad070f276d18ecec4a01&channel=WEB&count=24&default_purchasability_filter=true&include_sponsored=true&keyword=paper plates&offset=0&page=/s/paper plates&platform=desktop&pricing_store_id=3249&scheduled_delivery_store_id=3249&store_ids=3249,3284,3321,3277,3229&useragent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0&visitor_id=017A792FCE4C0201B5B1AFEBB3D29979'

to get the different key value pairs, need to use split method (split a string based on a character)

In [265]:
parsed_url[4].split('&')

['key=ff457966e64d5e877fdbad070f276d18ecec4a01',
 'channel=WEB',
 'count=24',
 'default_purchasability_filter=true',
 'include_sponsored=true',
 'keyword=paper plates',
 'offset=0',
 'page=/s/paper plates',
 'platform=desktop',
 'pricing_store_id=3249',
 'scheduled_delivery_store_id=3249',
 'store_ids=3249,3284,3321,3277,3229',
 'useragent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0',
 'visitor_id=017A792FCE4C0201B5B1AFEBB3D29979']

In [266]:
#need a dictionary of key value pairs to send to the API

target_search_parameters = {}

for key_values in parsed_url[4].split('&'):
    print(key_values.split('='))
    
#more lists!

['key', 'ff457966e64d5e877fdbad070f276d18ecec4a01']
['channel', 'WEB']
['count', '24']
['default_purchasability_filter', 'true']
['include_sponsored', 'true']
['keyword', 'paper plates']
['offset', '0']
['page', '/s/paper plates']
['platform', 'desktop']
['pricing_store_id', '3249']
['scheduled_delivery_store_id', '3249']
['store_ids', '3249,3284,3321,3277,3229']
['useragent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0']
['visitor_id', '017A792FCE4C0201B5B1AFEBB3D29979']


In [267]:

target_search_parameters = {}
#manually break down queries
for key_values in parsed_url[4].split('&'):
    key_value_list = (key_values.split('='))
    target_search_parameters[key_value_list[0]] = key_value_list[1]

In [268]:
target_search_parameters

{'key': 'ff457966e64d5e877fdbad070f276d18ecec4a01',
 'channel': 'WEB',
 'count': '24',
 'default_purchasability_filter': 'true',
 'include_sponsored': 'true',
 'keyword': 'paper plates',
 'offset': '0',
 'page': '/s/paper plates',
 'platform': 'desktop',
 'pricing_store_id': '3249',
 'scheduled_delivery_store_id': '3249',
 'store_ids': '3249,3284,3321,3277,3229',
 'useragent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0',
 'visitor_id': '017A792FCE4C0201B5B1AFEBB3D29979'}

In [269]:
#alt way to break down a query (with library) - try this way first 

target_search_parameters = parse_qs(parsed_url[4])

target_search_parameters

{'key': ['ff457966e64d5e877fdbad070f276d18ecec4a01'],
 'channel': ['WEB'],
 'count': ['24'],
 'default_purchasability_filter': ['true'],
 'include_sponsored': ['true'],
 'keyword': ['paper plates'],
 'offset': ['0'],
 'page': ['/s/paper plates'],
 'platform': ['desktop'],
 'pricing_store_id': ['3249'],
 'scheduled_delivery_store_id': ['3249'],
 'store_ids': ['3249,3284,3321,3277,3229'],
 'useragent': ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:89.0) Gecko/20100101 Firefox/89.0'],
 'visitor_id': ['017A792FCE4C0201B5B1AFEBB3D29979']}

In [270]:
#this is a GET request; let's try to request... "params" helps format the parameters based on the previous field automatically so we don't have to worry about typos
target_search_r = requests.get(target_search_endpoint, params = target_search_parameters)

In [271]:
target_search_r

<Response [200]>

In [272]:
target_search_r.json()

{'data': {'search': {'search_suggestions': ['plastic utensils',
    'valentines paper plates',
    'colored paper plates',
    'paper ice cream bowls',
    'light pink plastic plates',
    'white plastic plates',
    'birthday paper plates',
    'bridal shower paper plates',
    'baby shower paper plates',
    'rainbow party plates',
    'red plastic plates',
    'light pink paper plates',
    'brown paper plates',
    'paper plates napkins',
    'gold plastic plates',
    'pastel party plates',
    'pastel plastic plates',
    'paper plates for wedding',
    'wedding plastic plates',
    'elegant paper dinner napkins'],
   'search_recommendations': {'related_categories': [{'url': '/c/disposable-tableware-household-essentials/disposable-plates/-/N-5xsynZ55t5k',
      'title': 'Disposable Plates disposable tableware'},
     {'url': '/c/party-dinnerware-tableware-supplies/nickelodeon/-/N-tvtyiZ5xjj4',
      'title': 'Nickelodeon party dinnerware'},
     {'url': '/c/construction-paper-sch

In [273]:
target_search_r.json().keys()

dict_keys(['data'])

In [274]:
target_search_r.json()['data'].keys()

dict_keys(['search'])

In [275]:
target_search_r.json()['data']['search']

{'search_suggestions': ['plastic utensils',
  'valentines paper plates',
  'colored paper plates',
  'paper ice cream bowls',
  'light pink plastic plates',
  'white plastic plates',
  'birthday paper plates',
  'bridal shower paper plates',
  'baby shower paper plates',
  'rainbow party plates',
  'red plastic plates',
  'light pink paper plates',
  'brown paper plates',
  'paper plates napkins',
  'gold plastic plates',
  'pastel party plates',
  'pastel plastic plates',
  'paper plates for wedding',
  'wedding plastic plates',
  'elegant paper dinner napkins'],
 'search_recommendations': {'related_categories': [{'url': '/c/disposable-tableware-household-essentials/disposable-plates/-/N-5xsynZ55t5k',
    'title': 'Disposable Plates disposable tableware'},
   {'url': '/c/party-dinnerware-tableware-supplies/nickelodeon/-/N-tvtyiZ5xjj4',
    'title': 'Nickelodeon party dinnerware'},
   {'url': '/c/construction-paper-school-office-supplies/target-brands/-/N-4xudfZxmf9o',
    'title': ' c

In [276]:
target_search_r.json()['data']['search'].keys()

dict_keys(['search_suggestions', 'search_recommendations', 'search_response', 'products'])

In [277]:
target_search_r.json()['data']['search']['products']

[{'__typename': 'ProductSummary',
  'tcin': '75666853',
  'original_tcin': '75666853',
  'item': {'relationship_type': 'Stand Alone',
   'relationship_type_code': 'SA',
   'merchandise_classification': {'class_id': 5, 'department_id': 253},
   'eligibility_rules': {'add_on': {'is_active': True},
    'scheduled_delivery': {'is_active': True}},
   'enrichment': {'buy_url': 'https://www.target.com/p/line-plaid-paper-plate-8-5-34-90ct-up-38-up-8482/-/A-75666853',
    'images': {'primary_image_url': 'https://target.scene7.com/is/image/Target/GUEST_39b91919-bb96-44a4-a419-2257cfd40fc5',
     'alternate_image_urls': ['https://target.scene7.com/is/image/Target/GUEST_8ffaba71-1687-4107-9e3d-c49036c358ed']}},
   'dpci': '253-05-0356',
   'cart_add_on_threshold': 35.0,
   'product_description': {'title': 'Line Plaid Paper Plate 8.5&#34; - 90ct - up &#38; up&#8482;',
    'bullet_descriptions': ['<B>Features:</B> Round (shape)',
     '<B>Dimensions (Overall):</B> 8.55 Inches (L), 8.55 Inches (W)',


In [278]:
len(target_search_r.json()['data']['search']['products'])

24

In [279]:
target_search_r.json()['data']['search']['products'][0]
#tcin might ID specific product...might be a way to track product over time via another API

{'__typename': 'ProductSummary',
 'tcin': '75666853',
 'original_tcin': '75666853',
 'item': {'relationship_type': 'Stand Alone',
  'relationship_type_code': 'SA',
  'merchandise_classification': {'class_id': 5, 'department_id': 253},
  'eligibility_rules': {'add_on': {'is_active': True},
   'scheduled_delivery': {'is_active': True}},
  'enrichment': {'buy_url': 'https://www.target.com/p/line-plaid-paper-plate-8-5-34-90ct-up-38-up-8482/-/A-75666853',
   'images': {'primary_image_url': 'https://target.scene7.com/is/image/Target/GUEST_39b91919-bb96-44a4-a419-2257cfd40fc5',
    'alternate_image_urls': ['https://target.scene7.com/is/image/Target/GUEST_8ffaba71-1687-4107-9e3d-c49036c358ed']}},
  'dpci': '253-05-0356',
  'cart_add_on_threshold': 35.0,
  'product_description': {'title': 'Line Plaid Paper Plate 8.5&#34; - 90ct - up &#38; up&#8482;',
   'bullet_descriptions': ['<B>Features:</B> Round (shape)',
    '<B>Dimensions (Overall):</B> 8.55 Inches (L), 8.55 Inches (W)',
    '<B>Package 

In [280]:
# change something in the parameters (like keyword) 
#papercups

In [281]:
#to re-assign keyword
target_search_parameters['keyword'] = 'paper+cups'

In [282]:
# get request with endpoint and params

In [283]:
target_search_r = requests.get(target_search_endpoint, params = target_search_parameters)

In [284]:
target_search_r

<Response [200]>

In [285]:
target_search_r.json

<bound method Response.json of <Response [200]>>

In [286]:
# drill down the json file

In [287]:
# drill down some more

In [306]:
target_search_products = target_search_r.json()['data']['search']['products']

In [307]:
target_search_products

[{'__typename': 'ProductSummary',
  'tcin': '12970172',
  'original_tcin': '12970172',
  'item': {'relationship_type': 'Stand Alone',
   'relationship_type_code': 'SA',
   'merchandise_classification': {'class_id': 5, 'department_id': 253},
   'eligibility_rules': {'add_on': {'is_active': True},
    'scheduled_delivery': {'is_active': True}},
   'enrichment': {'buy_url': 'https://www.target.com/p/dixie-everyday-assorted-designs-cold-cups-54ct-9oz/-/A-12970172',
    'images': {'primary_image_url': 'https://target.scene7.com/is/image/Target/GUEST_9b4d6235-6e2e-4c7c-a541-6babba8eec63',
     'alternate_image_urls': ['https://target.scene7.com/is/image/Target/GUEST_d2d66bf2-7f60-444c-a286-16f29b008b5f',
      'https://target.scene7.com/is/image/Target/GUEST_4f0f4c6a-4310-4db5-b9ff-f5ef64671679',
      'https://target.scene7.com/is/image/Target/GUEST_5e6b372c-c099-4e8b-93b0-8611f6580497']},
    'videos': [{'is_list_page_eligible': False,
      'video_files': [{'mime_type': 'video/mp4',
     

#### Target's aggregation API

In [308]:
# parse the URL so it's easier to read
target_list = urlparse('https://redsky.target.com/redsky_aggregations/v1/web/plp_fulfillment_v1?key=ff457966e64d5e877fdbad070f276d18ecec4a01&tcins=81107269%2C81068829%2C14135567%2C81068792%2C82079503%2C81829962%2C81068790%2C81506339%2C80935950%2C81107259%2C81068797%2C11069188%2C81506334%2C81107271%2C81068773%2C81180792%2C81107267%2C81068789%2C81068796%2C81506336%2C81107268%2C81068821%2C81564691%2C81953908%2C81068815%2C81068825%2C81068787%2C81564688&store_id=2850&zip=11201&state=NY&latitude=40.690&longitude=-74.000&scheduled_delivery_store_id=2850')

In [309]:
# check the parsed URL
target_list

ParseResult(scheme='https', netloc='redsky.target.com', path='/redsky_aggregations/v1/web/plp_fulfillment_v1', params='', query='key=ff457966e64d5e877fdbad070f276d18ecec4a01&tcins=81107269%2C81068829%2C14135567%2C81068792%2C82079503%2C81829962%2C81068790%2C81506339%2C80935950%2C81107259%2C81068797%2C11069188%2C81506334%2C81107271%2C81068773%2C81180792%2C81107267%2C81068789%2C81068796%2C81506336%2C81107268%2C81068821%2C81564691%2C81953908%2C81068815%2C81068825%2C81068787%2C81564688&store_id=2850&zip=11201&state=NY&latitude=40.690&longitude=-74.000&scheduled_delivery_store_id=2850', fragment='')

In [310]:
# format the endpoint and parameters
target_list_endpoint = target_list[0] + '://' + target_list[1] + target_list[2]
target_list_params = {}
for parameter in target_list[4].split('&'):
    key_value = parameter.split('=')
    target_list_params[key_value[0]] = key_value[1]

In [311]:
# change something in the parameters (like tcins)
target_list_params['tcins'] = '81107269'

In [312]:
# get request with endpoint and params
target_list_r = requests.get(target_list_endpoint, params=target_list_params)

In [313]:
# drill down the json file
target_list_r.json()['data']['product_summaries']

[{'__typename': 'ProductSummary',
  'tcin': '81107269',
  'fulfillment': {'product_id': '81107269',
   'is_out_of_stock_in_all_store_locations': False,
   'shipping_options': {'availability_status': 'IN_STOCK',
    'loyalty_availability_status': 'IN_STOCK',
    'available_to_promise_quantity': 248.0,
    'minimum_order_quantity': 1.0,
    'services': [{'shipping_method_id': 'STANDARD',
      'min_delivery_date': '2021-07-15',
      'max_delivery_date': '2021-07-15',
      'is_two_day_shipping': False,
      'is_base_shipping_method': True,
      'service_level_description': 'Standard Shipping',
      'shipping_method_short_description': 'Standard',
      'cutoff': '2021-07-13T16:00:00Z'}]},
   'store_options': [{'location_name': 'Brooklyn Fulton St',
     'location_address': '445 Albee Square West,BROOKLYN,NY,11201-3016',
     'location_id': '2850',
     'search_response_store_type': 'PRIMARY',
     'order_pickup': {'availability_status': 'UNAVAILABLE',
      'reason_code': 'IN_ELIGIBL

In [314]:
# drill down some more
target_list_r.json()['data']['product_summaries'][0]

{'__typename': 'ProductSummary',
 'tcin': '81107269',
 'fulfillment': {'product_id': '81107269',
  'is_out_of_stock_in_all_store_locations': False,
  'shipping_options': {'availability_status': 'IN_STOCK',
   'loyalty_availability_status': 'IN_STOCK',
   'available_to_promise_quantity': 248.0,
   'minimum_order_quantity': 1.0,
   'services': [{'shipping_method_id': 'STANDARD',
     'min_delivery_date': '2021-07-15',
     'max_delivery_date': '2021-07-15',
     'is_two_day_shipping': False,
     'is_base_shipping_method': True,
     'service_level_description': 'Standard Shipping',
     'shipping_method_short_description': 'Standard',
     'cutoff': '2021-07-13T16:00:00Z'}]},
  'store_options': [{'location_name': 'Brooklyn Fulton St',
    'location_address': '445 Albee Square West,BROOKLYN,NY,11201-3016',
    'location_id': '2850',
    'search_response_store_type': 'PRIMARY',
    'order_pickup': {'availability_status': 'UNAVAILABLE',
     'reason_code': 'IN_ELIGIBLE'},
    'in_store_onl

In [315]:
target_client_url = "https://redsky.target.com/redsky_aggregations/v1/web/pdp_client_v1?key=ff457966e64d5e877fdbad070f276d18ecec4a01&tcin=12970172&member_id=10009200625&store_id=3249&has_store_id=true&pricing_store_id=3249&has_pricing_store_id=true&scheduled_delivery_store_id=3249&has_scheduled_delivery_store_id=true&has_financing_options=true&visitor_id=017A7778B66A0201B89A83A0646DDF50&has_size_context=true"

In [316]:
parsed_url = urlparse(target_client_url)
parsed_url

ParseResult(scheme='https', netloc='redsky.target.com', path='/redsky_aggregations/v1/web/pdp_client_v1', params='', query='key=ff457966e64d5e877fdbad070f276d18ecec4a01&tcin=12970172&member_id=10009200625&store_id=3249&has_store_id=true&pricing_store_id=3249&has_pricing_store_id=true&scheduled_delivery_store_id=3249&has_scheduled_delivery_store_id=true&has_financing_options=true&visitor_id=017A7778B66A0201B89A83A0646DDF50&has_size_context=true', fragment='')

In [317]:
target_client_endpoint = parsed_url[0] + '://' + parsed_url[1] + parsed_url[2]
target_client_endpoint

'https://redsky.target.com/redsky_aggregations/v1/web/pdp_client_v1'

In [318]:
target_client_params = parse_qs(parsed_url[4])
target_client_params

{'key': ['ff457966e64d5e877fdbad070f276d18ecec4a01'],
 'tcin': ['12970172'],
 'member_id': ['10009200625'],
 'store_id': ['3249'],
 'has_store_id': ['true'],
 'pricing_store_id': ['3249'],
 'has_pricing_store_id': ['true'],
 'scheduled_delivery_store_id': ['3249'],
 'has_scheduled_delivery_store_id': ['true'],
 'has_financing_options': ['true'],
 'visitor_id': ['017A7778B66A0201B89A83A0646DDF50'],
 'has_size_context': ['true']}

In [321]:
target_search_products[1]['tcin']

'75557224'

In [322]:
target_client_params['tcin'] = ['75557224']

In [323]:
target_client_params

{'key': ['ff457966e64d5e877fdbad070f276d18ecec4a01'],
 'tcin': ['75557224'],
 'member_id': ['10009200625'],
 'store_id': ['3249'],
 'has_store_id': ['true'],
 'pricing_store_id': ['3249'],
 'has_pricing_store_id': ['true'],
 'scheduled_delivery_store_id': ['3249'],
 'has_scheduled_delivery_store_id': ['true'],
 'has_financing_options': ['true'],
 'visitor_id': ['017A7778B66A0201B89A83A0646DDF50'],
 'has_size_context': ['true']}

In [324]:
target_client_r = requests.get(target_client_endpoint, params=target_client_params)

In [325]:
target_client_r.json()

{'data': {'product': {'__typename': 'Product',
   'tcin': '75557224',
   'ratings_and_reviews': {'has_verified': True,
    'statistics': {'not_recommended_count': 2,
     'question_count': 0,
     'rating': {'average': 4.74,
      'count': 110,
      'distribution': {'rating1': 2,
       'rating2': 1,
       'rating3': 2,
       'rating4': 14,
       'rating5': 91},
      'secondary_averages': [{'id': 'Durable',
        'label': 'durability',
        'range': 5,
        'type': 'RATING',
        'value': 4.73},
       {'id': 'Value',
        'label': 'value',
        'range': 5,
        'type': 'RATING',
        'value': 4.71},
       {'id': 'strength',
        'label': 'strength',
        'range': 5,
        'type': 'RATING',
        'value': 4.39},
       {'id': 'Quality',
        'label': 'quality',
        'range': 5,
        'type': 'RATING',
        'value': 4.73}]},
     'recommended_count': 14,
     'recommended_percentage': 87,
     'review_count': 26},
    'most_recent': [{'i

In [328]:
target_paper_cups = []

for product in target_search_products:
    target_client_params['tcin'] = product['tcin']
    target_client_r = requests.get(target_client_endpoint, params = target_client_params)
    target_paper_cups.append(target_client_r.json())

In [329]:
len(target_paper_cups)

25

In [330]:
target_paper_cups[2].keys()

dict_keys(['data'])

In [334]:
target_paper_cups_2 = []
for item in target_paper_cups:
    target_paper_cups_2.append(item['data']['product'])

In [335]:
target_paper_cups_2[0].keys()

dict_keys(['__typename', 'tcin', 'ratings_and_reviews', 'financing_options', 'item', 'price', 'promotions'])

In [336]:
target_df = pd.DataFrame(target_paper_cups_2)

In [337]:
target_df.head()

Unnamed: 0,__typename,tcin,ratings_and_reviews,financing_options,item,price,promotions,variation_hierarchy,children
0,Product,12970172,"{'has_verified': True, 'statistics': {'not_rec...","{'channel': 'DIGITAL', 'providers': [{'finance...","{'dpci': '253-05-0076', 'relationship_type_cod...","{'current_retail': 2.99, 'formatted_current_pr...",[],,
1,Product,75557224,"{'has_verified': True, 'statistics': {'not_rec...","{'channel': 'DIGITAL', 'providers': [{'finance...","{'dpci': '253-05-0032', 'relationship_type_cod...","{'current_retail': 4.39, 'formatted_current_pr...",[],,
2,Product,82242697,"{'has_verified': True, 'statistics': {'not_rec...","{'channel': 'DIGITAL', 'providers': [{'finance...","{'dpci': '253-05-0166', 'relationship_type_cod...","{'current_retail': 6.79, 'formatted_current_pr...",[],,
3,Product,79620967,"{'has_verified': True, 'statistics': {'not_rec...",,"{'relationship_type_code': 'VAP', 'cart_add_on...","{'current_retail_min': 2.49, 'formatted_curren...",[],"[{'name': 'Size', 'value': '72ct', 'tcin': '14...","[{'__typename': 'Product', 'tcin': '14736272',..."
4,Product,75557230,"{'has_verified': True, 'statistics': {'not_rec...","{'channel': 'DIGITAL', 'providers': [{'finance...","{'dpci': '253-05-0040', 'relationship_type_cod...","{'current_retail': 2.39, 'formatted_current_pr...",[],,


In [None]:
# to identify best URL - just start from the top and look for question marks; then check the preview

## Using sessions to login
### Accessing password-protected pages
[Sessions object - request library](https://docs.python-requests.org/en/master/user/advanced/#session-objects)

GET requests don't save information
Sessions open a door and keep it open

In [339]:
# open up a session so that your login credentials are saved

session = requests.Session()

In [340]:
# load in config file with passwords
with open('.../config/config.json') as json_file:
    config = json.load(json_file)

FileNotFoundError: [Errno 2] No such file or directory: '.../config/config.json'

In [None]:
# check the website for the login parameters
# atom.finance 
atom_signin_endpoint = "https://atom.finance/session/signin"

In [None]:
atom_signin_payload = {
    'username': '....',
    'password': config['atom_password']
}

In [None]:
# post the payload to the site to login with the correct log in endpoint
# s = session.post(endpoint, data = payload)
s = session.post(atom_signin_endpoint, data = atom_signin_payload)

In [None]:
# check credentials to see if successful
s.text

In [None]:
payload = {
    "variables":{"symbol":"SPY"},
    "query": "query getETFProfile($symbol: String!) {\n  etfProfile(symbol: $symbol) {\n    id\n    issuer\n    description\n    }\n}\n"
}

In [None]:
# look at an example page to get you started with a query
s = session.post('https://atom.finance/graphql', json=payload)
s.text

In [None]:
# create a new post object from the example

In [None]:
# post request for the data

In [None]:
# check to see what is returned