In [60]:
import requests
import re
from bs4 import BeautifulSoup
from typing import Dict, Any
from wine_reviews.scrape import get_review_page, parse_review_card, HEADERS

In [2]:
# Fetch a listing page through the project's scrape helper (the argument is
# presumably the page number — confirm against `wine_reviews.scrape`). The
# result is queried with `.find_all` in the next cell, so it is presumably a
# parsed BeautifulSoup document.
review = get_review_page(1)

In [4]:
# Collect every <li class="review-item"> element on the listing page.
review_cards = review.find_all("li", {"class": "review-item"})

In [7]:
# Run the project's card parser over every collected <li> element.
review_cards_parsed = list(map(parse_review_card, review_cards))


In [10]:
# Inspect the first parsed card.
review_cards_parsed[0]

['https://www.winemag.com/buying-guide/felton-road-2019-block-5-pinot-noir-central-otago/',
 'Felton Road 2019 Block 5 Pinot Noir (Central Otago)',
 'New Zealand',
 '97 Points',
 '$84']

In [11]:
# Number of cards parsed from this listing page.
len(review_cards_parsed)

20

In [15]:
# Download the detail page for the first review card; element 0 of a parsed
# card is the review URL.
with requests.Session() as sesh:
    # requests applies no timeout unless one is passed, so without this the
    # cell can hang forever on an unresponsive server.
    response = sesh.get(review_cards_parsed[0][0], headers=HEADERS, timeout=30)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a review.
    response.raise_for_status()
content = BeautifulSoup(response.content, 'html.parser')

In [20]:
# Look up the first <meta> tag whose content attribute is "Wine Reviews".
test = content.find("meta", attrs={"content": "Wine Reviews"})

In [32]:
import json
# Gather every JSON-LD <script> tag, then check how many child nodes the
# second one holds.
all_json = content.find_all("script", attrs={"type": "application/ld+json"})
len(all_json[1].contents)

1

In [40]:
# Decode the first child of the second JSON-LD tag into a Python object.
json.loads(all_json[1].contents[0])

{'@context': 'https://schema.org',
 '@type': 'Product',
 'image': 'https://www.winemag.com/wp-content/assets/reviews/label-images/wine/Wine_Default_label.jpg',
 'name': 'Felton Road 2019 Block 5 Pinot Noir (Central Otago)',
 'category': 'Red',
 'review': {'@type': 'Review',
  'name': 'Felton Road 2019 Block 5 Pinot Noir (Central Otago)',
  'reviewRating': {'@type': 'Rating', 'bestRating': 100, 'ratingValue': '97'},
  'author': {'@type': 'Person', 'name': 'Christina Pickard'},
  'datePublished': '1970-01-01T00:00:00+00:00',
  'reviewBody': 'This is one of the most gorgeous bottlings of FR Pinot this reviewer has tasted in recent years. Ethereal, aromatic and complete, it threads together notes of fresh berries, violet, rose petal, earthy spice and a stony minerality. Tannins are Nebbiolo-like in their texture, power and downright sexiness, intricately woven into the wine. Drinking beautifully now, it has the capacity to hold for another decade, at least.',
  'publisher': {'@type': 'Orga

In [96]:
# First <meta name="pubDate"> tag on the page.
a = content.find_all("meta", attrs={"name": "pubDate"})[0]

In [97]:
# Read the value of the tag's content attribute.
a['content']

'December 31, 2021'

In [89]:
# Top-level keys a decoded JSON-LD object must all contain to be accepted as
# the review's own metadata blob.
json_blogpost_contents = [
    "@context", "@type", "image",
    "name", "category", "review",
]

def get_json_review_contents(content: BeautifulSoup):
    """Return the JSON-LD object on a review page that describes the review.

    Scans every ``<script type="application/ld+json">`` tag in ``content`` in
    document order and returns the first decoded object that contains all of
    the keys listed in ``json_blogpost_contents``.

    Args:
        content: Parsed HTML of a single review page.

    Returns:
        The matching JSON-LD object as a ``dict``, or ``None`` when no script
        tag holds one.
    """
    required_keys = set(json_blogpost_contents)

    for script in content.find_all("script", {"type": "application/ld+json"}):
        # Only tags with exactly one child node are considered.
        if len(script.contents) != 1:
            continue

        try:
            candidate = json.loads(script.contents[0])
        except json.JSONDecodeError:
            # One malformed blob must not hide a valid one elsewhere on the page.
            continue

        # JSON-LD may also be a top-level array or scalar; those have no keys
        # and can never be the review object.
        if isinstance(candidate, dict) and required_keys.issubset(candidate):
            return candidate

    return None

# Any run of four digits; used to pull the year out of a review title.
year_finder = re.compile(r"\d{4}")
# Lazily captures the text inside each "(...)" pair; used for the appellation.
# Written as a raw string because a plain '\(' is an invalid string escape
# that newer Python versions warn about.
appellation_finder = re.compile(r"\((.*?)\)")

In [90]:
# Pick the review's JSON-LD object out of the downloaded detail page
# (None if no script tag matches).
json_review = get_json_review_contents(content)

def _first_match(finder, name: str, label: str):
    """Return the first match of ``finder`` in ``name``, or None if there is none.

    Prints a diagnostic whenever the number of matches is not exactly one.
    """
    matches = finder.findall(name)
    if not matches:
        print(f'No {label}-like values found in {name}')
        return None
    if len(matches) > 1:
        print(f'Multiple {label}-like values found in {name}')
    return matches[0]


def get_values_from_json(json_review: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten a review's JSON-LD object into a dict of the fields of interest.

    Args:
        json_review: Decoded JSON-LD object with a ``category`` entry and a
            ``review`` entry holding ``author``, ``reviewBody``,
            ``reviewRating`` and ``name``.

    Returns:
        Dict with keys ``category``, ``author``, ``body``, ``rating`` (int),
        ``year``, ``appellation`` and ``title``. ``year`` and ``appellation``
        are the first match found in the title, or ``None`` when the title
        contains none (for example a wine with no year in its name).

    Raises:
        KeyError: if an expected key is missing from ``json_review``.
        ValueError: if the rating value cannot be converted to ``int``.
    """
    values = {}
    values['category'] = json_review['category']

    review = json_review['review']
    values['author'] = review['author']['name']
    values['body'] = review['reviewBody']
    values['rating'] = int(review['reviewRating']['ratingValue'])

    # The year and appellation are only available embedded in the title.
    name = review['name']
    values['year'] = _first_match(year_finder, name, 'year')
    values['appellation'] = _first_match(appellation_finder, name, 'appellation')
    values['title'] = name

    return values




In [82]:
# Review title taken from the JSON-LD object.
name = json_review['review']['name']

In [55]:
import re

In [81]:
# First four-digit run found in the review title.
year_finder.findall(json_review['review']['name'])[0]

'2019'

In [84]:
# Text inside each "(...)" pair of the title. Raw string so that '\(' is not
# read as an (invalid) string escape sequence.
re.findall(r'\((.*?)\)', name)

['Central Otago']

In [85]:
# Show the JSON-LD object selected for this review.
json_review

{'@context': 'https://schema.org',
 '@type': 'Product',
 'image': 'https://www.winemag.com/wp-content/assets/reviews/label-images/wine/Wine_Default_label.jpg',
 'name': 'Felton Road 2019 Block 5 Pinot Noir (Central Otago)',
 'category': 'Red',
 'review': {'@type': 'Review',
  'name': 'Felton Road 2019 Block 5 Pinot Noir (Central Otago)',
  'reviewRating': {'@type': 'Rating', 'bestRating': 100, 'ratingValue': '97'},
  'author': {'@type': 'Person', 'name': 'Christina Pickard'},
  'datePublished': '1970-01-01T00:00:00+00:00',
  'reviewBody': 'This is one of the most gorgeous bottlings of FR Pinot this reviewer has tasted in recent years. Ethereal, aromatic and complete, it threads together notes of fresh berries, violet, rose petal, earthy spice and a stony minerality. Tannins are Nebbiolo-like in their texture, power and downright sexiness, intricately woven into the wine. Drinking beautifully now, it has the capacity to hold for another decade, at least.',
  'publisher': {'@type': 'Orga

In [91]:
# Flatten the JSON-LD object into the fields of interest.
# NOTE(review): the saved output of this cell has no 'title' key although
# get_values_from_json sets one, so the output looks stale — re-run to refresh.
get_values_from_json(json_review)

{'category': 'Red',
 'author': 'Christina Pickard',
 'body': 'This is one of the most gorgeous bottlings of FR Pinot this reviewer has tasted in recent years. Ethereal, aromatic and complete, it threads together notes of fresh berries, violet, rose petal, earthy spice and a stony minerality. Tannins are Nebbiolo-like in their texture, power and downright sexiness, intricately woven into the wine. Drinking beautifully now, it has the capacity to hold for another decade, at least.',
 'rating': 97,
 'year': '2019',
 'appellation': 'Central Otago'}

In [92]:
# Re-display the listing-card fields for the same wine, presumably to compare
# them with the JSON-derived values.
review_cards_parsed[0]

['https://www.winemag.com/buying-guide/felton-road-2019-block-5-pinot-noir-central-otago/',
 'Felton Road 2019 Block 5 Pinot Noir (Central Otago)',
 'New Zealand',
 '97 Points',
 '$84']