In [1]:
import requests
import math
import re
import pandas as pd
from bs4 import BeautifulSoup
from random import randint
from time import sleep

### Get Wine Data

In [5]:
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"}
url = 'https://www.vivino.com/'

# Get Cache key to get country codes and type of wines
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
script = soup.find('script', text = re.compile('var vivinoCacheKey'))
vivinoCacheKey = str(script).split('vivinoCacheKey = ')[-1].split(';')[0].replace("'",'').strip()

# Get countries
api_url = 'https://www.vivino.com/api/countries'
payload = {
    'cache_key':vivinoCacheKey}
countryData = requests.get(api_url, headers=headers, params=payload).json()['countries']

rows = []
# Iterate through countries and wine types
api_url = 'https://www.vivino.com/api/explore/explore'
for country in countryData:
    payload = {
    "country_code": country['code'].upper(),
    "currency_code":"USD",
    "grape_filter":"varietal",
    "min_rating":"1",
    "min_ratings_count":200,
    "order_by":"ratings_count",
    "order":"desc",
    "page": '1',
    "price_range_max": 150,
    "price_range_min": 50,
    "wine_type_id": "1"}

    try:
        jsonData = requests.get(api_url, params=payload, headers=headers).json()
        total_pages = math.ceil(jsonData['explore_vintage']['records_matched'] / 100)
        #print('%s' %(country['code'].upper()))
        
        for page in range(1,total_pages+1):
            if page != 1:   
                payload.update({'page':page})
            jsonData = requests.get(api_url, params=payload, headers=headers).json()
            for each in jsonData['explore_vintage']['records']:
                name = f'{each["vintage"]["wine"]["name"]} {each["vintage"]["year"]}'
                year = each["vintage"]["year"]
                id = each["vintage"]["wine"]["id"]
                rating =  each['vintage']['statistics']['ratings_average']
                price = each['price']['amount']
                winery = each["vintage"]["wine"]["winery"]["name"]
                ratings_count = each["vintage"]["statistics"]["ratings_count"]
                region =  each['vintage']['wine']['region']['name']
                number_of_flavors_in_wine = int(len(each['vintage']['wine']['taste']['flavor']))
                flavors_in_wine = []
                for flavor in range(number_of_flavors_in_wine):
                    flavors_in_wine.append(each['vintage']['wine']['taste']['flavor'][flavor]) #flavor(s)

                number_of_grapes_in_wine = int(len(each["vintage"]["wine"]["style"]["grapes"]))
                grapes_in_wine = []
                for grape in range(number_of_grapes_in_wine):
                    grapes_in_wine.append(each["vintage"]["wine"]["style"]["grapes"][grape]["name"]) #Grape(s)

                
                
                row = {'name':name, "year":year, "wine ID":id, 'rating':rating, 'price':price, 'winery': winery, 'ratings_count': ratings_count, 'region': region,\
                        'number_of_flavors': number_of_flavors_in_wine, 'flavors': flavors_in_wine, 'number_of_grapes': number_of_grapes_in_wine, 'grapes': grapes_in_wine}



                rows.append(row)
            print('Aquired page: %s - %s ' %(country['code'].upper(), page))
    except:
        continue

df = pd.DataFrame(rows)

Aquired page: AS - 1 
Aquired page: AM - 1 
Aquired page: AU - 1 
Aquired page: AT - 1 
Aquired page: BE - 1 
Aquired page: BA - 1 
Aquired page: BA - 2 
Aquired page: BA - 3 
Aquired page: BI - 1 
Aquired page: CA - 1 
Aquired page: CA - 2 
Aquired page: CA - 3 
Aquired page: CA - 4 
Aquired page: CA - 5 
Aquired page: CA - 6 
Aquired page: CA - 7 
Aquired page: CL - 1 
Aquired page: CN - 1 
Aquired page: CN - 2 
Aquired page: CO - 1 
Aquired page: CY - 1 
Aquired page: CY - 2 
Aquired page: CZ - 1 
Aquired page: DK - 1 
Aquired page: EC - 1 
Aquired page: FO - 1 
Aquired page: FJ - 1 
Aquired page: FI - 1 
Aquired page: FR - 1 
Aquired page: FR - 2 
Aquired page: FR - 3 
Aquired page: PF - 1 
Aquired page: GI - 1 
Aquired page: GP - 1 
Aquired page: GU - 1 
Aquired page: GT - 1 
Aquired page: HK - 1 
Aquired page: ID - 1 
Aquired page: ID - 2 
Aquired page: ID - 3 
Aquired page: IE - 1 
Aquired page: IE - 2 
Aquired page: IE - 3 
Aquired page: IT - 1 
Aquired page: JO - 1 
Aquired pa

In [4]:
df = df.drop_duplicates(subset=['name', 'wine ID'], keep='first')
df.to_csv("asset/wines.csv", index=False)
df

Unnamed: 0,name,year,wine ID,rating,price,winery,ratings_count,region,number_of_flavors,flavors,number_of_grapes,grapes
0,Brut Premier Champagne N.V.,N.V.,74298,4.2,65.098163,Louis Roederer,52143,Champagne Premier Cru,13,"[{'group': 'tree_fruit', 'stats': {'count': 21...",3,"['Chardonnay', 'Pinot Noir', 'Pinot Meunier']"
1,Oeil de Perdrix Brut Rosé Champagne N.V.,N.V.,1998957,3.9,80.132381,Champagne Devaux,856,Champagne,12,"[{'group': 'red_fruit', 'stats': {'count': 86,...",3,"['Chardonnay', 'Pinot Noir', 'Pinot Meunier']"
2,Grand Brut Champagne N.V.,N.V.,79160,4.2,67.333250,Perrier-Jouët,39646,Champagne,13,"[{'group': 'tree_fruit', 'stats': {'count': 17...",3,"['Chardonnay', 'Pinot Noir', 'Pinot Meunier']"
3,Brut Cuvée Champagne Rosé N.V.,N.V.,8305,4.4,69.057174,Laurent-Perrier,35543,Champagne,13,"[{'group': 'red_fruit', 'stats': {'count': 147...",3,"['Chardonnay', 'Pinot Noir', 'Pinot Meunier']"
4,Brut Rosé Champagne N.V.,N.V.,1211816,4.4,57.029800,Billecart-Salmon,21474,Champagne,13,"[{'group': 'red_fruit', 'stats': {'count': 131...",3,"['Chardonnay', 'Pinot Noir', 'Pinot Meunier']"
...,...,...,...,...,...,...,...,...,...,...,...,...
3187,Barbaresco 2018,2018,61144,4.1,66.194090,Prunotto,1421,Barbaresco,13,"[{'group': 'red_fruit', 'stats': {'count': 574...",1,['Nebbiolo']
3189,Max's Shiraz 2016,2016,4275488,3.9,85.807154,Penfolds,1093,South Australia,13,"[{'group': 'oak', 'stats': {'count': 404, 'sco...",1,['Shiraz/Syrah']
3224,Vieilles Vignes Gevrey-Chambertin 2012,2012,1189367,4.0,105.860000,Domaine Rossignol-Trapet,326,Gevrey-Chambertin,12,"[{'group': 'red_fruit', 'stats': {'count': 174...",1,['Pinot Noir']
3225,Pomerol 2011,2011,1112303,4.1,108.400000,Château Vray Croix de Gay,154,Pomerol,12,"[{'group': 'oak', 'stats': {'count': 107, 'sco...",6,"['Cabernet Sauvignon', 'Cabernet Franc', 'Malb..."


### Get Review Data

In [12]:
def get_wine_data(wine_id, year, page):
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    }

    api_url = "https://www.vivino.com/api/wines/{id}/reviews?per_page=50&year={year}&page={page}"  # <-- increased the number of reviews to 9999

    data = requests.get(
        api_url.format(id=wine_id, year=year, page=page), headers=headers
    ).json()

    return data

In [14]:
ratings=[]
for _, row in df.iterrows():
    page = 1
    while True:
        print(
            f'Getting info about wine {row["wine ID"]}-{row["year"]} Page {page}'
            )
        try:
            d = get_wine_data(row["wine ID"], row["year"], page)
        except:
            sleep(10)
            continue

        try:
            for r in d["reviews"]:
                if r["language"] != "en": # <-- get only english reviews
                    continue

                ratings.append(
                        [
                            row["wine ID"],
                            r["rating"],
                            r["note"],
                            r["created_at"],
                            r['vintage']['wine']['region']['country']['name']
                        ]
                    )
        except:
            break


        if page == 3:
            sleep(randint(1,3))
            break

        page += 1


    df_reviews = pd.DataFrame(
        ratings, columns=["wine ID", "User Rating", "Review", "CreatedAt", "country"]
    )

    df_out = df_reviews.merge(df)
df_out.to_pickle("asset/reviews.pkl")

Getting info about wine 74298-N.V. Page 1
Getting info about wine 74298-N.V. Page 2
Getting info about wine 74298-N.V. Page 3
Getting info about wine 1998957-N.V. Page 1
Getting info about wine 1998957-N.V. Page 2
Getting info about wine 1998957-N.V. Page 3
Getting info about wine 79160-N.V. Page 1
Getting info about wine 79160-N.V. Page 2
Getting info about wine 79160-N.V. Page 3
Getting info about wine 8305-N.V. Page 1
Getting info about wine 8305-N.V. Page 2
Getting info about wine 8305-N.V. Page 3
Getting info about wine 1211816-N.V. Page 1
Getting info about wine 1211816-N.V. Page 2
Getting info about wine 1211816-N.V. Page 3
Getting info about wine 1148298-N.V. Page 1
Getting info about wine 1148298-N.V. Page 2
Getting info about wine 1148298-N.V. Page 3
Getting info about wine 1987886-N.V. Page 1
Getting info about wine 1987886-N.V. Page 2
Getting info about wine 1987886-N.V. Page 3
Getting info about wine 22917-2013 Page 1
Getting info about wine 22917-2013 Page 2
Getting info 

In [16]:
df_out

Unnamed: 0,wine ID,User Rating,Review,CreatedAt,country,name,year,rating,price,winery,ratings_count,region,number_of_flavors,flavors,number_of_grapes,grapes
0,74298,4.5,Beautifully complex champagne. Begins with a y...,2015-07-13T22:05:13.000Z,France,Brut Premier Champagne N.V.,N.V.,4.2,65.098163,Louis Roederer,52143,Champagne Premier Cru,13,"[{'group': 'tree_fruit', 'stats': {'count': 21...",3,"[Chardonnay, Pinot Noir, Pinot Meunier]"
1,74298,4.5,"Its a Non Vintage, but drinking old as cellare...",2021-06-19T03:38:24.000Z,France,Brut Premier Champagne N.V.,N.V.,4.2,65.098163,Louis Roederer,52143,Champagne Premier Cru,13,"[{'group': 'tree_fruit', 'stats': {'count': 21...",3,"[Chardonnay, Pinot Noir, Pinot Meunier]"
2,74298,4.5,"In between the Italian wine reviews, it was ti...",2016-10-20T18:45:18.000Z,France,Brut Premier Champagne N.V.,N.V.,4.2,65.098163,Louis Roederer,52143,Champagne Premier Cru,13,"[{'group': 'tree_fruit', 'stats': {'count': 21...",3,"[Chardonnay, Pinot Noir, Pinot Meunier]"
3,74298,4.5,"Ripe open toasty, powerfull, Smells less compl...",2017-06-06T10:03:49.000Z,France,Brut Premier Champagne N.V.,N.V.,4.2,65.098163,Louis Roederer,52143,Champagne Premier Cru,13,"[{'group': 'tree_fruit', 'stats': {'count': 21...",3,"[Chardonnay, Pinot Noir, Pinot Meunier]"
4,74298,4.5,This is a lovely NV Champagne at this price le...,2018-03-18T13:43:44.000Z,France,Brut Premier Champagne N.V.,N.V.,4.2,65.098163,Louis Roederer,52143,Champagne Premier Cru,13,"[{'group': 'tree_fruit', 'stats': {'count': 21...",3,"[Chardonnay, Pinot Noir, Pinot Meunier]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351761,1208798,3.0,"Dry, slight oak. Crisp. Great French chard for...",2016-10-19T21:05:57.000Z,France,Vosne-Romanée 2017,2017,4.3,91.500000,Albert Bichot,48,Vosne-Romanée,11,"[{'group': 'red_fruit', 'stats': {'count': 28,...",1,[Pinot Noir]
351762,1208798,4.0,"Superb, with a long life ahead of it. Well, no...",2014-08-18T16:33:43.000Z,France,Vosne-Romanée 2017,2017,4.3,91.500000,Albert Bichot,48,Vosne-Romanée,11,"[{'group': 'red_fruit', 'stats': {'count': 28,...",1,[Pinot Noir]
351763,1208798,4.5,A bit heavy but very good after 6y,2020-10-09T18:42:04.000Z,France,Vosne-Romanée 2017,2017,4.3,91.500000,Albert Bichot,48,Vosne-Romanée,11,"[{'group': 'red_fruit', 'stats': {'count': 28,...",1,[Pinot Noir]
351764,1208798,5.0,"Very fruity, Elegant, expressive and Hint of O...",2020-03-19T19:13:30.000Z,France,Vosne-Romanée 2017,2017,4.3,91.500000,Albert Bichot,48,Vosne-Romanée,11,"[{'group': 'red_fruit', 'stats': {'count': 28,...",1,[Pinot Noir]


### Get Country Data

In [None]:
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"}
url = 'https://www.vivino.com/'

# Get Cache key to get country codes and type of wines
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
script = soup.find('script', text = re.compile('var vivinoCacheKey'))
vivinoCacheKey = str(script).split('vivinoCacheKey = ')[-1].split(';')[0].replace("'",'').strip()

# Get countries
api_url = 'https://www.vivino.com/api/countries'
payload = {
    'cache_key':vivinoCacheKey}
countryData = requests.get(api_url, headers=headers, params=payload).json()['countries']
df = pd.json_normalize(countryData)
df.columns = ["country_code", "country", "native_name", "seo_name", "regions_count", "users_count", "wines_count", "wineries_count", "most_used_grapes", "currency_code", "currency_name", "currency_prefix", "currency_suffix"]
df.to_csv("asset/country.csv", index=False)
df.head()