In [1]:
import numpy as np
import pandas as pd
import swifter
import geopandas
from geodatasets import get_path
from ratelimit import limits, sleep_and_retry

from random_address import real_random_address, real_random_address_by_state
from faker import Faker

from time import perf_counter_ns as timer

import plotly.express as px

In [2]:
# load reviews
import kagglehub
import os

path = kagglehub.dataset_download("christopheiv/winemagdata130k")
fname = "winemag-data-130k-v2.csv"
reviews = pd.read_csv(os.path.join(path, fname), index_col=0)
reviews.info()

reviews

<class 'pandas.core.frame.DataFrame'>
Index: 129971 entries, 0 to 129970
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   country                129908 non-null  object 
 1   description            129971 non-null  object 
 2   designation            92506 non-null   object 
 3   points                 129971 non-null  int64  
 4   price                  120975 non-null  float64
 5   province               129908 non-null  object 
 6   region_1               108724 non-null  object 
 7   region_2               50511 non-null   object 
 8   taster_name            103727 non-null  object 
 9   taster_twitter_handle  98758 non-null   object 
 10  title                  129971 non-null  object 
 11  variety                129970 non-null  object 
 12  winery                 129971 non-null  object 
dtypes: float64(1), int64(1), object(11)
memory usage: 13.9+ MB


Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,Willamette Valley,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef)
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,Oregon,Oregon Other,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,Alsace,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss


In [3]:
# determine the top 5 countries
locales = reviews.groupby('country').count()[['winery']].rename(columns={'winery':'count'}).sort_values('count', ascending=False).head(5).copy().reset_index()
locales

Unnamed: 0,country,count
0,US,54504
1,France,22093
2,Italy,19540
3,Spain,6645
4,Portugal,5691


In [4]:
us_reviews = reviews[(reviews.country == 'US')&(reviews.province != 'America')&(reviews.province != 'Washington-Oregon')]
us_reviews.info()

<class 'pandas.core.frame.DataFrame'>
Index: 54402 entries, 2 to 129967
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   country                54402 non-null  object 
 1   description            54402 non-null  object 
 2   designation            36834 non-null  object 
 3   points                 54402 non-null  int64  
 4   price                  54163 non-null  float64
 5   province               54402 non-null  object 
 6   region_1               54226 non-null  object 
 7   region_2               50511 non-null  object 
 8   taster_name            37644 non-null  object 
 9   taster_twitter_handle  34663 non-null  object 
 10  title                  54402 non-null  object 
 11  variety                54402 non-null  object 
 12  winery                 54402 non-null  object 
dtypes: float64(1), int64(1), object(11)
memory usage: 5.8+ MB


In [5]:
s1 = set(us_reviews.sample(500).index)

In [6]:
s2 = set(us_reviews.sample(500).index)

while np.sum([s in s2 for s in s1]) < 10:
  s2 = set(us_reviews.sample(500).index)

In [7]:
print(np.sum([s in s2 for s in s1]), 'items in common to test caching')

11 items in common to test caching


In [8]:
us_reviews.loc[list(s1)].head()

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
49152,US,"A hot, rather brutal wine. High alcohol combin...",,84,30.0,California,Santa Ynez Valley,Central Coast,,,Carr 2007 Cabernet Franc (Santa Ynez Valley),Cabernet Franc,Carr
71685,US,"Fragrant in leather, lavender and licorice, th...",Reserve,86,28.0,California,Amador County,Sierra Foothills,Virginie Boone,@vboone,Amador Cellars 2010 Reserve G-S-M (Amador County),G-S-M,Amador Cellars
104458,US,Just 50 cases exist of this cellar-worthy bott...,Reserve,93,50.0,California,Sta. Rita Hills,Central Coast,Matt Kettmann,@mattkettmann,Point & Line 2014 Reserve Pinot Noir (Sta. Rit...,Pinot Noir,Point & Line
112653,US,"Dark for a rosé and rather full-bodied, this b...",Oasis,84,24.0,California,Paso Robles,Central Coast,,,Cass 2008 Oasis Rosé (Paso Robles),Rosé,Cass
16402,US,A directly appealing wine with little obvious ...,,87,11.0,Washington,Columbia Valley (WA),Columbia Valley,Sean P. Sullivan,@wawinereport,Bridgman 2012 Chardonnay (Columbia Valley (WA)),Chardonnay,Bridgman


In [9]:
# map of US state names to 2 letter state code
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "Virgin Islands, U.S.": "VI",
}

In [10]:
no_address = []
found = []


def geocode(row):
  state = us_state_to_abbrev[row['province']]
  address = real_random_address_by_state(state)
  (no_address if len(address) == 0 else found).append(state)

  

In [11]:
us_reviews.loc[list(s1)].swifter.apply(geocode, axis=1)

Pandas Apply:   0%|          | 0/500 [00:00<?, ?it/s]

49152     None
71685     None
104458    None
112653    None
16402     None
          ... 
104435    None
12278     None
90104     None
24570     None
65534     None
Length: 500, dtype: object

In [12]:
np.unique(no_address)

array(['ID', 'MO', 'NY', 'OR', 'TX', 'VA', 'WA'], dtype='<U2')

In [13]:
any([s in found for s in np.unique(no_address)])

False

In [14]:
[real_random_address_by_state(s) for s in np.unique(no_address)]

[{}, {}, {}, {}, {}, {}, {}]

In [15]:
location_cols = ['winery', 'region_1', 'region_2', 'province', 'country']

In [16]:
# no location information
mask = (reviews.region_1.isna())&(reviews.region_2.isna())&(reviews.province.isna())&(reviews.country.isna())
reviews[location_cols][mask]

Unnamed: 0,winery,region_1,region_2,province,country
913,Gotsa Family Wines,,,,
3131,Barton & Guestier,,,,
4243,Kakhetia Traditional Winemaking,,,,
9509,Tsililis,,,,
9750,Ross-idi,,,,
...,...,...,...,...,...
124176,Les Frères Dutruy,,,,
129407,El Capricho,,,,
129408,El Capricho,,,,
129590,Büyülübağ,,,,


In [17]:
# only country information
mask = (reviews.country.notna())&(reviews.region_1.isna())&(reviews.region_2.isna())&(reviews.province.isna())
reviews[location_cols][mask]

Unnamed: 0,winery,region_1,region_2,province,country


In [18]:
# only province information
mask = (reviews.country.notna())&(reviews.province.notna()&(reviews.region_1.isna())&(reviews.region_2.isna()))
reviews[location_cols][mask]

Unnamed: 0,winery,region_1,region_2,province,country
1,Quinta dos Avidagos,,,Douro,Portugal
8,Heinz Eifel,,,Rheinhessen,Germany
15,Richard Böcking,,,Mosel,Germany
36,Estampa,,,Colchagua Valley,Chile
44,Sundance,,,Maule Valley,Chile
...,...,...,...,...,...
129956,Esk Valley,,,Hawke's Bay,New Zealand
129958,Babich,,,Hawke's Bay,New Zealand
129960,Caves Transmontanas,,,Douro,Portugal
129963,Dalton,,,Galilee,Israel


In [19]:
# location only contains country information, in redundant forms
def test(row):
  return (str(row.country) in str(row.province)) or (str(row.country) in str(row.region_1)) or (str(row.country) in str(row.region_2))

mask = reviews[location_cols].swifter.apply(test, axis=1)
reviews.loc[mask,['region_1','region_2','province']] = None
reviews.loc[mask,location_cols]

Pandas Apply:   0%|          | 0/129971 [00:00<?, ?it/s]

Unnamed: 0,winery,region_1,region_2,province,country
5,Tandem,,,,Spain
18,Pradorey,,,,Spain
38,Feudi di San Marzano,,,,Italy
61,Podere dal Nespoli,,,,Italy
72,Grifalco,,,,Italy
...,...,...,...,...,...
129900,Psagot,,,,
129902,Carlos Moro,,,,Spain
129930,Costaval,,,,Spain
129933,Bründlmayer,,,,Austria


In [20]:
def test(row):
  return (row.region_2 == row.region_1) or ('Other' in str(row.region_2))

mask = reviews.swifter.apply(test, axis=1)
reviews.loc[mask, 'region_2'] = None
reviews[mask]

Pandas Apply:   0%|          | 0/129971 [00:00<?, ?it/s]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks
5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,,,,Michael Schachner,@wineschach,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem
14,US,Building on 150 years and six generations of w...,,87,12.0,California,Central Coast,,Matt Kettmann,@mattkettmann,Mirassou 2012 Chardonnay (Central Coast),Chardonnay,Mirassou
18,Spain,"Desiccated blackberry, leather, charred wood a...",Vendimia Seleccionada Finca Valdelayegua Singl...,87,28.0,,,,Michael Schachner,@wineschach,Pradorey 2010 Vendimia Seleccionada Finca Vald...,Tempranillo Blend,Pradorey
...,...,...,...,...,...,...,...,...,...,...,...,...,...
129902,Spain,Dry apple and lemon aromas are clean and one s...,Finca Las Marcas Fermentado en Barrica,91,40.0,,,,Michael Schachner,@wineschach,Carlos Moro 2015 Finca Las Marcas Fermentado e...,Verdejo,Carlos Moro
129930,Spain,"Arguably on the heavy and rich side, but if yo...",Crianza,91,26.0,,,,Michael Schachner,@wineschach,Costaval 2005 Crianza (Ribera del Duero),Tempranillo Blend,Costaval
129933,Austria,Bründlmayer makes a particularly felicitous br...,Brut,91,46.0,,,,Roger Voss,@vossroger,Bründlmayer 2005 Brut White (Austria),White Blend,Bründlmayer
129957,Spain,Lightly baked berry aromas vie for attention w...,Crianza,90,17.0,,,,Michael Schachner,@wineschach,Viñedos Real Rubio 2010 Crianza (Rioja),Tempranillo Blend,Viñedos Real Rubio


In [21]:
def test(row):
  return (row.province is not None) and (row.region_1 == row.province) or ('Other' in str(row.region_1))

mask = (reviews.swifter.apply(test, axis=1))
reviews.loc[mask, 'region_1'] = None
reviews.loc[mask,location_cols]

Pandas Apply:   0%|          | 0/129971 [00:00<?, ?it/s]

Unnamed: 0,winery,region_1,region_2,province,country
7,Trimbach,,,Alsace,France
9,Jean-Baptiste Adam,,,Alsace,France
11,Leon Beyer,,,Alsace,France
19,Quiévremont,,,Virginia,US
20,Quiévremont,,,Virginia,US
...,...,...,...,...,...
129965,Domaine Rieflé-Landmann,,,Alsace,France
129967,Citation,,,Oregon,US
129968,Domaine Gresser,,,Alsace,France
129969,Domaine Marcel Deiss,,,Alsace,France


In [22]:
mask = (reviews.region_1.notna())&((reviews.region_1.str.contains('Vin Santo'))|(reviews.region_1.str.contains('Vin Doux')))
reviews.loc[mask, 'region_1'] = None
reviews.loc[mask, location_cols]

Unnamed: 0,winery,region_1,region_2,province,country
780,Castello d'Albola,,,Tuscany,Italy
3954,Fattoria di Casalbosco,,,Tuscany,Italy
3968,Pietro Beconcini,,,Tuscany,Italy
7335,Avignonesi,,,Tuscany,Italy
8587,I Veroni,,,Tuscany,Italy
10777,Fattoria di Grignano,,,Tuscany,Italy
11529,Fattoria di Basciano,,,Tuscany,Italy
18195,Felsina,,,Tuscany,Italy
19825,Fattoria di Grignano,,,Tuscany,Italy
21917,Lornano,,,Tuscany,Italy


In [23]:
np.unique(reviews[(reviews.region_1.notna())&(reviews.region_1.str.contains('Vin'))].region_1)

array(['Mâcon La Roche Vineuse', 'Mâcon-Vinzelles', 'Pouilly-Vinzelles',
       'Vin de Pays Cité de Carcassonne', 'Vin de Pays Var',
       "Vin de Pays d'Oc", "Vin de Pays de L'Aude",
       "Vin de Pays de L'Herault", 'Vin de Pays de Montferrand',
       'Vin de Pays de Vaucluse', "Vin de Pays de l'Atlantique",
       'Vin de Pays de la Haute Vallée du Gassac',
       'Vin de Pays des Alpilles', 'Vin de Pays des Coteaux de Bessilles',
       'Vin de Pays des Cévennes', 'Vin de Pays des Côtes Catalanes',
       'Vin de Pays des Maures', 'Vin de Pays du Gard',
       'Vin de Pays du Val de Cesse', 'Vin de Pays du Val de Loire',
       'Vino Nobile di Montepulciano',
       'Vino de la Tierra Altiplano de Sierra Nevada',
       'Vino de la Tierra Contraviesa Alpujarra',
       'Vino de la Tierra de Castelló', 'Vino de la Tierra de Cádiz'],
      dtype=object)

In [24]:
np.unique(reviews[((reviews.region_1).notna())&(not all(reviews.region_1.str.contains('Vin')))].region_1)

array(['Adelaida District', 'Agrelo', 'Alcamo', "Aleatico dell'Elba",
       'Alella', 'Alexander Valley', 'Alghero', 'Alicante',
       'Aloxe-Corton', 'Alpilles', 'Alpine Valleys', 'Alta Langa',
       'Alta Mesa', 'Alta Valle della Greve', 'Alto Valle del Río Negro',
       'Altos de Mendoza', 'Amador County', 'Amador-Napa',
       'Amarone della Valpolicella',
       'Amarone della Valpolicella Classico',
       'Amarone della Valpolicella Valpantena', 'Ancient Lakes',
       'Anderson Valley', 'Anjou', 'Anjou Villages',
       'Anjou Villages Brissac',
       'Antelope Valley of the California High Desert',
       'Applegate Valley', 'Ardèche', 'Arroyo Grande Valley',
       'Arroyo Seco', 'Asolo Prosecco Superiore', 'Asti', 'Atlas Peak',
       'Aude Hauterive', 'Augusta', 'Auxey-Duresses', 'Ballard Canyon',
       'Bandol', 'Banyuls', 'Barbaresco', "Barbera d'Alba",
       "Barbera d'Alba Superiore", "Barbera d'Asti",
       "Barbera d'Asti Superiore", "Barbera d'Asti Superiore 

In [25]:
np.unique(reviews[(reviews.region_2.notna())].region_2)

array(['Central Coast', 'Central Valley', 'Columbia Valley',
       'Finger Lakes', 'Long Island', 'Napa', 'Napa-Sonoma',
       'North Coast', 'Sierra Foothills', 'Sonoma', 'South Coast',
       'Southern Oregon', 'Willamette Valley'], dtype=object)

In [26]:
mask = reviews[(reviews.region_1.notna())|(reviews.region_2.notna())|reviews.province.notna()].groupby(['winery', 'country']).count()[['title']].rename(columns={'title': 'count'}).query('count == 1').index.to_list()
expected = len(mask)
mask

[('1070 Green', 'US'),
 ('13 Celsius', 'New Zealand'),
 ('1752 Signature Wines', 'France'),
 ('1789 Wines', 'US'),
 ('181', 'US'),
 ('2 Copas', 'Argentina'),
 ('24 Knots', 'US'),
 ('25 Lagunas', 'Argentina'),
 ('3 Badge Beverage', 'US'),
 ('3 Ball', 'US'),
 ('3 Spells', 'US'),
 ('3000 BC', 'South Africa'),
 ('31st State', 'US'),
 ('3CV', 'US'),
 ('428 Wines', 'US'),
 ('60 North', 'US'),
 ('60 Souls', 'US'),
 ('7 Heavenly Chards', 'US'),
 ('7 Peaks', 'US'),
 ('A Cellar Full of Noise', 'US'),
 ('A2O', 'Spain'),
 ('AJB Vineyards', 'US'),
 ('AZS', 'US'),
 ('Aaldering', 'South Africa'),
 ('Abadía da Cova', 'Spain'),
 ('Abbaye De Tholomies', 'France'),
 ('Abbaye Sylva Plana', 'France'),
 ('Abbey Creek', 'US'),
 ('Abbotts', 'France'),
 ('Abiouness', 'US'),
 ('Abraham Perold', 'South Africa'),
 ('Acaibo', 'US'),
 ('Acre', 'US'),
 ('Adalia', 'Italy'),
 ('Adega Algueira', 'Spain'),
 ('Adega Cooperativa da Batalha', 'Portugal'),
 ('Adega Cooperativa de Vila Real', 'Portugal'),
 ('Adega Northwest'

In [27]:
mask = reviews.swifter.apply(lambda row: (row.region_1 is not None or row.region_2 is not None or row.province is not None) and ((row.winery, row.country) in mask), axis=1)
reviews.loc[mask, 'q'] = reviews.loc[mask].winery

Pandas Apply:   0%|          | 0/129971 [00:00<?, ?it/s]

In [28]:
assert reviews[reviews.q.notna()].shape[0] == expected

In [29]:
mask = (reviews.q.isna())&(reviews.region_1.isna())&(reviews.region_2.isna())&(reviews.province.isna())
expected += reviews[mask].shape[0]
reviews.loc[mask,'q'] = reviews.loc[mask].winery
assert reviews[reviews.q.notna()].shape[0] == expected
reviews[reviews.q.notna()]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,,,,Michael Schachner,@wineschach,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem,Tandem
18,Spain,"Desiccated blackberry, leather, charred wood a...",Vendimia Seleccionada Finca Valdelayegua Singl...,87,28.0,,,,Michael Schachner,@wineschach,Pradorey 2010 Vendimia Seleccionada Finca Vald...,Tempranillo Blend,Pradorey,Pradorey
38,Italy,"Inky in color, this wine has plump aromas of r...",I Tratturi,86,11.0,,,,,,Feudi di San Marzano 2011 I Tratturi Primitivo...,Primitivo,Feudi di San Marzano,Feudi di San Marzano
61,Italy,This densely hued wine has aromas of black plu...,Prugneto,86,17.0,,,,Kerin O’Keefe,@kerinokeefe,Podere dal Nespoli 2015 Prugneto Sangiovese (R...,Sangiovese,Podere dal Nespoli,Podere dal Nespoli
72,Italy,"Aromas of black-skinned fruit, leather, underb...",Daginestra,86,32.0,,,,Kerin O’Keefe,@kerinokeefe,Grifalco 2013 Daginestra (Aglianico del Vulture),Aglianico,Grifalco,Grifalco
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129902,Spain,Dry apple and lemon aromas are clean and one s...,Finca Las Marcas Fermentado en Barrica,91,40.0,,,,Michael Schachner,@wineschach,Carlos Moro 2015 Finca Las Marcas Fermentado e...,Verdejo,Carlos Moro,Carlos Moro
129917,Argentina,"This dark, meaty Malbec has some Agrelo fruit ...",Piedra Negra,91,40.0,Mendoza Province,Mendoza,,Michael Schachner,@wineschach,J. & F. Lurton 2004 Piedra Negra Malbec (Mendoza),Malbec,J. & F. Lurton,J. & F. Lurton
129930,Spain,"Arguably on the heavy and rich side, but if yo...",Crianza,91,26.0,,,,Michael Schachner,@wineschach,Costaval 2005 Crianza (Ribera del Duero),Tempranillo Blend,Costaval,Costaval
129933,Austria,Bründlmayer makes a particularly felicitous br...,Brut,91,46.0,,,,Roger Voss,@vossroger,Bründlmayer 2005 Brut White (Austria),White Blend,Bründlmayer,Bründlmayer


In [30]:
reviews[(reviews.q.notna())&(reviews.country == 'US')]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
252,US,"Shows the briary, brambly character of Foothil...",Seity,85,35.0,California,Amador County,Sierra Foothills,,,Cloud 9 2006 Seity Zinfandel (Amador County),Zinfandel,Cloud 9,Cloud 9
410,US,An elegant nose of violet and blackberry gives...,,85,20.0,New York,Finger Lakes,,Susan Kostrzewa,@suskostrzewa,Miles 2006 Cabernet Franc (Finger Lakes),Cabernet Franc,Miles,Miles
446,US,Fresh blueberry and grape aromas meet with bla...,,88,20.0,California,Paso Robles,Central Coast,Matt Kettmann,@mattkettmann,Flying Cloud 2014 Cabernet Sauvignon (Paso Rob...,Cabernet Sauvignon,Flying Cloud,Flying Cloud
583,US,This incredibly zesty and fruity bottling show...,Unfiltered Estate,89,32.0,California,Santa Cruz Mountains,Central Coast,Matt Kettmann,@mattkettmann,Silvertip 2014 Unfiltered Estate Pinot Noir (S...,Pinot Noir,Silvertip,Silvertip
724,US,"Peach, banana peel and a slight petrol note sh...",Mont Sec Vineyards,85,14.0,Texas,,,Alexander Peartree,,Mont Sec 2015 Mont Sec Vineyards Viognier (Texas),Viognier,Mont Sec,Mont Sec
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128504,US,"This spicy, medium-bodied wine is heavily spri...",,89,25.0,California,,,Jim Gordon,@gordone_cellars,Sundae wines 2014 Pinot Noir (California),Pinot Noir,Sundae wines,Sundae wines
129301,US,This well-balanced and smooth wine is the debu...,Brut,90,22.0,California,,,Jim Gordon,@gordone_cellars,Louis Pommery NV Brut Sparkling (California),Sparkling Blend,Louis Pommery,Louis Pommery
129324,US,"Almost as pale in color as a rosé, this Pinot ...",Hidden Hills Vineyard,86,35.0,California,Monterey County,Central Coast,,,Carmel Hills Winery 2009 Hidden Hills Vineyard...,Pinot Noir,Carmel Hills Winery,Carmel Hills Winery
129736,US,This has the pretty slightly candied flavors o...,,90,22.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Black Magnolia 2015 Pinot Noir (Willamette Val...,Pinot Noir,Black Magnolia,Black Magnolia


In [31]:
mask = (reviews.q.isna()) & (reviews.province.notna()) & (reviews.region_1.isna())& (reviews.region_2.isna())
expected += reviews[mask].shape[0]
reviews.loc[mask,'q'] = reviews.loc[mask].winery + ', ' + reviews.loc[mask].province
assert reviews[reviews.q.notna()].shape[0] == expected
reviews.loc[mask]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,"Quinta dos Avidagos, Douro"
7,France,This dry and restrained wine offers spice in p...,,87,24.0,Alsace,,,Roger Voss,@vossroger,Trimbach 2012 Gewurztraminer (Alsace),Gewürztraminer,Trimbach,"Trimbach, Alsace"
8,Germany,Savory dried thyme notes accent sunnier flavor...,Shine,87,12.0,Rheinhessen,,,Anna Lee C. Iijima,,Heinz Eifel 2013 Shine Gewürztraminer (Rheinhe...,Gewürztraminer,Heinz Eifel,"Heinz Eifel, Rheinhessen"
9,France,This has great depth of flavor with its fresh ...,Les Natures,87,27.0,Alsace,,,Roger Voss,@vossroger,Jean-Baptiste Adam 2012 Les Natures Pinot Gris...,Pinot Gris,Jean-Baptiste Adam,"Jean-Baptiste Adam, Alsace"
11,France,"This is a dry wine, very spicy, with a tight, ...",,87,30.0,Alsace,,,Roger Voss,@vossroger,Leon Beyer 2012 Gewurztraminer (Alsace),Gewürztraminer,Leon Beyer,"Leon Beyer, Alsace"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef),"Dr. H. Thanisch (Erben Müller-Burggraef), Mosel"
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,,,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation,"Citation, Oregon"
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser,"Domaine Gresser, Alsace"
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"Domaine Marcel Deiss, Alsace"


In [32]:
reviews[reviews.q.notna()]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,"Quinta dos Avidagos, Douro"
5,Spain,Blackberry and raspberry aromas show a typical...,Ars In Vitro,87,15.0,,,,Michael Schachner,@wineschach,Tandem 2011 Ars In Vitro Tempranillo-Merlot (N...,Tempranillo-Merlot,Tandem,Tandem
7,France,This dry and restrained wine offers spice in p...,,87,24.0,Alsace,,,Roger Voss,@vossroger,Trimbach 2012 Gewurztraminer (Alsace),Gewürztraminer,Trimbach,"Trimbach, Alsace"
8,Germany,Savory dried thyme notes accent sunnier flavor...,Shine,87,12.0,Rheinhessen,,,Anna Lee C. Iijima,,Heinz Eifel 2013 Shine Gewürztraminer (Rheinhe...,Gewürztraminer,Heinz Eifel,"Heinz Eifel, Rheinhessen"
9,France,This has great depth of flavor with its fresh ...,Les Natures,87,27.0,Alsace,,,Roger Voss,@vossroger,Jean-Baptiste Adam 2012 Les Natures Pinot Gris...,Pinot Gris,Jean-Baptiste Adam,"Jean-Baptiste Adam, Alsace"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef),"Dr. H. Thanisch (Erben Müller-Burggraef), Mosel"
129967,US,Citation is given as much as a decade of bottl...,,90,75.0,Oregon,,,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation,"Citation, Oregon"
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser,"Domaine Gresser, Alsace"
129969,France,"A dry style of Pinot Gris, this is crisp with ...",,90,32.0,Alsace,,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"Domaine Marcel Deiss, Alsace"


In [33]:
mask = (reviews.q.isna())&(reviews.region_1.notna()|reviews.region_2.notna()|reviews.province.notna())&(reviews.country == 'US')
expected += reviews[mask].shape[0]
reviews.loc[mask,'q'] = reviews.loc[mask].apply(lambda row: ', '.join([row.winery, row.region_1 if row.region_1 is not None else row.region_2]), axis=1)
assert reviews[reviews.q.notna()].shape[0] == expected
reviews.loc[mask]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
2,US,"Tart and snappy, the flavors of lime flesh and...",,87,14.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,"Rainstorm, Willamette Valley"
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,"St. Julian, Lake Michigan Shore"
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,"Sweet Cheeks, Willamette Valley"
10,US,"Soft, supple plum envelopes an oaky structure ...",Mountain Cuvée,87,19.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Kirkland Signature 2011 Mountain Cuvée Caberne...,Cabernet Sauvignon,Kirkland Signature,"Kirkland Signature, Napa Valley"
12,US,"Slightly reduced, this wine offers a chalky, t...",,87,34.0,California,Alexander Valley,Sonoma,Virginie Boone,@vboone,Louis M. Martini 2012 Cabernet Sauvignon (Alex...,Cabernet Sauvignon,Louis M. Martini,"Louis M. Martini, Alexander Valley"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129942,US,This is classic in herbaceous aromas and flavo...,,90,35.0,California,Sonoma County,Sonoma,Virginie Boone,@vboone,Arrowood 2010 Cabernet Sauvignon (Sonoma County),Cabernet Sauvignon,Arrowood,"Arrowood, Sonoma County"
129945,US,Hailing from one of the more popular vineyards...,Jurassic Park Vineyard Old Vines,90,20.0,California,Santa Ynez Valley,Central Coast,Matt Kettmann,@mattkettmann,Birichino 2013 Jurassic Park Vineyard Old Vine...,Chenin Blanc,Birichino,"Birichino, Santa Ynez Valley"
129949,US,There's no bones about the use of oak in this ...,Barrel Fermented,90,35.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Flora Springs 2013 Barrel Fermented Chardonnay...,Chardonnay,Flora Springs,"Flora Springs, Napa Valley"
129950,US,This opens with herbaceous dollops of thyme an...,Blocks 7 & 22,90,35.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Hendry 2012 Blocks 7 & 22 Zinfandel (Napa Valley),Zinfandel,Hendry,"Hendry, Napa Valley"


In [34]:
mask = (reviews.q.isna())&(reviews.region_1.notna()|reviews.region_2.notna()|reviews.province.notna())&(reviews.country != 'US')
expected += reviews[mask].shape[0]
reviews.loc[mask,'q'] = reviews.loc[mask].apply(lambda row: ', '.join([row.winery, row.region_1 if row.region_1 is not None else row.region_2]), axis=1)
assert reviews[reviews.q.notna()].shape[0] == expected
reviews.loc[mask]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,"Nicosia, Etna"
6,Italy,"Here's a bright, informal red that opens with ...",Belsito,87,16.0,Sicily & Sardinia,Vittoria,,Kerin O’Keefe,@kerinokeefe,Terre di Giurfo 2013 Belsito Frappato (Vittoria),Frappato,Terre di Giurfo,"Terre di Giurfo, Vittoria"
13,Italy,This is dominated by oak and oak-driven aromas...,Rosso,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Masseria Setteporte 2012 Rosso (Etna),Nerello Mascalese,Masseria Setteporte,"Masseria Setteporte, Etna"
16,Argentina,"Baked plum, molasses, balsamic vinegar and che...",Felix,87,30.0,Other,Cafayate,,Michael Schachner,@wineschach,Felix Lavaque 2010 Felix Malbec (Cafayate),Malbec,Felix Lavaque,"Felix Lavaque, Cafayate"
17,Argentina,Raw black-cherry aromas are direct and simple ...,Winemaker Selection,87,13.0,Mendoza Province,Mendoza,,Michael Schachner,@wineschach,Gaucho Andino 2011 Winemaker Selection Malbec ...,Malbec,Gaucho Andino,"Gaucho Andino, Mendoza"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129943,Italy,"A blend of Nero d'Avola and Syrah, this convey...",Adènzia,90,29.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,Baglio del Cristo di Campobello 2012 Adènzia R...,Red Blend,Baglio del Cristo di Campobello,"Baglio del Cristo di Campobello, Sicilia"
129947,Italy,"A blend of 65% Cabernet Sauvignon, 30% Merlot ...",Symposio,90,20.0,Sicily & Sardinia,Terre Siciliane,,Kerin O’Keefe,@kerinokeefe,Feudo Principi di Butera 2012 Symposio Red (Te...,Red Blend,Feudo Principi di Butera,"Feudo Principi di Butera, Terre Siciliane"
129948,Argentina,Raspberry and cassis aromas are fresh and upri...,Pedernal,90,43.0,Other,San Juan,,Michael Schachner,@wineschach,Finca Las Moras 2010 Pedernal Malbec (San Juan),Malbec,Finca Las Moras,"Finca Las Moras, San Juan"
129961,Italy,"Intense aromas of wild cherry, baking spice, t...",,90,30.0,Sicily & Sardinia,Sicilia,,Kerin O’Keefe,@kerinokeefe,COS 2013 Frappato (Sicilia),Frappato,COS,"COS, Sicilia"


In [35]:
reviews.loc[reviews.designation.isna(), 'designation'] = reviews.loc[reviews.designation.isna()].variety
reviews.loc[reviews.designation.isna()]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
86909,Chile,"A chalky, dusty mouthfeel nicely balances this...",,88,17.0,Maipo Valley,,,,,Carmen 1999 (Maipo Valley),,Carmen,"Carmen, Maipo Valley"


In [36]:
reviews[(reviews.country == 'US') & (~reviews.province.isin(['America','Washington-Oregon']))]

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q
2,US,"Tart and snappy, the flavors of lime flesh and...",Pinot Gris,87,14.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,"Rainstorm, Willamette Valley"
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,"St. Julian, Lake Michigan Shore"
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,"Sweet Cheeks, Willamette Valley"
10,US,"Soft, supple plum envelopes an oaky structure ...",Mountain Cuvée,87,19.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Kirkland Signature 2011 Mountain Cuvée Caberne...,Cabernet Sauvignon,Kirkland Signature,"Kirkland Signature, Napa Valley"
12,US,"Slightly reduced, this wine offers a chalky, t...",Cabernet Sauvignon,87,34.0,California,Alexander Valley,Sonoma,Virginie Boone,@vboone,Louis M. Martini 2012 Cabernet Sauvignon (Alex...,Cabernet Sauvignon,Louis M. Martini,"Louis M. Martini, Alexander Valley"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129945,US,Hailing from one of the more popular vineyards...,Jurassic Park Vineyard Old Vines,90,20.0,California,Santa Ynez Valley,Central Coast,Matt Kettmann,@mattkettmann,Birichino 2013 Jurassic Park Vineyard Old Vine...,Chenin Blanc,Birichino,"Birichino, Santa Ynez Valley"
129949,US,There's no bones about the use of oak in this ...,Barrel Fermented,90,35.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Flora Springs 2013 Barrel Fermented Chardonnay...,Chardonnay,Flora Springs,"Flora Springs, Napa Valley"
129950,US,This opens with herbaceous dollops of thyme an...,Blocks 7 & 22,90,35.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Hendry 2012 Blocks 7 & 22 Zinfandel (Napa Valley),Zinfandel,Hendry,"Hendry, Napa Valley"
129952,US,This Zinfandel from the eastern section of Nap...,Zinfandel,90,22.0,California,Chiles Valley,Napa,Virginie Boone,@vboone,Houdini 2011 Zinfandel (Chiles Valley),Zinfandel,Houdini,"Houdini, Chiles Valley"


In [89]:
from collections import namedtuple

LOCATION = namedtuple('location', ['lat', 'lon', 'address'], defaults = [None]*3)

def geocode_winery(row):
  # Implement your mock logic here
  location = real_random_address_by_state(us_state_to_abbrev[row.province])
  if 'coordinates' in location:            
    lat, lon = location['coordinates'].values()
    # del location['coordinates']
    loc = LOCATION(lat, lon, location)
    # print(loc)
    return loc
  return LOCATION()


In [90]:

tmp = reviews[(reviews.country == 'US') & (~reviews.province.isin(['America','Washington-Oregon']))].sample(100)
tmp.info()
tmp

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 83856 to 70179
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   country                100 non-null    object 
 1   description            100 non-null    object 
 2   designation            100 non-null    object 
 3   points                 100 non-null    int64  
 4   price                  100 non-null    float64
 5   province               100 non-null    object 
 6   region_1               90 non-null     object 
 7   region_2               73 non-null     object 
 8   taster_name            66 non-null     object 
 9   taster_twitter_handle  62 non-null     object 
 10  title                  100 non-null    object 
 11  variety                100 non-null    object 
 12  winery                 100 non-null    object 
 13  q                      100 non-null    object 
 14  lat                    0 non-null      float64
 15  lon  

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q,lat,lon,address
83856,US,"An astonishing wine, creamy and loaded with lu...",Sigrid,95,80.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Bergström 2009 Sigrid Chardonnay (Willamette V...,Chardonnay,Bergström,"Bergström, Willamette Valley",,,
44406,US,This is a Pinot-based refresher with substanti...,Nicoletta Rosé of,91,25.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Levendi 2015 Nicoletta Rosé of Pinot Noir (Nap...,Pinot Noir,Levendi,"Levendi, Napa Valley",,,
93629,US,Coming largely from Canoe Ridge and Cold Creek...,Ethos Reserve,91,36.0,Washington,Columbia Valley (WA),Columbia Valley,Sean P. Sullivan,@wawinereport,Chateau Ste. Michelle 2013 Ethos Reserve Chard...,Chardonnay,Chateau Ste. Michelle,"Chateau Ste. Michelle, Columbia Valley (WA)",,,
94237,US,"Dramatic, powerful flavors of blackberries and...",Olive Hill Estate Vineyards Special Selection,94,100.0,California,Sonoma Valley,Sonoma,,,B.R. Cohn 2007 Olive Hill Estate Vineyards Spe...,Cabernet Sauvignon,B.R. Cohn,"B.R. Cohn, Sonoma Valley",,,
73042,US,"Clean and nice upfront, with blackberry aromas...",Reserve,91,45.0,California,Napa Valley,Napa,,,Rosenblum 1997 Reserve Cabernet Sauvignon (Nap...,Cabernet Sauvignon,Rosenblum,"Rosenblum, Napa Valley",,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22152,US,"Aromas of coffee, dried herbs, vanilla, flower...",Estate Riverbend Vineyard,91,34.0,Washington,Wahluke Slope,Columbia Valley,Sean P. Sullivan,@wawinereport,Fielding Hills 2013 Estate Riverbend Vineyard ...,Cabernet Franc,Fielding Hills,"Fielding Hills, Wahluke Slope",,,
59481,US,"Reduced and dominated by tannin and oak, this ...",Pinot Noir,84,29.0,California,Los Carneros,Napa-Sonoma,Virginie Boone,@vboone,Acacia 2014 Pinot Noir (Los Carneros),Pinot Noir,Acacia,"Acacia, Los Carneros",,,
101681,US,"Clean and crisp in acidity, with a rich overla...",Reserve,87,20.0,California,Paso Robles,Central Coast,,,EOS 2008 Reserve Chardonnay (Paso Robles),Chardonnay,EOS,"EOS, Paso Robles",,,
33788,US,"This lovely, layered wine from the Santa Ynez ...",Cuvée Sandre,94,68.0,California,Sta. Rita Hills,Central Coast,Matt Kettmann,@mattkettmann,Demetria 2014 Cuvée Sandre Pinot Noir (Sta. Ri...,Pinot Noir,Demetria,"Demetria, Sta. Rita Hills",,,


In [91]:
geoloc = pd.DataFrame(tmp.swifter.apply(geocode_winery, axis=1).to_list(), index=tmp.index)
geoloc.info()
geoloc

Pandas Apply:   0%|          | 0/100 [00:00<?, ?it/s]

<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 83856 to 70179
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   lat      71 non-null     float64
 1   lon      71 non-null     float64
 2   address  71 non-null     object 
dtypes: float64(2), object(1)
memory usage: 7.2+ KB


Unnamed: 0,lat,lon,address
83856,,,
44406,37.879266,-122.271533,"{'address1': '1532 Milvia Street', 'address2':..."
93629,,,
94237,37.390391,-122.060062,"{'address1': '152 Holly Court', 'address2': ''..."
73042,37.572724,-122.076515,"{'address1': '32512 Christine Drive', 'address..."
...,...,...,...
22152,,,
59481,37.153463,-119.648192,"{'address1': '49548 Road 200', 'address2': '',..."
101681,37.543515,-122.008143,"{'address1': '4876 Norris Road', 'address2': '..."
33788,37.858979,-122.275741,"{'address1': '2704 McGee Avenue', 'address2': ..."


In [92]:
pd.concat([tmp, geoloc], axis=1)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q,lat,lon,address,lat.1,lon.1,address.1
83856,US,"An astonishing wine, creamy and loaded with lu...",Sigrid,95,80.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Bergström 2009 Sigrid Chardonnay (Willamette V...,Chardonnay,Bergström,"Bergström, Willamette Valley",,,,,,
44406,US,This is a Pinot-based refresher with substanti...,Nicoletta Rosé of,91,25.0,California,Napa Valley,Napa,Virginie Boone,@vboone,Levendi 2015 Nicoletta Rosé of Pinot Noir (Nap...,Pinot Noir,Levendi,"Levendi, Napa Valley",,,,37.879266,-122.271533,"{'address1': '1532 Milvia Street', 'address2':..."
93629,US,Coming largely from Canoe Ridge and Cold Creek...,Ethos Reserve,91,36.0,Washington,Columbia Valley (WA),Columbia Valley,Sean P. Sullivan,@wawinereport,Chateau Ste. Michelle 2013 Ethos Reserve Chard...,Chardonnay,Chateau Ste. Michelle,"Chateau Ste. Michelle, Columbia Valley (WA)",,,,,,
94237,US,"Dramatic, powerful flavors of blackberries and...",Olive Hill Estate Vineyards Special Selection,94,100.0,California,Sonoma Valley,Sonoma,,,B.R. Cohn 2007 Olive Hill Estate Vineyards Spe...,Cabernet Sauvignon,B.R. Cohn,"B.R. Cohn, Sonoma Valley",,,,37.390391,-122.060062,"{'address1': '152 Holly Court', 'address2': ''..."
73042,US,"Clean and nice upfront, with blackberry aromas...",Reserve,91,45.0,California,Napa Valley,Napa,,,Rosenblum 1997 Reserve Cabernet Sauvignon (Nap...,Cabernet Sauvignon,Rosenblum,"Rosenblum, Napa Valley",,,,37.572724,-122.076515,"{'address1': '32512 Christine Drive', 'address..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22152,US,"Aromas of coffee, dried herbs, vanilla, flower...",Estate Riverbend Vineyard,91,34.0,Washington,Wahluke Slope,Columbia Valley,Sean P. Sullivan,@wawinereport,Fielding Hills 2013 Estate Riverbend Vineyard ...,Cabernet Franc,Fielding Hills,"Fielding Hills, Wahluke Slope",,,,,,
59481,US,"Reduced and dominated by tannin and oak, this ...",Pinot Noir,84,29.0,California,Los Carneros,Napa-Sonoma,Virginie Boone,@vboone,Acacia 2014 Pinot Noir (Los Carneros),Pinot Noir,Acacia,"Acacia, Los Carneros",,,,37.153463,-119.648192,"{'address1': '49548 Road 200', 'address2': '',..."
101681,US,"Clean and crisp in acidity, with a rich overla...",Reserve,87,20.0,California,Paso Robles,Central Coast,,,EOS 2008 Reserve Chardonnay (Paso Robles),Chardonnay,EOS,"EOS, Paso Robles",,,,37.543515,-122.008143,"{'address1': '4876 Norris Road', 'address2': '..."
33788,US,"This lovely, layered wine from the Santa Ynez ...",Cuvée Sandre,94,68.0,California,Sta. Rita Hills,Central Coast,Matt Kettmann,@mattkettmann,Demetria 2014 Cuvée Sandre Pinot Noir (Sta. Ri...,Pinot Noir,Demetria,"Demetria, Sta. Rita Hills",,,,37.858979,-122.275741,"{'address1': '2704 McGee Avenue', 'address2': ..."


In [93]:
addresses = pd.DataFrame(
  reviews[(reviews.country == 'US') & (~reviews.province.isin(['America','Washington-Oregon']))].swifter.apply(geocode_winery, axis=1).to_list(), 
  index=reviews[(reviews.country == 'US') & (~reviews.province.isin(['America','Washington-Oregon']))].index
)
addresses

Pandas Apply:   0%|          | 0/54402 [00:00<?, ?it/s]

Unnamed: 0,lat,lon,address
2,,,
3,,,
4,,,
10,37.694978,-122.131114,"{'address1': '15267 Hesperian Boulevard', 'add..."
12,37.727808,-122.152331,"{'address1': '424 Callan Avenue', 'address2': ..."
...,...,...,...
129945,37.849964,-122.266668,"{'address1': '725 65th Street', 'address2': ''..."
129949,37.153463,-119.648192,"{'address1': '49548 Road 200', 'address2': '',..."
129950,33.867356,-118.259771,"{'address1': '849 East Victoria Street', 'addr..."
129952,33.119461,-117.007959,"{'address1': '2335 Orchard View Lane', 'addres..."


In [94]:
pd.concat([reviews, addresses], axis=1)

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q,lat,lon,address,lat.1,lon.1,address.1
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,"Nicosia, Etna",,,,,,
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,"Quinta dos Avidagos, Douro",,,,,,
2,US,"Tart and snappy, the flavors of lime flesh and...",Pinot Gris,87,14.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,"Rainstorm, Willamette Valley",,,,,,
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,"St. Julian, Lake Michigan Shore",,,,,,
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,"Sweet Cheeks, Willamette Valley",,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef),"Dr. H. Thanisch (Erben Müller-Burggraef), Mosel",,,,,,
129967,US,Citation is given as much as a decade of bottl...,Pinot Noir,90,75.0,Oregon,,,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation,"Citation, Oregon",,,,,,
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser,"Domaine Gresser, Alsace",,,,,,
129969,France,"A dry style of Pinot Gris, this is crisp with ...",Pinot Gris,90,32.0,Alsace,,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"Domaine Marcel Deiss, Alsace",,,,,,


In [95]:
reviews[(reviews.country == 'US') & (~reviews.province.isin(['America','Washington-Oregon']))].info()

<class 'pandas.core.frame.DataFrame'>
Index: 54402 entries, 2 to 129967
Data columns (total 17 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   country                54402 non-null  object 
 1   description            54402 non-null  object 
 2   designation            54402 non-null  object 
 3   points                 54402 non-null  int64  
 4   price                  54163 non-null  float64
 5   province               54402 non-null  object 
 6   region_1               49702 non-null  object 
 7   region_2               41084 non-null  object 
 8   taster_name            37644 non-null  object 
 9   taster_twitter_handle  34663 non-null  object 
 10  title                  54402 non-null  object 
 11  variety                54402 non-null  object 
 12  winery                 54402 non-null  object 
 13  q                      54402 non-null  object 
 14  lat                    0 non-null      float64
 15  lon   

In [97]:
import geopandas as gpd

gdf = gpd.GeoDataFrame(reviews, geometry=gpd.points_from_xy(reviews.lon, reviews.lat))
gdf.info()
gdf

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 129971 entries, 0 to 129970
Data columns (total 18 columns):
 #   Column                 Non-Null Count   Dtype   
---  ------                 --------------   -----   
 0   country                129908 non-null  object  
 1   description            129971 non-null  object  
 2   designation            129970 non-null  object  
 3   points                 129971 non-null  int64   
 4   price                  120975 non-null  float64 
 5   province               116120 non-null  object  
 6   region_1               86104 non-null   object  
 7   region_2               41084 non-null   object  
 8   taster_name            103727 non-null  object  
 9   taster_twitter_handle  98758 non-null   object  
 10  title                  129971 non-null  object  
 11  variety                129970 non-null  object  
 12  winery                 129971 non-null  object  
 13  q                      129971 non-null  object  
 14  lat              

Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,taster_name,taster_twitter_handle,title,variety,winery,q,lat,lon,address,geometry
0,Italy,"Aromas include tropical fruit, broom, brimston...",Vulkà Bianco,87,,Sicily & Sardinia,Etna,,Kerin O’Keefe,@kerinokeefe,Nicosia 2013 Vulkà Bianco (Etna),White Blend,Nicosia,"Nicosia, Etna",,,,POINT EMPTY
1,Portugal,"This is ripe and fruity, a wine that is smooth...",Avidagos,87,15.0,Douro,,,Roger Voss,@vossroger,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,Quinta dos Avidagos,"Quinta dos Avidagos, Douro",,,,POINT EMPTY
2,US,"Tart and snappy, the flavors of lime flesh and...",Pinot Gris,87,14.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Rainstorm 2013 Pinot Gris (Willamette Valley),Pinot Gris,Rainstorm,"Rainstorm, Willamette Valley",,,,POINT EMPTY
3,US,"Pineapple rind, lemon pith and orange blossom ...",Reserve Late Harvest,87,13.0,Michigan,Lake Michigan Shore,,Alexander Peartree,,St. Julian 2013 Reserve Late Harvest Riesling ...,Riesling,St. Julian,"St. Julian, Lake Michigan Shore",,,,POINT EMPTY
4,US,"Much like the regular bottling from 2012, this...",Vintner's Reserve Wild Child Block,87,65.0,Oregon,Willamette Valley,,Paul Gregutt,@paulgwine,Sweet Cheeks 2012 Vintner's Reserve Wild Child...,Pinot Noir,Sweet Cheeks,"Sweet Cheeks, Willamette Valley",,,,POINT EMPTY
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129966,Germany,Notes of honeysuckle and cantaloupe sweeten th...,Brauneberger Juffer-Sonnenuhr Spätlese,90,28.0,Mosel,,,Anna Lee C. Iijima,,Dr. H. Thanisch (Erben Müller-Burggraef) 2013 ...,Riesling,Dr. H. Thanisch (Erben Müller-Burggraef),"Dr. H. Thanisch (Erben Müller-Burggraef), Mosel",,,,POINT EMPTY
129967,US,Citation is given as much as a decade of bottl...,Pinot Noir,90,75.0,Oregon,,,Paul Gregutt,@paulgwine,Citation 2004 Pinot Noir (Oregon),Pinot Noir,Citation,"Citation, Oregon",,,,POINT EMPTY
129968,France,Well-drained gravel soil gives this wine its c...,Kritt,90,30.0,Alsace,,,Roger Voss,@vossroger,Domaine Gresser 2013 Kritt Gewurztraminer (Als...,Gewürztraminer,Domaine Gresser,"Domaine Gresser, Alsace",,,,POINT EMPTY
129969,France,"A dry style of Pinot Gris, this is crisp with ...",Pinot Gris,90,32.0,Alsace,,,Roger Voss,@vossroger,Domaine Marcel Deiss 2012 Pinot Gris (Alsace),Pinot Gris,Domaine Marcel Deiss,"Domaine Marcel Deiss, Alsace",,,,POINT EMPTY


In [None]:
from ratelimit import limits, sleep_and_retry

# throttled variant of fast operation
@sleep_and_retry
@limits(calls=10, period=1)  # Adjust rate limits as needed
def throttled(row):
    return geocode_winery(row)


In [None]:
addresses = pd.DataFrame(
  reviews[(reviews.country == 'US') & (~reviews.province.isin(['America','Washington-Oregon']))].swifter.apply(throttled, axis=1).to_list(), 
  index=reviews[(reviews.country == 'US') & (~reviews.province.isin(['America','Washington-Oregon']))].index
)
addresses