In [1]:
# We geocode the country, province and region_1 of each entry in the wine data into a latitude/longitude
# pair via the Google Cloud Maps API. Then, for each location, we count the number of wine varieties
# grown in that particular location. Finally, we visualize the wine hot-spots (the number of varieties
# at each location) on a Google satellite map of the world.

# Dependencies
import pandas as pd
import numpy as np
import gmaps
import requests
from config import  gkey
import time
# Configure gmaps: register the Google API key (imported from the local `config` module) with the library
gmaps.configure(api_key=gkey)

In [2]:
# Read the wine data CSV (path is relative to the working directory) and preview the first rows
wine_data = "winemag-data_first150k.csv"
wine_df = pd.read_csv(filepath_or_buffer=wine_data)
wine_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,country,description,designation,points,price,province,region_1,region_2,variety,winery
0,0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Napa,Cabernet Sauvignon,Heitz
1,1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,,Tinta de Toro,Bodega Carmen Rodríguez
2,2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sonoma,Sauvignon Blanc,Macauley
3,3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Willamette Valley,Pinot Noir,Ponzi
4,4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,,Provence red blend,Domaine de la Bégude


In [3]:
# Remove the "Unnamed: 0" column (presumably a row index saved into the CSV -- confirm) and
# "region_2", which the rest of the visible analysis does not use
wine_df2 = wine_df.drop(columns=["Unnamed: 0", "region_2"])
wine_df2.head()

Unnamed: 0,country,description,designation,points,price,province,region_1,variety,winery
0,US,This tremendous 100% varietal wine hails from ...,Martha's Vineyard,96,235.0,California,Napa Valley,Cabernet Sauvignon,Heitz
1,Spain,"Ripe aromas of fig, blackberry and cassis are ...",Carodorum Selección Especial Reserva,96,110.0,Northern Spain,Toro,Tinta de Toro,Bodega Carmen Rodríguez
2,US,Mac Watson honors the memory of a wine once ma...,Special Selected Late Harvest,96,90.0,California,Knights Valley,Sauvignon Blanc,Macauley
3,US,"This spent 20 months in 30% new French oak, an...",Reserve,96,65.0,Oregon,Willamette Valley,Pinot Noir,Ponzi
4,France,"This is the top wine from La Bégude, named aft...",La Brûlade,95,66.0,Provence,Bandol,Provence red blend,Domaine de la Bégude


In [4]:
# Keep only the rows that have a value in every column, then show the non-null count per column
dropped_wine = wine_df2.dropna(axis="index", how="any")
dropped_wine.count()

country        77284
description    77284
designation    77284
points         77284
price          77284
province       77284
region_1       77284
variety        77284
winery         77284
dtype: int64

In [5]:
# There is no city or zip code in the data, so the string to geocode is built per row as
# "<region_1>,<province>,<country>"
target_area = (
    dropped_wine["region_1"]
    .add(",")
    .add(dropped_wine["province"])
    .add(",")
    .add(dropped_wine["country"])
)
type(target_area)

pandas.core.series.Series

In [6]:
# Take the "variety" column for every remaining row. Despite the variable name, values are NOT
# de-duplicated here; distinct varieties are counted later, once the rows are grouped by location.
unique_variety = dropped_wine["variety"]
type(unique_variety)

pandas.core.series.Series

In [8]:
# Two-column frame pairing each row's location string with its wine variety
wv = pd.DataFrame({"Location": target_area, "Wine Variety": unique_variety})
wv_group = wv.groupby("Location")

# Number of distinct wine varieties per location (location = region_1, province, country)
num_varieties = wv_group["Wine Variety"].nunique().reset_index()

# The full set of locations is used below. For a quick test run, substitute a sample instead,
# e.g. num_varieties.sample(1000).
sample_num_varieties = num_varieties

In [9]:
# Geocode every location with the Google Maps Geocoding API.
# One HTTP request is issued per row of `sample_num_varieties`, so this cell is slow and
# consumes API quota when it is run on the full set of locations.
params = {"key": gkey}
base_url = "https://maps.googleapis.com/maps/api/geocode/json"


def _geocode_location(session, address, timeout=10):
    """Look up the coordinates of a single address.

    Parameters
    ----------
    session : requests.Session
        Session used to send the request (reused across calls to share connections).
    address : str
        Free-form location string sent as the ``address`` query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up on this address.

    Returns
    -------
    tuple
        ``(lat, lng)`` taken from the first result, or ``(nan, nan)`` when the request
        fails or the response holds no usable result. Failures are reported with
        ``print`` and never raised, so one bad address cannot abort the whole run.
    """
    try:
        response = session.get(
            base_url,
            params={**params, "address": address},
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Print only the exception type: requests' error messages can embed the full
        # request URL, which contains the API key.
        print(f"Request failed for {address!r} ({type(exc).__name__})... skipping.")
        return np.nan, np.nan

    try:
        coords = payload["results"][0]["geometry"]["location"]
        return coords["lat"], coords["lng"]
    except (KeyError, IndexError, TypeError):
        # Surface the API's own status field so quota/auth problems are distinguishable
        # from addresses that simply have no match.
        status = payload.get("status") if isinstance(payload, dict) else None
        print(f"Missing field/result for {address!r} (status: {status})... skipping.")
        return np.nan, np.nan


# Collect the coordinates first and assign each column once, instead of writing
# cell-by-cell with .loc inside the loop.
latitudes, longitudes = [], []
with requests.Session() as session:
    for sample_target in sample_num_varieties["Location"]:
        lat, lng = _geocode_location(session, sample_target)
        latitudes.append(lat)
        longitudes.append(lng)

# The columns are always created (even when every lookup failed), so later cells that
# select "Lat"/"Lng" get an empty frame rather than a KeyError.
sample_num_varieties["Lat"] = latitudes
sample_num_varieties["Lng"] = longitudes

# Keep only the locations that were successfully geocoded
sample_num_varieties = sample_num_varieties.dropna(subset=["Lat", "Lng"])

Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/result... skipping.
Missing field/

In [10]:
sample_num_varieties

Unnamed: 0,Location,Wine Variety,Lat,Lng
0,"Abruzzo,Central Italy,Italy",3,42.192012,13.728917
1,"Adelaida District,California,US",10,35.645397,-120.874138
2,"Adelaide Hills,South Australia,Australia",16,-34.906256,138.839740
3,"Adelaide,South Australia,Australia",10,-34.928499,138.600746
5,"Aglianico del Taburno,Southern Italy,Italy",1,41.130873,14.647249
6,"Aglianico del Vulture,Southern Italy,Italy",1,40.925633,15.677552
7,"Agrelo,Mendoza Province,Argentina",9,-33.119401,-68.886894
8,"Albana di Romagna,Central Italy,Italy",1,44.596761,11.218640
9,"Albemarle County,Virginia,US",1,38.056709,-78.611500
10,"Alcamo,Sicily & Sardinia,Italy",2,37.978395,12.968625


In [13]:
# Build a Google Maps heat map of the number of wine varieties at each geocoded location.

# Heat-map weights: the variety count of every location, in row order
num = sample_num_varieties["Wine Variety"].tolist()
# Coordinates of the same locations, in the same row order as the weights
marker_locations = sample_num_varieties[['Lat', 'Lng']]

# Layout settings passed to the map figure
figure_layout = dict(
    width='1200px',
    height='1200px',
    border='1px solid black',
    padding='1px',
    margin='0 auto 0 auto',
)
fig = gmaps.figure(layout=figure_layout)

# Heat layer weighted by variety count.
# NOTE(review): max_intensity=10 presumably caps the colour scale at a weight of 10, while some
# locations have more varieties than that -- confirm against the gmaps documentation.
heat_layer = gmaps.heatmap_layer(
    marker_locations,
    weights=num,
    max_intensity=10,
    point_radius=3.5,
    dissipating=True,
)

fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(border='1px solid black', height='1200px', margin='0 auto 0 auto', padding='1px', w…