In [1]:
# Dependencies to pull API
import requests
import json
# Google developer API key
from config import gkey

In [2]:
# Import Dependencies for Database
from config import password
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect
import psycopg2
import pandas as pd
import time

In [3]:
# Read in csv
location_wine_df = pd.read_csv("US_wine_data.csv")

In [4]:
# Display df columns
location_wine_df.columns

Index(['description', 'points', 'price', 'province', 'region_1', 'region_2',
       'title', 'variety', 'winery', 'price_bins'],
      dtype='object')

In [5]:
# Drop unnecessary columns and duplicates
region_df = location_wine_df.drop(columns = ['description', 'points', 'price', 'province',
       'region_2', 'title', 'variety', 'winery','price_bins'], axis = 1)
region_df = region_df.drop_duplicates()

In [6]:
# Create a region list
region_list = list(region_df['region_1'])
region_list

['Willamette Valley',
 'Lake Michigan Shore',
 'Napa Valley',
 'Alexander Valley',
 'Central Coast',
 'Virginia',
 'Oregon',
 'Paso Robles',
 'Sonoma Coast',
 'Clarksburg',
 'Dry Creek Valley',
 'Sonoma Valley',
 'McMinnville',
 'Lake County',
 'Monticello',
 'North Coast',
 'Columbia Valley (WA)',
 'Santa Ynez Valley',
 'California',
 'Howell Mountain',
 'Calistoga',
 'Eola-Amity Hills',
 'Ancient Lakes',
 'Knights Valley',
 'Sonoma County',
 'Finger Lakes',
 'Santa Clara Valley',
 'Yountville',
 'Rockpile',
 'Santa Lucia Highlands',
 'Spring Mountain District',
 'Monterey',
 'Carmel Valley',
 'Santa Cruz Mountains',
 'Edna Valley',
 'Russian River Valley',
 'Arroyo Seco',
 'Anderson Valley',
 'Nevada County',
 'Cayuga Lake',
 'Santa Barbara County',
 'Diamond Mountain District',
 'Mendocino',
 'Sonoma Mountain',
 'Amador County',
 'St. Helena',
 'Cole Ranch',
 'Santa Maria Valley',
 'Dundee Hills',
 'Arroyo Grande Valley',
 'Mendocino County',
 'Rutherford',
 'Walla Walla Valley (WA)

In [7]:
# Target valley
#region = "Sonoma Valley"

# Build the endpoint URL
valley_url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?inputtype=textquery&key='+ gkey



In [8]:
# Format regions to a string
regions = str(region_df)
regions.format

<function str.format>

In [9]:
# Create an empty list to hold the region data.
place_id_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

Beginning Data Retrieval     
-----------------------------


In [10]:
# Loop through all the regions in the list.
for i, region in enumerate(region_list):

 # Group regions in sets of 25 for logging purposes.
    if (i % 25 == 0 and i >= 25):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each region_id.
    placeid_url = valley_url + '&input='+ region
    

    # Add a one second interval between queries to stay within API query limits
    time.sleep(1)
    
    # Log the URL, record, and set numbers and the region.
    print(f"Processing Record {record_count} of Set {set_count} | {region}")
    # Add 1 to the record count.
    record_count += 1 
    
# Run an API request for each of the regions.
    try:
        # Parse the JSON and retrieve data.
        region_url = requests.get(placeid_url).json()
        # Parse out the needed data.
        place_id = region_url['candidates'][0]['place_id']
        # Append the region information into place_id_data list.
        place_id_data.append({"region": region, 
                              "Place_id": place_id})

# If an error is experienced, skip the region.
    except:
        print("region not found. Skipping...")
        pass



# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Processing Record 1 of Set 1 | Willamette Valley
Processing Record 2 of Set 1 | Lake Michigan Shore
Processing Record 3 of Set 1 | Napa Valley
Processing Record 4 of Set 1 | Alexander Valley
Processing Record 5 of Set 1 | Central Coast
Processing Record 6 of Set 1 | Virginia
Processing Record 7 of Set 1 | Oregon
Processing Record 8 of Set 1 | Paso Robles
Processing Record 9 of Set 1 | Sonoma Coast
Processing Record 10 of Set 1 | Clarksburg
Processing Record 11 of Set 1 | Dry Creek Valley
region not found. Skipping...
Processing Record 12 of Set 1 | Sonoma Valley
Processing Record 13 of Set 1 | McMinnville
Processing Record 14 of Set 1 | Lake County
Processing Record 15 of Set 1 | Monticello
Processing Record 16 of Set 1 | North Coast
Processing Record 17 of Set 1 | Columbia Valley (WA)
Processing Record 18 of Set 1 | Santa Ynez Valley
Processing Record 19 of Set 1 | California
Processing Record 20 of Set 1 | Howell Mountain
Processing Record 21 of Set 1 | Calistoga
Processing Record 22

Processing Record 22 of Set 7 | Illinois
Processing Record 23 of Set 7 | Malibu-Newton Canyon
Processing Record 24 of Set 7 | Mount Harlan
Processing Record 25 of Set 7 | Sonoma-Napa-Mendocino
Processing Record 1 of Set 8 | San Francisco Bay
Processing Record 2 of Set 8 | Chelan County
Processing Record 3 of Set 8 | California Other
Processing Record 4 of Set 8 | Saddle Rock-Malibu
Processing Record 5 of Set 8 | Moon Mountain District Sonoma County
Processing Record 6 of Set 8 | Humboldt County
Processing Record 7 of Set 8 | Napa-Sonoma-Marin
region not found. Skipping...
Processing Record 8 of Set 8 | Santa Clara County
Processing Record 9 of Set 8 | Puget Sound
Processing Record 10 of Set 8 | Solano County Green Valley
Processing Record 11 of Set 8 | California-Oregon
Processing Record 12 of Set 8 | Clear Lake
Processing Record 13 of Set 8 | Guenoc Valley
Processing Record 14 of Set 8 | Napa County-Sonoma County-San Joaquin County
Processing Record 15 of Set 8 | Washington-Oregon
Pro

In [11]:
# Create place_id dataframe
placeid_df = pd.DataFrame(place_id_data)
placeid_df.head(10)

Unnamed: 0,region,Place_id
0,Willamette Valley,ChIJbezwJP6hlVQRBDL4exvIWSo
1,Lake Michigan Shore,ChIJbWjISvi2EIgR-XXUbfv_U1k
2,Napa Valley,ChIJMU8qI_lPhIARfMUQoxvSFP4
3,Alexander Valley,ChIJD2TljFMUhIARBOZe4vCoE7o
4,Central Coast,ChIJnShIABvyXIYRZF3DGodjgMQ
5,Virginia,ChIJzbK8vXDWTIgRlaZGt0lBTsA
6,Oregon,ChIJVWqfm3xuk1QRdrgLettlTH0
7,Paso Robles,ChIJvcTFFo_d7IAR79XMkfgilEY
8,Sonoma Coast,ChIJB8Hkg3mehoARNDJoq9RCScI
9,Clarksburg,ChIJd37rCctnSogR4EfMcM4e6V0


In [12]:
# Create place_id list
place_id_list = list(placeid_df['Place_id'])
place_id_list


['ChIJbezwJP6hlVQRBDL4exvIWSo',
 'ChIJbWjISvi2EIgR-XXUbfv_U1k',
 'ChIJMU8qI_lPhIARfMUQoxvSFP4',
 'ChIJD2TljFMUhIARBOZe4vCoE7o',
 'ChIJnShIABvyXIYRZF3DGodjgMQ',
 'ChIJzbK8vXDWTIgRlaZGt0lBTsA',
 'ChIJVWqfm3xuk1QRdrgLettlTH0',
 'ChIJvcTFFo_d7IAR79XMkfgilEY',
 'ChIJB8Hkg3mehoARNDJoq9RCScI',
 'ChIJd37rCctnSogR4EfMcM4e6V0',
 'ChIJI-UCgdlShIARPnjSSZd4foM',
 'ChIJo3PJF7dOlVQRSM9F2xtwRMo',
 'ChIJKdUvmscAD4gRZsVGNFFqkhs',
 'ChIJxVk-0XjKRIYRpN4Q6igZogM',
 'ChIJ91TWEw-1RIYR-SXBulV0iyk',
 'ChIJQyqk37W1RIYRnCG26gFyqjc',
 'ChIJH3v79pZT6YAR-MXTvZmd134',
 'ChIJPV4oX_65j4ARVW8IJ6IJUYs',
 'ChIJo4LFduhbhIARTBAsD475eGc',
 'ChIJkTJxhwlDhIARQO6YM2Hiiwc',
 'ChIJeSwhtQ9OlVQRr1RneWzpXDE',
 'ChIJUVu5cvx4mVQRb1CuMS_kcro',
 'ChIJqe38AQa1RIYRh8jExRIRmZc',
 'ChIJj7atfV0ihIAR_qYnBSATSUk',
 'ChIJF-IKCX_p0IkRO7yA5tLMpSA',
 'ChIJ4Zu4Go4xjoARNcsmHfjo_dI',
 'ChIJfRoJQE1VhIARTB6YA4IIzN0',
 'ChIJyyaMKuUcW4YR1JISPgJLcKM',
 'ChIJY2jxLkBukoARNMBojauvJF8',
 'ChIJ5e4J-wi1RIYR0VuNKIr8jbs',
 'ChIJkfu1cFLkjYARXj1K2AlJSO4',
 'ChIJvU

In [3]:
# Target valley
#region = "Sonoma Valley"

# Build the endpoint URL
valley_url2 = ('https://maps.googleapis.com/maps/api/place/details/json?&key='+ gkey)



In [14]:
# Create an empty list to hold the coord data.
coord_id_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

Beginning Data Retrieval     
-----------------------------


In [15]:
# Loop through all the place ids in the list.
for i, placeids in enumerate(place_id_list):

 # Group place ids in sets of 25 for logging purposes.
    if (i % 25 == 0 and i >= 25):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each region_id.
    coord_url = valley_url2 + '&place_id='+ placeids
    

    # Add a one second interval between queries to stay within API query limits
    time.sleep(1)
    
    # Log the URL, record, and set numbers and the place ids.
    print(f"Processing Record {record_count} of Set {set_count} | {placeids}")
    # Add 1 to the record count.
    record_count += 1 
    
# Run an API request for the place ids and retrieve the lat long info.
    try:
        # Parse the JSON and retrieve data.
        geo_url = requests.get(coord_url).json()
        # Parse out the needed data.
        lat_id = geo_url['result']['geometry']['location']['lat']
        long_id = geo_url['result']['geometry']['location']['lng']
        # Append the region information into place_id_data list.
        coord_id_data.append({"Place_id": placeids, 
                              "Latitude": lat_id,
                             "Longitude": long_id})
        


# If an error is experienced, skip the record.
    except:
        print("record not found. Skipping...")
        pass



# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")



Processing Record 1 of Set 1 | ChIJbezwJP6hlVQRBDL4exvIWSo
Processing Record 2 of Set 1 | ChIJbWjISvi2EIgR-XXUbfv_U1k
Processing Record 3 of Set 1 | ChIJMU8qI_lPhIARfMUQoxvSFP4
Processing Record 4 of Set 1 | ChIJD2TljFMUhIARBOZe4vCoE7o
Processing Record 5 of Set 1 | ChIJnShIABvyXIYRZF3DGodjgMQ
Processing Record 6 of Set 1 | ChIJzbK8vXDWTIgRlaZGt0lBTsA
Processing Record 7 of Set 1 | ChIJVWqfm3xuk1QRdrgLettlTH0
Processing Record 8 of Set 1 | ChIJvcTFFo_d7IAR79XMkfgilEY
Processing Record 9 of Set 1 | ChIJB8Hkg3mehoARNDJoq9RCScI
Processing Record 10 of Set 1 | ChIJd37rCctnSogR4EfMcM4e6V0
Processing Record 11 of Set 1 | ChIJI-UCgdlShIARPnjSSZd4foM
Processing Record 12 of Set 1 | ChIJo3PJF7dOlVQRSM9F2xtwRMo
Processing Record 13 of Set 1 | ChIJKdUvmscAD4gRZsVGNFFqkhs
Processing Record 14 of Set 1 | ChIJxVk-0XjKRIYRpN4Q6igZogM
Processing Record 15 of Set 1 | ChIJ91TWEw-1RIYR-SXBulV0iyk
Processing Record 16 of Set 1 | ChIJQyqk37W1RIYRnCG26gFyqjc
Processing Record 17 of Set 1 | ChIJH3v79pZT6YAR-

Processing Record 14 of Set 6 | ChIJiewjJFxum1QR4B1v94YWsBI
Processing Record 15 of Set 6 | ChIJVzHjgcxKW4YRArZ3TXUc2K8
Processing Record 16 of Set 6 | ChIJhQd-sopHmoARt9CBUcuaJB4
Processing Record 17 of Set 6 | ChIJMU8qI_lPhIARfMUQoxvSFP4
Processing Record 18 of Set 6 | ChIJ82aOMnyhhYARHLRVUdyH0As
Processing Record 19 of Set 6 | ChIJqScrY5WkhoARlNUv2wg3tsA
Processing Record 20 of Set 6 | ChIJN3jyc4YakoAR3dCo-pWP7u8
Processing Record 21 of Set 6 | ChIJ9_fr2eEZkoARE_rvkQgeKQg
Processing Record 22 of Set 6 | ChIJI74a9VJi24ARqAY4oIgOvGg
Processing Record 23 of Set 6 | ChIJK0XprXRLW4YRZWyJ3baNsU0
Processing Record 24 of Set 6 | ChIJt1YYm3QUQIcR_6eQSTGDVMc
Processing Record 25 of Set 6 | ChIJ30PnwuKo14YRUhvNr6Cpals
Processing Record 1 of Set 7 | ChIJB9lyfRj5g4ARblOcfrmkRrg
Processing Record 2 of Set 7 | ChIJP-s5hlDSkYARr2Hy9ng7dKk
Processing Record 3 of Set 7 | ChIJmUlcX5OKwoARog_bRXgdajw
Processing Record 4 of Set 7 | ChIJJ8Jse77m7IARRg_vzsKQArw
Processing Record 5 of Set 7 | ChIJzR-XJnFMW

In [16]:
# Create a coordinate dataframe
coord_df = pd.DataFrame(coord_id_data)
coord_df.head(10)

Unnamed: 0,Place_id,Latitude,Longitude
0,ChIJbezwJP6hlVQRBDL4exvIWSo,44.942554,-122.933762
1,ChIJbWjISvi2EIgR-XXUbfv_U1k,42.22087,-86.369469
2,ChIJMU8qI_lPhIARfMUQoxvSFP4,38.502469,-122.265389
3,ChIJD2TljFMUhIARBOZe4vCoE7o,38.612965,-122.769435
4,ChIJnShIABvyXIYRZF3DGodjgMQ,29.444497,-98.322396
5,ChIJzbK8vXDWTIgRlaZGt0lBTsA,37.431573,-78.656894
6,ChIJVWqfm3xuk1QRdrgLettlTH0,43.804133,-120.554201
7,ChIJvcTFFo_d7IAR79XMkfgilEY,35.636876,-120.654502
8,ChIJB8Hkg3mehoARNDJoq9RCScI,38.424211,-123.101173
9,ChIJd37rCctnSogR4EfMcM4e6V0,39.280645,-80.344534


In [17]:
# Join the place_id and coord dataframes
region_df = pd.concat([placeid_df, coord_df], axis=1, join="inner")
    
region_df

Unnamed: 0,region,Place_id,Place_id.1,Latitude,Longitude
0,Willamette Valley,ChIJbezwJP6hlVQRBDL4exvIWSo,ChIJbezwJP6hlVQRBDL4exvIWSo,44.942554,-122.933762
1,Lake Michigan Shore,ChIJbWjISvi2EIgR-XXUbfv_U1k,ChIJbWjISvi2EIgR-XXUbfv_U1k,42.220870,-86.369469
2,Napa Valley,ChIJMU8qI_lPhIARfMUQoxvSFP4,ChIJMU8qI_lPhIARfMUQoxvSFP4,38.502469,-122.265389
3,Alexander Valley,ChIJD2TljFMUhIARBOZe4vCoE7o,ChIJD2TljFMUhIARBOZe4vCoE7o,38.612965,-122.769435
4,Central Coast,ChIJnShIABvyXIYRZF3DGodjgMQ,ChIJnShIABvyXIYRZF3DGodjgMQ,29.444497,-98.322396
...,...,...,...,...,...
244,Hawaii,ChIJBeB5Twbb_3sRKIbMdNKCd0s,ChIJBeB5Twbb_3sRKIbMdNKCd0s,19.896766,-155.582782
245,San Diego County,ChIJHWD_IzDr24ARKAeA6yv9DTU,ChIJHWD_IzDr24ARKAeA6yv9DTU,32.715730,-117.161097
246,Niagara Escarpment,ChIJ12YMXUm0AogRkMoAelhilAA,ChIJ12YMXUm0AogRkMoAelhilAA,44.718052,-87.595095
247,Sonoma-Napa-Lake,ChIJUWVAwmcGhYARBoANHIv70Os,ChIJUWVAwmcGhYARBoANHIv70Os,38.298327,-122.286570


In [18]:
# Drop Place_id column
region_df.drop(['Place_id'], axis=1)

Unnamed: 0,region,Latitude,Longitude
0,Willamette Valley,44.942554,-122.933762
1,Lake Michigan Shore,42.220870,-86.369469
2,Napa Valley,38.502469,-122.265389
3,Alexander Valley,38.612965,-122.769435
4,Central Coast,29.444497,-98.322396
...,...,...,...
244,Hawaii,19.896766,-155.582782
245,San Diego County,32.715730,-117.161097
246,Niagara Escarpment,44.718052,-87.595095
247,Sonoma-Napa-Lake,38.298327,-122.286570


In [19]:
# Create connection string
db_string = f"postgresql+psycopg2://postgres:" + password + "@127.0.0.1:5432/WineEnthusiast"


In [20]:
# Create the database engine
engine = create_engine(db_string)


In [21]:
# Save the region_df DataFrame to a SQL table called wineregions
region_df.to_sql(name='wineregions', con=engine)
