In [42]:
# Dependencies to pull API
import requests
import json
# Google developer API key
from config import gkey

In [43]:
# Import Dependencies for Database
from config import password
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect
import psycopg2
import pandas as pd
import time

In [44]:
db_string = f"postgresql+psycopg2://postgres:" + password + "@127.0.0.1:5432/WineEnthusiast"

In [45]:
engine = create_engine(db_string)
inspector = inspect(engine)
inspector.get_table_names()

['uswine', 'winedata', 'wine', 'winedata_ml', 'wineregions']

In [46]:
connection = engine.connect
session = Session(engine)
engine.execute("SELECT * from wine")

<sqlalchemy.engine.cursor.LegacyCursorResult at 0x26f772c3188>

In [47]:
col_names_list = []

for i in range(len(inspector.get_columns('wine'))):
    col_names_list.append(inspector.get_columns('wine')[i]['name'])
    
print(col_names_list)

['index', 'description', 'points', 'price', 'province', 'region_1', 'region_2', 'title', 'variety', 'winery']


In [48]:
location_wine_df = pd.DataFrame(columns = col_names_list)
location_wine_df.drop(columns = ['description', 'points', 'price', 'province',
       'region_2', 'title', 'variety', 'winery'], axis = 1)

Unnamed: 0,index,region_1


In [49]:
# Inner join for wines and regions
import sys
join_db = engine.execute("SELECT DISTINCT region_1 from wine")
for record in join_db:
    record_series = pd.Series((record), index = location_wine_df.columns)
    
    location_wine_df = location_wine_df.append(record_series, ignore_index=True, verify_integrity = True)
    

In [50]:
# Display df columns
location_wine_df

Unnamed: 0,index,description,points,price,province,region_1,region_2,title,variety,winery
0,,,,,,Russian River Valley,,,,
1,,,,,,Clements Hills,,,,
2,,,,,,Temecula Valley,,,,
3,,,,,,Columbia Valley (WA),,,,
4,,,,,,Nevada,,,,
...,...,...,...,...,...,...,...,...,...,...
261,,,,,,San Luis Obispo County,,,,
262,,,,,,Southeastern New England,,,,
263,,,,,,Marin County,,,,
264,,,,,,Texas,,,,


In [51]:
# Drop unnecessary columns and duplicates
region_df = location_wine_df.drop(columns = ['description', 'points', 'price', 'province',
       'region_2', 'title', 'variety', 'winery'], axis = 1)
region_df

Unnamed: 0,index,region_1
0,,Russian River Valley
1,,Clements Hills
2,,Temecula Valley
3,,Columbia Valley (WA)
4,,Nevada
...,...,...
261,,San Luis Obispo County
262,,Southeastern New England
263,,Marin County
264,,Texas


In [52]:
# Create a region list
region_list = list(region_df['region_1'])
region_list

['Russian River Valley',
 'Clements Hills',
 'Temecula Valley',
 'Columbia Valley (WA)',
 'Nevada',
 'Anderson Valley',
 'Solano County',
 'New Mexico',
 'Sonoma County-Monterey County-Napa County',
 'North Coast',
 'Puget Sound',
 'Fair Play',
 'Rutherford',
 'Washington',
 'Kentucky',
 'Sonoma County-Monterey County-Santa Barbara County',
 'Sonoma Mountain',
 'Monterey-Santa Barbara-Sonoma',
 'Calaveras County',
 'San Luis Obispo',
 'Eola-Amity Hills',
 'Sonoita',
 'Napa County-Lake County',
 'Mendocino Ridge',
 'Yolo County',
 'Los Carneros',
 'Napa County',
 'Red Hills Lake County',
 'Napa-Sonoma',
 'Central Coast',
 'Walla Walla Valley (WA)',
 'Elkton Oregon',
 'Oak Knoll District',
 'Madera',
 'California Other',
 'San Antonio Valley',
 'Ventura County',
 'Columbia Valley-Walla Walla Valley',
 'Santa Clara County',
 'Chehalem Mountains',
 'Napa Valley',
 'Chalone',
 'Napa-Amador',
 'Mount Harlan',
 'Cucamonga Valley',
 'Horse Heaven Hills',
 'Lake Chelan',
 'Old Mission Peninsula

In [53]:
# Target valley
#region = "Sonoma Valley"

# Build the endpoint URL
valley_url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?inputtype=textquery&key='+ gkey



In [54]:
# Format regions to a string
regions = str(region_df)
regions.format

<function str.format>

In [56]:
# Create an empty list to hold the region data.
place_id_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

Beginning Data Retrieval     
-----------------------------


In [57]:
# Loop through all the regions in the list.
for i, region in enumerate(region_list):

 # Group regions in sets of 25 for logging purposes.
    if (i % 25 == 0 and i >= 25):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each region_id.
    placeid_url = valley_url + '&input='+ region
    

    # Add a one second interval between queries to stay within API query limits
    time.sleep(1)
    
    # Log the URL, record, and set numbers and the region.
    print(f"Processing Record {record_count} of Set {set_count} | {region}")
    # Add 1 to the record count.
    record_count += 1 
    
# Run an API request for each of the regions.
    try:
        # Parse the JSON and retrieve data.
        region_url = requests.get(placeid_url).json()
        # Parse out the needed data.
        place_id = region_url['candidates'][0]['place_id']
        # Append the region information into place_id_data list.
        place_id_data.append({"region": region, 
                              "Place_id": place_id})

# If an error is experienced, skip the region.
    except:
        print("region not found. Skipping...")
        pass



# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Processing Record 1 of Set 1 | Russian River Valley
Processing Record 2 of Set 1 | Clements Hills
Processing Record 3 of Set 1 | Temecula Valley
Processing Record 4 of Set 1 | Columbia Valley (WA)
Processing Record 5 of Set 1 | Nevada
Processing Record 6 of Set 1 | Anderson Valley
Processing Record 7 of Set 1 | Solano County
Processing Record 8 of Set 1 | New Mexico
Processing Record 9 of Set 1 | Sonoma County-Monterey County-Napa County
Processing Record 10 of Set 1 | North Coast
Processing Record 11 of Set 1 | Puget Sound
Processing Record 12 of Set 1 | Fair Play
Processing Record 13 of Set 1 | Rutherford
Processing Record 14 of Set 1 | Washington
Processing Record 15 of Set 1 | Kentucky
Processing Record 16 of Set 1 | Sonoma County-Monterey County-Santa Barbara County
Processing Record 17 of Set 1 | Sonoma Mountain
Processing Record 18 of Set 1 | Monterey-Santa Barbara-Sonoma
Processing Record 19 of Set 1 | Calaveras County
Processing Record 20 of Set 1 | San Luis Obispo
Processing 

Processing Record 14 of Set 7 | Oregon
Processing Record 15 of Set 7 | Naches Heights
Processing Record 16 of Set 7 | Idaho
Processing Record 17 of Set 7 | Coombsville
Processing Record 18 of Set 7 | Colorado
Processing Record 19 of Set 7 | America
Processing Record 20 of Set 7 | New Jersey
Processing Record 21 of Set 7 | Sonoma-Napa
Processing Record 22 of Set 7 | Saddle Rock-Malibu
Processing Record 23 of Set 7 | Willamette Valley
Processing Record 24 of Set 7 | Niagara Escarpment
Processing Record 25 of Set 7 | Lake Erie
Processing Record 1 of Set 8 | Monterey-San Luis Obispo-Santa Barbara
Processing Record 2 of Set 8 | Michigan
Processing Record 3 of Set 8 | Paicines
Processing Record 4 of Set 8 | Oregon Other
Processing Record 5 of Set 8 | Dundee Hills
Processing Record 6 of Set 8 | Suisun Valley
Processing Record 7 of Set 8 | Napa County-Sonoma County-San Joaquin County
Processing Record 8 of Set 8 | Napa County-Sonoma County
Processing Record 9 of Set 8 | California
Processing R

In [58]:
# Create place_id dataframe
placeid_df = pd.DataFrame(place_id_data)
placeid_df.head(10)

Unnamed: 0,region,Place_id
0,Russian River Valley,ChIJhddrQaV7gYAR0SLRbKP8lNc
1,Clements Hills,ChIJn5y8vLhKW4YRUDdxmXnvHf8
2,Temecula Valley,ChIJEdB50Ih_24AR40vEqTx5JWI
3,Columbia Valley (WA),ChIJQyqk37W1RIYRnCG26gFyqjc
4,Nevada,ChIJcbTe-KEKmYARs5X8qooDR88
5,Anderson Valley,ChIJa8v27oYSgYARRA6kQu3ru5s
6,Solano County,ChIJB_R9n88XhYAR-L8fjKYikj0
7,New Mexico,ChIJqVKY50NQGIcRup41Yxpuv0Y
8,Sonoma County-Monterey County-Napa County,ChIJvZ9cEKi1RIYRurdRu42k-uk
9,North Coast,ChIJ91TWEw-1RIYR-SXBulV0iyk


In [59]:
# Create place_id list
place_id_list = list(placeid_df['Place_id'])
place_id_list


['ChIJhddrQaV7gYAR0SLRbKP8lNc',
 'ChIJn5y8vLhKW4YRUDdxmXnvHf8',
 'ChIJEdB50Ih_24AR40vEqTx5JWI',
 'ChIJQyqk37W1RIYRnCG26gFyqjc',
 'ChIJcbTe-KEKmYARs5X8qooDR88',
 'ChIJa8v27oYSgYARRA6kQu3ru5s',
 'ChIJB_R9n88XhYAR-L8fjKYikj0',
 'ChIJqVKY50NQGIcRup41Yxpuv0Y',
 'ChIJvZ9cEKi1RIYRurdRu42k-uk',
 'ChIJ91TWEw-1RIYR-SXBulV0iyk',
 'ChIJ4TiXsFQ-kFQRAX7WBEamkDM',
 'ChIJhUdA46i1RIYR8bUJ_CNh_7E',
 'ChIJPWLoDgm1RIYRFDHnU7D-Guw',
 'ChIJ-bDD5__lhVQRuvNfbGh4QpQ',
 'ChIJyVMZi0xzQogR_N_MxU5vH3c',
 'ChIJ3WuezxjmjYAR17wDhRQoAkk',
 'ChIJoenF7NCyhYARakPvDUbpQQE',
 'ChIJvUHEkSzkjYARem7k3ShZ7lY',
 'ChIJgQMawEWokIARUF3VYX30ANw',
 'ChIJJ8Jse77m7IARRg_vzsKQArw',
 'ChIJeSwhtQ9OlVQRr1RneWzpXDE',
 'ChIJlw9tibqU1oYRLo9u4kwHPA8',
 'ChIJWYxlsL7whIARfR_OUuFQLHQ',
 'ChIJ67ZzfzRNgIARZVnxHr6t1E8',
 'ChIJ69wnuncnhYAR4jxmM3iXoXE',
 'ChIJNRmgFHlA6YAR9LRoUsOcDTs',
 'ChIJMU8qI_lPhIARfMUQoxvSFP4',
 'ChIJOZxxaAi1RIYRe3j7KpIprWo',
 'ChIJOZxxaAi1RIYRe3j7KpIprWo',
 'ChIJO_fQoEsVolQR1LEd58-vxBI',
 'ChIJk6HV2GtUwVQR5Irl6J4W7QI',
 'ChIJn6

In [60]:
# Target valley
#region = "Sonoma Valley"

# Build the endpoint URL
valley_url2 = ('https://maps.googleapis.com/maps/api/place/details/json?&key='+ gkey)



In [61]:
# Create an empty list to hold the coord data.
coord_id_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

Beginning Data Retrieval     
-----------------------------


In [62]:
# Loop through all the place ids in the list.
for i, placeids in enumerate(place_id_list):

 # Group place ids in sets of 25 for logging purposes.
    if (i % 25 == 0 and i >= 25):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each region_id.
    coord_url = valley_url2 + '&place_id='+ placeids
    

    # Add a one second interval between queries to stay within API query limits
    time.sleep(1)
    
    # Log the URL, record, and set numbers and the place ids.
    print(f"Processing Record {record_count} of Set {set_count} | {placeids}")
    # Add 1 to the record count.
    record_count += 1 
    
# Run an API request for the place ids and retrieve the lat long info.
    try:
        # Parse the JSON and retrieve data.
        geo_url = requests.get(coord_url).json()
        # Parse out the needed data.
        lat_id = geo_url['result']['geometry']['location']['lat']
        long_id = geo_url['result']['geometry']['location']['lng']
        # Append the region information into place_id_data list.
        coord_id_data.append({"Place_id": placeids, 
                              "Latitude": lat_id,
                             "Longitude": long_id})
        


# If an error is experienced, skip the record.
    except:
        print("record not found. Skipping...")
        pass



# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")



Processing Record 1 of Set 1 | ChIJhddrQaV7gYAR0SLRbKP8lNc
Processing Record 2 of Set 1 | ChIJn5y8vLhKW4YRUDdxmXnvHf8
Processing Record 3 of Set 1 | ChIJEdB50Ih_24AR40vEqTx5JWI
Processing Record 4 of Set 1 | ChIJQyqk37W1RIYRnCG26gFyqjc
Processing Record 5 of Set 1 | ChIJcbTe-KEKmYARs5X8qooDR88
Processing Record 6 of Set 1 | ChIJa8v27oYSgYARRA6kQu3ru5s
Processing Record 7 of Set 1 | ChIJB_R9n88XhYAR-L8fjKYikj0
Processing Record 8 of Set 1 | ChIJqVKY50NQGIcRup41Yxpuv0Y
Processing Record 9 of Set 1 | ChIJvZ9cEKi1RIYRurdRu42k-uk
Processing Record 10 of Set 1 | ChIJ91TWEw-1RIYR-SXBulV0iyk
Processing Record 11 of Set 1 | ChIJ4TiXsFQ-kFQRAX7WBEamkDM
Processing Record 12 of Set 1 | ChIJhUdA46i1RIYR8bUJ_CNh_7E
Processing Record 13 of Set 1 | ChIJPWLoDgm1RIYRFDHnU7D-Guw
Processing Record 14 of Set 1 | ChIJ-bDD5__lhVQRuvNfbGh4QpQ
Processing Record 15 of Set 1 | ChIJyVMZi0xzQogR_N_MxU5vH3c
Processing Record 16 of Set 1 | ChIJ3WuezxjmjYAR17wDhRQoAkk
Processing Record 17 of Set 1 | ChIJoenF7NCyhYARa

Processing Record 14 of Set 6 | ChIJHWTSAsMRhYAR7nqXk1q6deI
Processing Record 15 of Set 6 | ChIJI74a9VJi24ARqAY4oIgOvGg
Processing Record 16 of Set 6 | ChIJ_fI5LWWsmoARS-gGEa8EBG0
Processing Record 17 of Set 6 | ChIJY2jxLkBukoARNMBojauvJF8
Processing Record 18 of Set 6 | ChIJsUP3Td8ixFQRyE51ASKNrd0
Processing Record 19 of Set 6 | ChIJU06mbyZpS4YRaxZrJLN9aSY
Processing Record 20 of Set 6 | ChIJV3SfjiVkXIYRFVb-fvmE_Rs
Processing Record 21 of Set 6 | ChIJiXWJGHVmhIARgRDJUsHC0z8
Processing Record 22 of Set 6 | ChIJOZxxaAi1RIYRe3j7KpIprWo
Processing Record 23 of Set 6 | ChIJHf28GvKL6IkRjrVcO0U2CJg
Processing Record 24 of Set 6 | ChIJ58L9WshnlVQRK_Ybl-Gu9TM
Processing Record 25 of Set 6 | ChIJZaOyhSfs0IkR7MRQ5ZLpRyQ
Processing Record 1 of Set 7 | ChIJkTJxhwlDhIARQO6YM2Hiiwc
Processing Record 2 of Set 7 | ChIJVWqfm3xuk1QRdrgLettlTH0
Processing Record 3 of Set 7 | ChIJyUnptFdil1QRG49Wjiuz6eY
Processing Record 4 of Set 7 | ChIJ6Znkhaj_WFMRWIf3FQUwa9A
Processing Record 5 of Set 7 | ChIJrQB7dGgGh

In [63]:
# Create a coordinate dataframe
coord_df = pd.DataFrame(coord_id_data)
coord_df.head(10)

Unnamed: 0,Place_id,Latitude,Longitude
0,ChIJhddrQaV7gYAR0SLRbKP8lNc,38.91198,-123.053708
1,ChIJn5y8vLhKW4YRUDdxmXnvHf8,30.271982,-97.800297
2,ChIJEdB50Ih_24AR40vEqTx5JWI,33.473917,-117.140587
3,ChIJQyqk37W1RIYRnCG26gFyqjc,30.282339,-97.742903
4,ChIJcbTe-KEKmYARs5X8qooDR88,38.80261,-116.419389
5,ChIJa8v27oYSgYARRA6kQu3ru5s,39.086566,-123.479454
6,ChIJB_R9n88XhYAR-L8fjKYikj0,38.310497,-121.901795
7,ChIJqVKY50NQGIcRup41Yxpuv0Y,34.51994,-105.87009
8,ChIJvZ9cEKi1RIYRurdRu42k-uk,30.263664,-97.740859
9,ChIJ91TWEw-1RIYR-SXBulV0iyk,30.265789,-97.748916


In [91]:
# Join the place_id and coord dataframes
region_df = pd.concat([placeid_df, coord_df], axis=1, join="inner")
    
region_df

Unnamed: 0,region,Place_id,Place_id.1,Latitude,Longitude
0,Russian River Valley,ChIJhddrQaV7gYAR0SLRbKP8lNc,ChIJhddrQaV7gYAR0SLRbKP8lNc,38.911980,-123.053708
1,Clements Hills,ChIJn5y8vLhKW4YRUDdxmXnvHf8,ChIJn5y8vLhKW4YRUDdxmXnvHf8,30.271982,-97.800297
2,Temecula Valley,ChIJEdB50Ih_24AR40vEqTx5JWI,ChIJEdB50Ih_24AR40vEqTx5JWI,33.473917,-117.140587
3,Columbia Valley (WA),ChIJQyqk37W1RIYRnCG26gFyqjc,ChIJQyqk37W1RIYRnCG26gFyqjc,30.282339,-97.742903
4,Nevada,ChIJcbTe-KEKmYARs5X8qooDR88,ChIJcbTe-KEKmYARs5X8qooDR88,38.802610,-116.419389
...,...,...,...,...,...
242,San Luis Obispo County,ChIJX3PwOP2E7IAR9ToVLk_oEGY,ChIJX3PwOP2E7IAR9ToVLk_oEGY,35.310230,-120.435763
243,Southeastern New England,ChIJpZX7V4BL5IkR4arfG9Insgw,ChIJpZX7V4BL5IkR4arfG9Insgw,41.698581,-71.479685
244,Marin County,ChIJ82aOMnyhhYARHLRVUdyH0As,ChIJ82aOMnyhhYARHLRVUdyH0As,38.083403,-122.763304
245,Texas,ChIJSTKCCzZwQIYRPN4IGI8c6xY,ChIJSTKCCzZwQIYRPN4IGI8c6xY,31.968599,-99.901813


In [92]:
# Drop Place_id column
region_df = region_df.drop(['Place_id'], axis=1)
region_df

Unnamed: 0,region,Latitude,Longitude
0,Russian River Valley,38.911980,-123.053708
1,Clements Hills,30.271982,-97.800297
2,Temecula Valley,33.473917,-117.140587
3,Columbia Valley (WA),30.282339,-97.742903
4,Nevada,38.802610,-116.419389
...,...,...,...
242,San Luis Obispo County,35.310230,-120.435763
243,Southeastern New England,41.698581,-71.479685
244,Marin County,38.083403,-122.763304
245,Texas,31.968599,-99.901813


In [93]:
# Create connection string
db_string = f"postgresql+psycopg2://postgres:" + password + "@127.0.0.1:5432/WineEnthusiast"


In [94]:
# Create the database engine
engine = create_engine(db_string)


In [96]:
# Save the region_df DataFrame to a SQL table called wineregions
region_df.to_sql(name='wineregions', con=engine)


In [97]:
region_df.to_csv("Data/region_df.csv", index=False)