In [14]:
# Dependencies to pull API
import requests
import json
# Google developer API key
from config import gkey

In [15]:
# Import Dependencies for Database
from config import password
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect
import psycopg2
import pandas as pd
import time

In [16]:
db_string = f"postgresql+psycopg2://postgres:" + password + "@127.0.0.1:5432/WineEnthusiast"

In [17]:
engine = create_engine(db_string)
inspector = inspect(engine)
inspector.get_table_names()

['wine', 'uswine', 'wineregions', 'winedata_ml']

In [18]:
connection = engine.connect
session = Session(engine)
engine.execute("SELECT * from uswine")

<sqlalchemy.engine.result.ResultProxy at 0x7fce9e62e460>

In [19]:
col_names_list = []

for i in range(len(inspector.get_columns('wine'))):
    col_names_list.append(inspector.get_columns('wine')[i]['name'])
    
print(col_names_list)

['index', 'description', 'points', 'price', 'province', 'region', 'title', 'variety', 'winery']


In [20]:
location_wine_df = pd.DataFrame(columns = col_names_list)
location_wine_df.drop(columns = ['description', 'points', 'price', 'province',
                                 'title', 'variety', 'winery'], axis = 1)

Unnamed: 0,index,region


In [21]:
# Inner join for wines and regions
import sys
join_db = engine.execute("SELECT DISTINCT region from uswine")
for record in join_db:
    record_series = pd.Series((record), index = location_wine_df.columns)
    
    location_wine_df = location_wine_df.append(record_series, ignore_index=True, verify_integrity = True)
    

In [22]:
# Display df columns
location_wine_df

Unnamed: 0,index,description,points,price,province,region,title,variety,winery
0,,,,,,Paso Robles Highlands District,,,
1,,,,,,Lake County,,,
2,,,,,,Central Valley,,,
3,,,,,,Santa Clara Valley,,,
4,,,,,,Russian River Valley,,,
...,...,...,...,...,...,...,...,...,...
205,,,,,,Monterey County,,,
206,,,,,,San Luis Obispo County,,,
207,,,,,,Marin County,,,
208,,,,,,Templeton Gap District,,,


In [23]:
# Drop unnecessary columns and duplicates
region_df = location_wine_df.drop(columns = ['index','description', 'points', 'price', 'province',
                                             'title', 'variety', 'winery'], axis = 1)
region_df

Unnamed: 0,region
0,Paso Robles Highlands District
1,Lake County
2,Central Valley
3,Santa Clara Valley
4,Russian River Valley
...,...
205,Monterey County
206,San Luis Obispo County
207,Marin County
208,Templeton Gap District


In [24]:
# Create a region list
region_list = list(region_df['region'])
region_list

['Paso Robles Highlands District',
 'Lake County',
 'Central Valley',
 'Santa Clara Valley',
 'Russian River Valley',
 'Clements Hills',
 'Temecula Valley',
 'Sonoma County-Monterey County',
 'California-Oregon',
 'Napa-Mendocino-Sonoma',
 'Anderson Valley',
 'Solano County',
 'Ancient Lakes',
 'Yakima Valley',
 'Atlas Peak',
 'Polk County',
 'Lime Kiln Valley',
 'Sonoma County-Monterey County-Napa County',
 'Columbia Valley ',
 'North Coast',
 'Puget Sound',
 'Fair Play',
 'Rutherford',
 'Washington',
 'Malibu-Newton Canyon',
 'Sonoma County-Monterey County-Santa Barbara County',
 'Sonoma Mountain',
 'Monterey-Santa Barbara-Sonoma',
 'Alta Mesa',
 'Malibu Coast',
 'Sonoma County-Napa County',
 'Rogue Valley',
 'Calaveras County',
 'El Dorado',
 'Santa Ynez Valley',
 'Solano County Green Valley',
 'San Luis Obispo',
 'Temecula',
 'Eola-Amity Hills',
 'Mokelumne River',
 'Santa Lucia Highlands',
 'Napa County-Lake County',
 'Umpqua Valley',
 'Amador-Napa',
 'Guenoc Valley',
 'Mendocino 

In [25]:
# Target valley
#region = "Sonoma Valley"

# Build the endpoint URL
valley_url = 'https://maps.googleapis.com/maps/api/place/findplacefromtext/json?inputtype=textquery&key='+ gkey



In [26]:
# Format regions to a string
regions = str(region_df)
regions.format

<function str.format>

In [27]:
# Create an empty list to hold the region data.
place_id_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

Beginning Data Retrieval     
-----------------------------


In [28]:
# Loop through all the regions in the list.
for i, region in enumerate(region_list):

 # Group regions in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each region_id.
    placeid_url = valley_url + '&input='+ region
    

    # Add a one second interval between queries to stay within API query limits
    time.sleep(1)
    
    # Log the URL, record, and set numbers and the region.
    print(f"Processing Record {record_count} of Set {set_count} | {region}")
    # Add 1 to the record count.
    record_count += 1 
    
# Run an API request for each of the regions.
    try:
        # Parse the JSON and retrieve data.
        region_url = requests.get(placeid_url).json()
        # Parse out the needed data.
        place_id = region_url['candidates'][0]['place_id']
        # Append the region information into place_id_data list.
        place_id_data.append({"region": region, 
                              "Place_id": place_id})

# If an error is experienced, skip the region.
    except:
        print("region not found. Skipping...")
        pass



# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Processing Record 1 of Set 1 | Paso Robles Highlands District
Processing Record 2 of Set 1 | Lake County
Processing Record 3 of Set 1 | Central Valley
Processing Record 4 of Set 1 | Santa Clara Valley
Processing Record 5 of Set 1 | Russian River Valley
Processing Record 6 of Set 1 | Clements Hills
Processing Record 7 of Set 1 | Temecula Valley
Processing Record 8 of Set 1 | Sonoma County-Monterey County
region not found. Skipping...
Processing Record 9 of Set 1 | California-Oregon
Processing Record 10 of Set 1 | Napa-Mendocino-Sonoma
Processing Record 11 of Set 1 | Anderson Valley
Processing Record 12 of Set 1 | Solano County
Processing Record 13 of Set 1 | Ancient Lakes
Processing Record 14 of Set 1 | Yakima Valley
Processing Record 15 of Set 1 | Atlas Peak
Processing Record 16 of Set 1 | Polk County
Processing Record 17 of Set 1 | Lime Kiln Valley
Processing Record 18 of Set 1 | Sonoma County-Monterey County-Napa County
Processing Record 19 of Set 1 | Columbia Valley 
Processing Reco

Processing Record 8 of Set 4 | Ballard Canyon
Processing Record 9 of Set 4 | Carneros
Processing Record 10 of Set 4 | El Pomar District
Processing Record 11 of Set 4 | Cienega Valley
Processing Record 12 of Set 4 | Ramona Valley
region not found. Skipping...
Processing Record 13 of Set 4 | Chelan County
Processing Record 14 of Set 4 | Potter Valley
Processing Record 15 of Set 4 | Sonoma County-Santa Barbara County
Processing Record 16 of Set 4 | Napa
Processing Record 17 of Set 4 | Sta. Rita Hills
Processing Record 18 of Set 4 | Cole Ranch
Processing Record 19 of Set 4 | Yountville
Processing Record 20 of Set 4 | Chalk Hill
Processing Record 21 of Set 4 | Sonoma Coast
Processing Record 22 of Set 4 | San Bernabe
Processing Record 23 of Set 4 | Yorkville Highlands
Processing Record 24 of Set 4 | Mendocino-Lake
Processing Record 25 of Set 4 | Stags Leap District
Processing Record 26 of Set 4 | McMinnville
Processing Record 27 of Set 4 | Clarksburg
Processing Record 28 of Set 4 | Red Mount

In [29]:
# Create place_id dataframe
placeid_df = pd.DataFrame(place_id_data)
placeid_df.head(10)

Unnamed: 0,region,Place_id
0,Paso Robles Highlands District,ChIJ1xNSFmTBQIYRBDAiMLUJ-nI
1,Lake County,ChIJKdUvmscAD4gRZsVGNFFqkhs
2,Central Valley,ChIJyZkVOx6DgoARU5w0SCOkBvY
3,Santa Clara Valley,ChIJ4Zu4Go4xjoARNcsmHfjo_dI
4,Russian River Valley,ChIJhddrQaV7gYAR0SLRbKP8lNc
5,Clements Hills,ChIJLyGI6pDQdUgRApb5vVVHUw4
6,Temecula Valley,ChIJfYKgDA5_24ARmJsb0bYUFmw
7,California-Oregon,ChIJjUnLoHnJnIARkjPe0zv-PkA
8,Napa-Mendocino-Sonoma,ChIJ3_Uwla-uhYARAAmNhPiOav0
9,Anderson Valley,ChIJa8v27oYSgYARRA6kQu3ru5s


In [30]:
# Create place_id list
place_id_list = list(placeid_df['Place_id'])
place_id_list


['ChIJ1xNSFmTBQIYRBDAiMLUJ-nI',
 'ChIJKdUvmscAD4gRZsVGNFFqkhs',
 'ChIJyZkVOx6DgoARU5w0SCOkBvY',
 'ChIJ4Zu4Go4xjoARNcsmHfjo_dI',
 'ChIJhddrQaV7gYAR0SLRbKP8lNc',
 'ChIJLyGI6pDQdUgRApb5vVVHUw4',
 'ChIJfYKgDA5_24ARmJsb0bYUFmw',
 'ChIJjUnLoHnJnIARkjPe0zv-PkA',
 'ChIJ3_Uwla-uhYARAAmNhPiOav0',
 'ChIJa8v27oYSgYARRA6kQu3ru5s',
 'ChIJB_R9n88XhYAR-L8fjKYikj0',
 'ChIJUVu5cvx4mVQRb1CuMS_kcro',
 'ChIJtwIWxMKHl1QR30xHsvUB3dI',
 'ChIJW_s0SRvHQIYREXn0DXARk9U',
 'ChIJ0bDiw-jyOIYRPrqcrZnzyQc',
 'ChIJL4EPzdHbt4kRyUilrlIwE00',
 'ChIJ1xNSFmTBQIYRBDAiMLUJ-nI',
 'ChIJ98VFkPPAQIYR75ugr82kemY',
 'ChIJqfWOgUHBQIYRZxQ6ObUFWuo',
 'ChIJ4TiXsFQ-kFQRAX7WBEamkDM',
 'ChIJVxVoJMmIWIgROSgV3SYKqi8',
 'ChIJkUqt1aP4wokRAB032V1oBsw',
 'ChIJ-bDD5__lhVQRuvNfbGh4QpQ',
 'ChIJPe2rmfci6IARPNp63LhdpgQ',
 'ChIJ1xNSFmTBQIYRBDAiMLUJ-nI',
 'ChIJoenF7NCyhYARakPvDUbpQQE',
 'ChIJ1xNSFmTBQIYRBDAiMLUJ-nI',
 'ChIJ8WejwhXZQIYRwKRHGSX22W0',
 'ChIJP9YI-akd6IARWozqsnEtt5M',
 'ChIJg3cxDPaHxVQRde0DiGnDILs',
 'ChIJgQMawEWokIARUF3VYX30ANw',
 'ChIJ0Y

In [31]:
# Target valley
#region = "Sonoma Valley"

# Build the endpoint URL
valley_url2 = ('https://maps.googleapis.com/maps/api/place/details/json?&key='+ gkey)



In [32]:
# Create an empty list to hold the coord data.
coord_id_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

Beginning Data Retrieval     
-----------------------------


In [33]:
# Loop through all the place ids in the list.
for i, placeids in enumerate(place_id_list):

 # Group place ids in sets of 25 for logging purposes.
    if (i % 25 == 0 and i >= 25):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each region_id.
    coord_url = valley_url2 + '&place_id='+ placeids
    

    # Add a one second interval between queries to stay within API query limits
    time.sleep(1)
    
    # Log the URL, record, and set numbers and the place ids.
    print(f"Processing Record {record_count} of Set {set_count} | {placeids}")
    # Add 1 to the record count.
    record_count += 1 
    
# Run an API request for the place ids and retrieve the lat long info.
    try:
        # Parse the JSON and retrieve data.
        geo_url = requests.get(coord_url).json()
        # Parse out the needed data.
        lat_id = geo_url['result']['geometry']['location']['lat']
        long_id = geo_url['result']['geometry']['location']['lng']
        # Append the region information into place_id_data list.
        coord_id_data.append({"Place_id": placeids, 
                              "Latitude": lat_id,
                             "Longitude": long_id})
        


# If an error is experienced, skip the record.
    except:
        print("record not found. Skipping...")
        pass



# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")



Processing Record 1 of Set 1 | ChIJ1xNSFmTBQIYRBDAiMLUJ-nI
Processing Record 2 of Set 1 | ChIJKdUvmscAD4gRZsVGNFFqkhs
Processing Record 3 of Set 1 | ChIJyZkVOx6DgoARU5w0SCOkBvY
Processing Record 4 of Set 1 | ChIJ4Zu4Go4xjoARNcsmHfjo_dI
Processing Record 5 of Set 1 | ChIJhddrQaV7gYAR0SLRbKP8lNc
Processing Record 6 of Set 1 | ChIJLyGI6pDQdUgRApb5vVVHUw4
Processing Record 7 of Set 1 | ChIJfYKgDA5_24ARmJsb0bYUFmw
Processing Record 8 of Set 1 | ChIJjUnLoHnJnIARkjPe0zv-PkA
Processing Record 9 of Set 1 | ChIJ3_Uwla-uhYARAAmNhPiOav0
Processing Record 10 of Set 1 | ChIJa8v27oYSgYARRA6kQu3ru5s
Processing Record 11 of Set 1 | ChIJB_R9n88XhYAR-L8fjKYikj0
Processing Record 12 of Set 1 | ChIJUVu5cvx4mVQRb1CuMS_kcro
Processing Record 13 of Set 1 | ChIJtwIWxMKHl1QR30xHsvUB3dI
Processing Record 14 of Set 1 | ChIJW_s0SRvHQIYREXn0DXARk9U
Processing Record 15 of Set 1 | ChIJ0bDiw-jyOIYRPrqcrZnzyQc
Processing Record 16 of Set 1 | ChIJL4EPzdHbt4kRyUilrlIwE00
Processing Record 17 of Set 1 | ChIJ1xNSFmTBQIYRB

Processing Record 14 of Set 6 | ChIJNTjFJtmhmoARRuhsu5jkrY4
Processing Record 15 of Set 6 | ChIJj7atfV0ihIAR_qYnBSATSUk
Processing Record 16 of Set 6 | ChIJvcTFFo_d7IAR79XMkfgilEY
Processing Record 17 of Set 6 | ChIJDYp1OaiuhYARIwCuBhIwVOs
Processing Record 18 of Set 6 | ChIJ_8VcLuc7koARQXmgrXGnQxc
Processing Record 19 of Set 6 | ChIJBQe7GKjKQIYRxG0mtKNvREs
Processing Record 20 of Set 6 | ChIJ15UlxTFV6YAR7rCBTktGyM0
Processing Record 21 of Set 6 | ChIJc91fIoDbRIYR_iZWpJpCMiE
Processing Record 22 of Set 6 | ChIJrZOHqndEE4cR4G9TwVWzjaY
Processing Record 23 of Set 6 | ChIJ9_fr2eEZkoARE_rvkQgeKQg
Processing Record 24 of Set 6 | ChIJVZv5TUp_m1QRmJDpy3364Ys
Processing Record 25 of Set 6 | ChIJE73iaKRwgYARCU5su-oXmZc
Processing Record 1 of Set 7 | ChIJ1xNSFmTBQIYRBDAiMLUJ-nI
Processing Record 2 of Set 7 | ChIJlUVtthq-QIYROvuI8dr98Mg
Processing Record 3 of Set 7 | ChIJt82dTpwe7IAR9oK1FQs83rg
Processing Record 4 of Set 7 | ChIJA_obkPM1R4YRkWSOF6_6hCg
Processing Record 5 of Set 7 | ChIJfRoJQE1Vh

In [34]:
# Create a coordinate dataframe
coord_df = pd.DataFrame(coord_id_data)
coord_df.head(10)

Unnamed: 0,Place_id,Latitude,Longitude
0,ChIJ1xNSFmTBQIYRBDAiMLUJ-nI,29.738575,-95.471458
1,ChIJKdUvmscAD4gRZsVGNFFqkhs,42.368936,-87.827153
2,ChIJyZkVOx6DgoARU5w0SCOkBvY,40.199878,-122.201108
3,ChIJ4Zu4Go4xjoARNcsmHfjo_dI,37.248848,-121.839959
4,ChIJhddrQaV7gYAR0SLRbKP8lNc,38.91198,-123.053708
5,ChIJLyGI6pDQdUgRApb5vVVHUw4,51.234198,-0.574111
6,ChIJfYKgDA5_24ARmJsb0bYUFmw,33.502456,-117.116847
7,ChIJjUnLoHnJnIARkjPe0zv-PkA,39.59389,-121.52833
8,ChIJ3_Uwla-uhYARAAmNhPiOav0,38.284,-122.456753
9,ChIJa8v27oYSgYARRA6kQu3ru5s,39.086566,-123.479454


In [35]:
# Join the place_id and coord dataframes
region_df = pd.concat([placeid_df, coord_df], axis=1, join="inner")
    
region_df

Unnamed: 0,region,Place_id,Place_id.1,Latitude,Longitude
0,Paso Robles Highlands District,ChIJ1xNSFmTBQIYRBDAiMLUJ-nI,ChIJ1xNSFmTBQIYRBDAiMLUJ-nI,29.738575,-95.471458
1,Lake County,ChIJKdUvmscAD4gRZsVGNFFqkhs,ChIJKdUvmscAD4gRZsVGNFFqkhs,42.368936,-87.827153
2,Central Valley,ChIJyZkVOx6DgoARU5w0SCOkBvY,ChIJyZkVOx6DgoARU5w0SCOkBvY,40.199878,-122.201108
3,Santa Clara Valley,ChIJ4Zu4Go4xjoARNcsmHfjo_dI,ChIJ4Zu4Go4xjoARNcsmHfjo_dI,37.248848,-121.839959
4,Russian River Valley,ChIJhddrQaV7gYAR0SLRbKP8lNc,ChIJhddrQaV7gYAR0SLRbKP8lNc,38.911980,-123.053708
...,...,...,...,...,...
189,Monterey County,ChIJUV7XWK6z7IARFNInOyD3gBc,ChIJUV7XWK6z7IARFNInOyD3gBc,36.313620,-121.354163
190,San Luis Obispo County,ChIJX3PwOP2E7IAR9ToVLk_oEGY,ChIJX3PwOP2E7IAR9ToVLk_oEGY,35.310230,-120.435763
191,Marin County,ChIJ82aOMnyhhYARHLRVUdyH0As,ChIJ82aOMnyhhYARHLRVUdyH0As,38.083403,-122.763304
192,Templeton Gap District,ChIJMdXLkFlFE4cRHXwvGjutgIc,ChIJMdXLkFlFE4cRHXwvGjutgIc,38.863304,-104.831604


In [36]:
# Drop Place_id column
region_df = region_df.drop(['Place_id'], axis=1)
region_df

Unnamed: 0,region,Latitude,Longitude
0,Paso Robles Highlands District,29.738575,-95.471458
1,Lake County,42.368936,-87.827153
2,Central Valley,40.199878,-122.201108
3,Santa Clara Valley,37.248848,-121.839959
4,Russian River Valley,38.911980,-123.053708
...,...,...,...
189,Monterey County,36.313620,-121.354163
190,San Luis Obispo County,35.310230,-120.435763
191,Marin County,38.083403,-122.763304
192,Templeton Gap District,38.863304,-104.831604


In [38]:
# Create connection string
db_string = f"postgresql+psycopg2://postgres:" + password + "@127.0.0.1:5432/WineEnthusiast"


In [39]:
# Create the database engine
engine = create_engine(db_string)


In [40]:
# Save the region_df DataFrame to a SQL table called wineregions
region_df.to_sql(name='wineregions', con=engine)


In [41]:
region_df.to_csv("Data/region_df.csv", index=False)