In [1]:
#!pip install gmaps
#!jupyter nbextension enable --py gmaps
#!pip install beautifulsoup4
#!pip install google

In [3]:
import pandas as pd
import gmaps as gmaps
import random
import numpy as np
import sys
import requests
import json
from pprint import pprint
from googlesearch import search 

# Add the directory two levels above the working directory to the module search
# path so that the `config` module imported just below can be resolved from there.
sys.path.append('../..')
# API keys are read from the config module rather than being written in the notebook.
from config import google_api_key
from config import aw_weather_api_key

# Give gmaps the Google API key; this is done once here, before gmaps is used.
gmaps.configure(api_key=google_api_key)

In [4]:
#importing the edited CSV
# The path is relative to the notebook's working directory; the last line
# displays the first row of the loaded frame as a quick sanity check.

csv_file = pd.read_csv("winedata_coord_type_2019JAN21.csv")
csv_file.head(1)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Title,Variety,Year,Price,Grade,Winery,Province,Region,Country,Taster,Taster_Twitter,Description,Latitude,Longitude,Type
0,0,1,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,2011,15.0,87,Quinta dos Avidagos,Douro,,Portugal,Roger Voss,@vossroger,"This is ripe and fruity, a wine that is smooth...",41.512019,-5.492255,Red


In [5]:
#Removing the "Unnamed: 0" and "Unnamed: 0.1" columns
# (presumably index columns left over from earlier CSV exports - TODO confirm).
# drop() returns a new frame, so csv_file itself is left unchanged.

df = csv_file.drop(columns=["Unnamed: 0", "Unnamed: 0.1"])
df.head(1)

Unnamed: 0,Title,Variety,Year,Price,Grade,Winery,Province,Region,Country,Taster,Taster_Twitter,Description,Latitude,Longitude,Type
0,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,2011,15.0,87,Quinta dos Avidagos,Douro,,Portugal,Roger Voss,@vossroger,"This is ripe and fruity, a wine that is smooth...",41.512019,-5.492255,Red


In [6]:
#Dropping the decimals from the "Price" column (the column keeps its name)
# astype(int) truncates toward zero exactly like int(x) did per element, but is
# vectorised and does not leave a temporary list behind in the kernel namespace.
# Like the per-element version, it raises if the column contains missing values.

df["Price"] = df["Price"].astype(int)
df.head(2)

Unnamed: 0,Title,Variety,Year,Price,Grade,Winery,Province,Region,Country,Taster,Taster_Twitter,Description,Latitude,Longitude,Type
0,Quinta dos Avidagos 2011 Avidagos Red (Douro),Portuguese Red,2011,15,87,Quinta dos Avidagos,Douro,,Portugal,Roger Voss,@vossroger,"This is ripe and fruity, a wine that is smooth...",41.512019,-5.492255,Red
1,Quinta do Portal 2012 Verdelho and Sauvignon B...,Portuguese White,2012,15,87,Quinta do Portal,Douro,,Portugal,Roger Voss,@vossroger,Verdelho and Sauvignon Blanc hangs together ea...,41.512019,-5.492255,White


In [7]:
# Append " Wine" to the four specific type labels in a single pass; any other
# value in the column (for example "Other") is left exactly as it is.
df["Type"] = df["Type"].replace({
    "Red": "Red Wine",
    "White": "White Wine",
    "Blush": "Blush Wine",
    "Sparkling": "Sparkling Wine",
})

# Show how many reviews fall under each label.
wine_types = df["Type"]
wine_types.value_counts()

Red Wine          53691
White Wine        27015
Other             14000
Blush Wine         5344
Sparkling Wine     1035
Name: Type, dtype: int64

In [8]:
#This contains all the functions we need.

def find_title(dataframe, row):
    """Return the "Title" value stored at index label ``row`` of ``dataframe``."""
    return dataframe.loc[row, "Title"]

def find_year(dataframe, row):
    """Return the "Year" value stored at index label ``row`` of ``dataframe``.

    The cell is read directly instead of through a row slice of every column up
    to "Year": such a slice is converted to one common dtype, so an integer year
    sitting after float columns would come back as a float (e.g. 2011.0).
    """
    return dataframe.loc[row, "Year"]

def find_country(dataframe, row):
    """Return the "Country" value stored at index label ``row`` of ``dataframe``."""
    return dataframe.loc[row, "Country"]

def find_description(dataframe, row):
    """Return the "Description" value stored at index label ``row`` of ``dataframe``."""
    return dataframe.loc[row, "Description"]

def find_price(dataframe, row):
    """Return the "Price" value stored at index label ``row`` of ``dataframe``.

    The cell is read directly instead of through a row slice of every column up
    to "Price": such a slice is converted to one common dtype, so an integer
    price sitting after float columns would come back as a float (e.g. 15.0).
    """
    return dataframe.loc[row, "Price"]

def print_wine_info(dataframe, row):
    """Print a three-line description of the wine at index label ``row``.

    The lines are the quoted title, then country / year / price, then the
    reviewer's description. Each value is looked up immediately before the line
    that uses it is printed.
    """
    title = find_title(dataframe, row)
    print(f'     "{title}".')

    country = find_country(dataframe, row)
    year = find_year(dataframe, row)
    price = find_price(dataframe, row)
    print(f'     The wine is from {country}, made in {year}, and may cost you ${price}.')

    description = find_description(dataframe, row)
    print(f'     A reviewer says "{description}".')

def print_remaining_wine(dataframe):
    """Print a short summary of the wines still available in ``dataframe``.

    The first line reports the number of rows and the number of distinct
    "Country" values. The second line reports the year range and the price
    range. For an empty frame there is no range to report, so a short notice
    is printed instead of raising.
    """
    review_count = len(dataframe["Title"])
    country_count = len(dataframe["Country"].unique())
    print(f'We can help you find a wine from {review_count} wine-reviews from {country_count} countries.')

    if review_count == 0:
        # min()/max() of an empty column is undefined; say so rather than crash.
        print('No wines match the current selection, so there is no year or price range to show.')
        return

    years = dataframe["Year"]
    prices = dataframe["Price"]
    # Series.min()/max() skip missing values; the builtin min()/max() compare
    # against NaN and can return NaN depending on row order.
    print(f'These wine are from {years.min()} to {years.max()} with a price range of ${prices.min()} to ${prices.max()}.')
    
def pick_a_wine(dataframe):
    """Return a random integer in ``[0, number of rows)``.

    The result is only a valid row label when ``dataframe`` has a default
    0..n-1 index (callers reset the index before calling).
    """
    row_count = len(dataframe["Title"])
    return random.randrange(row_count)

def median_score(dataframe):
    """Return the median of the "Grade" column of ``dataframe``."""
    grades = dataframe["Grade"]
    return np.median(grades)

def summarize(dataframe):
    """Return a one-row DataFrame counting the distinct values of key columns.

    Each output column holds the number of unique values found in the matching
    input column (titles, wineries, provinces, countries, years, tasters).
    """
    # (output label, input column) pairs, in output order.
    labelled_columns = (
        ("Total Number of Wine Reviews", "Title"),
        ("Total Number of Wineries", "Winery"),
        ("Total Number of Provinces", "Province"),
        ("Total Number of Countries", "Country"),
        ("Total Number of Years", "Year"),
        ("Total Number of Tasters", "Taster"),
        # A "Total Number of Types" / "Type" entry is intentionally left out.
    )

    counts = {}
    for label, column in labelled_columns:
        counts[label] = [len(dataframe[column].unique())]
    return pd.DataFrame(counts)



In [9]:
#introducing our program
# The two summary lines (review/country counts, year and price ranges) come from
# print_remaining_wine so the wording stays in one place.

print('----------------------------')
print('Welcome to our Wine-Database.')
print_remaining_wine(df)
print('We can either tailor a wine to your liking, or randomly generate a list of highly rated wines.')


----------------------------
Welcome to our Wine-Database.
We can help you find a wine from 101085 wine-reviews from 12 countries.
These wine are from 2004 to 2016 with a price range of $4 to $3300.
We can either tailor a wine to your liking, or randomly generate a list of highly rated wines.


In [24]:
#Our program

#First Answer = Yes or No
#Second Answer = Type, or Skip
#Third Answer = Price or Skip
#Fourth Answer = Country or Skip
#Fifth Answer = Flavor (listed in the plan, but not asked by this cell)


def _narrow_by_preferences(dataframe):
    """Interactively narrow ``dataframe`` by wine type, price limit and country.

    Each question can be skipped by typing 'skip' (any casing). Type and country
    answers must match one of the listed values exactly. Every filter is applied
    on top of the previous ones, and the country list is built from the wines
    that are still left.

    Returns the filtered DataFrame, or None when an answer is not recognised or
    the price limit leaves no wine to choose from.
    """
    candidates = dataframe

    # Second answer: wine type.
    type_options = dataframe["Type"].unique()
    print("")
    print(type_options)
    second_answer = input("What is your choice of wine type? If you have no preference, type 'Skip': ")
    if second_answer.lower() != "skip":
        if second_answer not in type_options:
            print(f'Sorry, "{second_answer}" is not one of the listed wine types.')
            return None
        candidates = candidates.loc[candidates["Type"] == second_answer]

    # Third answer: price limit (inclusive).
    print("")
    print_remaining_wine(candidates)
    third_answer = input("What is your price limit?” Either type the dollar amount, or type skip if you have too much money: ")
    if third_answer.isdecimal():
        candidates = candidates.loc[candidates["Price"] <= int(third_answer)]
        if candidates.empty:
            # Stop here: there is nothing left to summarise or pick from.
            print("")
            print(f'Sorry, no wine costs ${int(third_answer)} or less.')
            return None
    elif third_answer.lower() != "skip":
        print(f'Sorry, "{third_answer}" is neither a whole dollar amount nor \'skip\'.')
        return None
    print("")
    print_remaining_wine(candidates)

    # Fourth answer: country, chosen among the countries still available.
    country_options = candidates["Country"].unique()
    print("")
    print(country_options)
    fourth_answer = input("What country's wine do you prefer? If you have no preference, type 'skip': " )
    if fourth_answer.lower() != "skip":
        if fourth_answer not in country_options:
            print(f'Sorry, "{fourth_answer}" is not one of the listed countries.')
            return None
        candidates = candidates.loc[candidates["Country"] == fourth_answer]

    return candidates


def _show_top_rated_pick(candidates):
    """Pick and print a random wine graded at or above the median of ``candidates``.

    Returns ``(top_rated, row)``: the re-indexed frame of wines at or above the
    median grade, and the row label of the wine that was printed.
    """
    top_rated = candidates.loc[candidates["Grade"] >= median_score(candidates)]
    # pick_a_wine returns a position in 0..n-1, so the index must be reset
    # before that number can be used as a row label.
    top_rated = top_rated.reset_index(drop=True)
    row = pick_a_wine(top_rated)
    print("")
    print_wine_info(top_rated, row)
    return top_rated, row


wine_types = df["Type"].unique()

first_answer = input("Type 'Yes' if you want us to help you find a wine, 'No' if you just want to see a random list of wine: ")
if first_answer.lower() == "yes":
    matching_wines = _narrow_by_preferences(df)
elif first_answer.lower() == "no":
    matching_wines = df
else:
    print("Sorry, please answer 'Yes' or 'No'.")
    matching_wines = None

if matching_wines is not None:
    chosen_wines, choice_wine = _show_top_rated_pick(matching_wines)
    # These names are read by the map, weather and search cells further down.
    # They are only (re)assigned when a wine was actually chosen.
    lat = float(chosen_wines.loc[choice_wine, "Latitude"])
    long = float(chosen_wines.loc[choice_wine, "Longitude"])
    location = [(lat, long)]
    wine_year = int(chosen_wines.loc[choice_wine, "Year"])
    query = find_title(chosen_wines, choice_wine)
    
             
                

Type 'Yes' if you want us to help you find a wine, 'No' if you just want to see a random list of wine: Yes

['Red Wine' 'White Wine' 'Other' 'Blush Wine' 'Sparkling Wine']
What is your choice of wine type? If you have no preference, type 'Skip': skip

We can help you find a wine from 101085 wine-reviews from 12 countries.
These wine are from 2004 to 2016 with a price range of $4 to $3300.
What is your price limit?” Either type the dollar amount, or type skip if you have too much money: 20

We can help you find a wine from 37742 wine-reviews from 12 countries.
These wine are from 2004 to 2016 with a price range of $4 to $20.

['Portugal' 'US' 'Spain' 'Italy' 'France' 'Germany' 'Argentina' 'Chile'
 'Australia' 'Austria' 'South Africa' 'New Zealand']
What country's wine do you prefer? If you have no preference, type 'skip': skip

     "El Roy 2012 Nelson Vineyards Sauvignon Blanc (Dry Creek Valley)".
     The wine is from US, made in 2012, and may cost you $16.
     A reviewer says "Run, 

In [26]:
#Winery Geo-location
# Plots the chosen wine's coordinates as a marker on a gmaps figure.
# `location` is the one-element list of (latitude, longitude) built by the
# wine-picking cell, so that cell must have run and selected a wine first.

fig = gmaps.figure()
print(location) 
marker_layer = gmaps.marker_layer(location)
fig.add_layer(marker_layer)
# Leaving `fig` as the last expression makes the notebook display it.
fig


[(36.778261, -119.4179324)]


Figure(layout=FigureLayout(height='420px'))

In [27]:
#Getting Weather Information
# Downloads the daily weather at the winery's coordinates for one summer month
# of the wine's year and averages temperature and precipitation over that month.
# Relies on lat, long and wine_year set by the wine-picking cell.

weather_url = "http://api.worldweatheronline.com/premium/v1/past-weather.ashx?q="

# The summer month depends on the hemisphere. Latitude 0 is handled by the
# northern branch so that a request is always made.
if lat < 0: #Southern Hemisphere
    month = "February"
    month_number = 2
else: #Northern hemisphere (and the equator)
    month = "August"
    month_number = 8

# Use the real last day of the month (28 or 29 for February) instead of a
# hard-coded 31, which is not a valid date in February.
year = int(wine_year)
last_day = pd.Timestamp(year=year, month=month_number, day=1).days_in_month
start_date = f"{year}-{month_number:02d}-01"
end_date = f"{year}-{month_number:02d}-{last_day:02d}"

# NOTE(review): tp=24 presumably asks for a single 24-hour slot per day, which
# is why only hourly[0] is read below - confirm against the API documentation.
response = requests.get(
    f"{weather_url}{lat},{long}&key={aw_weather_api_key}&date={start_date}&enddate={end_date}&tp=24&format=json",
    timeout=30,  # do not let a stalled connection hang the notebook
)
response.raise_for_status()
weather_info = response.json()

daily_weather = weather_info["data"]["weather"]
fahrenheit = [int(day["hourly"][0]["tempF"]) for day in daily_weather]
# Keep the fractional millimetres: truncating each day to an integer turns
# every sub-millimetre reading into 0 and understates the average.
precip = [float(day["hourly"][0]["precipMM"]) for day in daily_weather]

avg_temp = round(np.average(fahrenheit), 2)
avg_precip = round(np.average(precip), 2)


weather_information = pd.DataFrame({'Avg Daily Temp for Summer (°F)' : [avg_temp],
                                    'Avg Daily Precipiation for Summer (mm)': [avg_precip]})

weather_information


Unnamed: 0,Avg Daily Temp for Summer (°F),Avg Daily Precipiation for Summer (mm)
0,107.9,0.0


In [30]:
#Google Searches
# Prints the chosen wine's title (query, set by the wine-picking cell) and then
# every URL yielded by googlesearch's search() for that title.
# NOTE(review): the recorded output lists more URLs than stop=1 suggests -
# confirm how the installed googlesearch version interprets num/stop.
# pause=2 is presumably the delay in seconds between requests - verify.

print(query)
for j in search(query, tld="com", num=5, stop=1, pause=2): 
    print(j)

El Roy 2012 Nelson Vineyards Sauvignon Blanc (Dry Creek Valley)
http://www.tastings.com/Wine-Review/ElRoy-2012-Nelson-Vineyard-Sauvignon-Blanc-Dry-Creek-Valley-USA-12-04-2013.aspx
https://www.wine-searcher.com/find/draxton+el+roi+nelson+sauv+blanc+dry+creek+valley+sonoma+county+north+coast+california+usa
http://www.draxtonwines.com/public/w44008/Tech_Notes_-_Draxton_2014_Nelson_Vineyard_Sauvignon_Blanc_Dry_Creek_Valley-RDP.pdf
https://www.cellartracker.com/wine.asp?iWine=1138699
https://www.wine.com/product/dry-creek-vineyard-sauvignon-blanc-2012/122577
https://www.wine-searcher.com/find/quivira+sauv+blanc+dry+creek+valley+sonoma+county+north+coast+california+usa
https://www.wine-searcher.com/find/mauritson+sauv+blanc+dry+creek+valley+sonoma+county+north+coast+california+usa
https://www.drycreekvineyard.com/news-events/latest-reviews/
