## Import dependencies

In [1]:
import os  #Use to create readable paths
from datetime import datetime
from time import time

import pandas as pd
from numpy import genfromtxt
from sqlalchemy import Column, Integer, Float, Date, String
from sqlalchemy import create_engine, text
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker, Session

# Base class produced by SQLAlchemy's declarative_base(). It is not referenced
# by any of the cells visible in this part of the notebook.
# NOTE(review): presumably intended for ORM model classes -- confirm or remove.
Base = declarative_base()

## Create a dictionary to convert special characters

In [2]:
# Accented letters and the plain letters that stand in for them.
_accent_to_plain = {
    'í': 'i',
    'á': 'a',
    'à': 'a',
    'é': 'e',
    'ê': 'e',
    'ö': 'o',
    'ñ': 'n',
}

# Full replacement map: the accent substitutions first, then hyphen -> space.
# The keys are passed to DataFrame.replace(..., regex=True) in the load cell,
# so each key is treated as a regular-expression pattern.
dictionary = {**_accent_to_plain, '-': ' '}

## Load csv files into pandas dataframes and convert special characters

In [3]:
# --- File locations ---------------------------------------------------------
# Paths are relative to the notebook's working directory. They are assembled
# from components so os.path.join actually supplies the separators.
wiki_data_dir = os.path.join("..", "Wikiwinescrape", "saveData")
winemag_data_dir = os.path.join("..", "WineMag_Scrape", "saveData")

ava_details_path = os.path.join(wiki_data_dir, "avadetails.csv")
ava_list_path = os.path.join(wiki_data_dir, "avalist.csv")
pairing_path = os.path.join(wiki_data_dir, "pairing.csv")
varietals_path = os.path.join(wiki_data_dir, "varietals.csv")
winesAll_path = os.path.join(winemag_data_dir, "winesAll.csv")
# NOTE: a USA-only extract (../WineMag_Scrape/Resources/winesUSA.csv) is not loaded here.


def load_csv(path, columns, normalize=None, char_map=None):
    """Read a UTF-8 CSV, keep only ``columns`` and optionally normalise text.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    columns : list of str
        Columns to keep, in the order they should appear in the result.
    normalize : None, "all" or list of str, optional
        ``None`` leaves the data untouched, ``"all"`` applies ``char_map`` to
        every column, and a list applies it only to the named columns.
    char_map : dict, optional
        Pattern -> replacement map handed to ``replace(..., regex=True)``.
        Defaults to the notebook-level ``dictionary``.

    Returns
    -------
    pandas.DataFrame
        A new frame; nothing is modified in place.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist.
    KeyError
        If any of ``columns`` is missing from the file.
    """
    frame = pd.read_csv(path, encoding="UTF-8")
    frame.columns = frame.columns.astype(str)
    frame = frame[columns]
    if normalize is None:
        return frame

    mapping = dictionary if char_map is None else char_map
    if isinstance(normalize, str) and normalize == "all":
        return frame.replace(mapping, regex=True)

    # Work on an explicit copy so the column assignment never touches a view.
    frame = frame.copy()
    frame[normalize] = frame[normalize].replace(mapping, regex=True)
    return frame


# --- Read the CSV files -----------------------------------------------------
# The wine and varietal tables are normalised in full. Their join partners
# (the "ava" and pairing "name" columns) get the same treatment on the key
# column only, so accents and hyphens cannot make the later merges miss rows.
df_ava_details = load_csv(
    ava_details_path,
    ["ava", "state", "year", "area", "climate", "grapes"],
    normalize=["ava"],
)
df_ava_list = load_csv(ava_list_path, ["ava", "state", "region"], normalize=["ava"])
df_pairing = load_csv(pairing_path, ["name", "cheese"], normalize=["name"])
df_varietals = load_csv(varietals_path, ["name", "desc"], normalize="all")
df_winesAll = load_csv(
    winesAll_path,
    ["title", "wine", "vintage", "vinyard", "variety", "avi", "region", "state",
     "alcohol_content", "size", "winetype", "price", "score", "dt_published", "taster"],
    normalize="all",
)

df_winesAll.head(2)


Unnamed: 0,title,wine,vintage,vinyard,variety,avi,region,state,alcohol_content,size,winetype,price,score,dt_published,taster
0,Lucia 2017 Garys' Vineyard Pinot Noir,Lucia,2017,Garys' Vineyard,Pinot Noir,Santa Lucia Highlands,Central Coast,California,14.2%,750 ml,Red,$65,97,6/1/2019,Matt Kettmann
1,Laetitia 2016 La Coupelle Pinot Noir,Laetitia,2016,La Coupelle,Pinot Noir,Arroyo Grande Valley,Central Coast,California,14.3%,750 ml,Red,$62,96,6/1/2019,Matt Kettmann


## Individual cleanups of the underlying data
### (after reviewing the data as CSV in Excel)

- avalist => avadetails
- "Red Hill Douglas County, Oregon" => "Red Hill Douglas County"

In [4]:
# Drop the trailing ", Oregon" from this one AVA name.
# NOTE(review): presumably so it matches the spelling used in avadetails -- confirm.
ava_list_fix = {'Red Hill Douglas County, Oregon': 'Red Hill Douglas County'}
df_ava_list.replace(to_replace=ava_list_fix, regex=True, inplace=True)

# Display the corrected row(s) to confirm the replacement took effect.
is_red_hill = df_ava_list["ava"] == "Red Hill Douglas County"
df_ava_list[is_red_hill].head()

Unnamed: 0,ava,state,region
199,Red Hill Douglas County,Oregon,


In [None]:
# Empty placeholder for varietal-name corrections; nothing in the visible
# cells reads it yet.
varietals_fix = dict()

## Merge the ava information into a single dataframe
- Keeping ava details and the regions that match
- This table can also be merged with new information on the avi field

In [5]:
# Left-join the AVA list onto the AVA details, keep the details' state column
# (the list's copy is dropped) and rename the key to "avi".
df_ava_all = (
    df_ava_details
    .merge(df_ava_list, how="left", on="ava")
    .rename(columns={"state_x": "state", "ava": "avi"})
    .drop(columns=["state_y"])
)

df_ava_all.head(2)

Unnamed: 0,avi,state,year,area,climate,grapes,region
0,Sonoita,Arizona,1984.0,"208,000 acres (84,200 ha)",Subtropical continental,Cabernet Franc|Cabernet Sauvignon|Chardonnay|M...,
1,Willcox,Arizona,,,,,


## Merge the varietals and pairing information
- Keeping all varietals and just the pairings that match
- This table can also be merged with new information on the variety field

In [6]:
# Every varietal is kept; pairing columns are filled only where the name matches.
# The key is then renamed to "variety".
df_var_pairs = (
    df_varietals
    .merge(df_pairing, how="left", on="name")
    .rename(columns={"name": "variety"})
)

df_var_pairs.head(2)

Unnamed: 0,variety,desc,cheese
0,Albarino,"Spanish white wine grape that makes crisp, ref...",
1,Aligote,White wine grape grown in Burgundy making medi...,


## Merge all tables into a single dataframe

In [7]:

# Step 1: attach AVA details to each wine via the shared "avi" column.
df_ava_wines = df_winesAll.merge(df_ava_all, how="left", on="avi")
# Step 2: attach varietal description and pairing via "variety".
df_all_table = df_ava_wines.merge(df_var_pairs, how="left", on="variety")

df_all_table.head(10)

Unnamed: 0,title,wine,vintage,vinyard,variety,avi,region_x,state_x,alcohol_content,size,...,dt_published,taster,state_y,year,area,climate,grapes,region_y,desc,cheese
0,Lucia 2017 Garys' Vineyard Pinot Noir,Lucia,2017,Garys' Vineyard,Pinot Noir,Santa Lucia Highlands,Central Coast,California,14.2%,750 ml,...,6/1/2019,Matt Kettmann,California,1990,"22,000 acres (8,903 ha)",,Cabernet Franc|Cabernet Sauvignon|Chardonnay|C...,Central Coast and Santa Cruz Mountains,"This is the great, noble grape of Burgundy. Di...",
1,Laetitia 2016 La Coupelle Pinot Noir,Laetitia,2016,La Coupelle,Pinot Noir,Arroyo Grande Valley,Central Coast,California,14.3%,750 ml,...,6/1/2019,Matt Kettmann,California,1990,"42,880 acres (17,353 ha)",,Chardonnay|Counoise|Grenache|Mourvedre|Petite ...,Central Coast and Santa Cruz Mountains,"This is the great, noble grape of Burgundy. Di...",
2,Lucia 2017 Soberanes Vineyard Pinot Noir,Lucia,2017,Soberanes Vineyard,Pinot Noir,Santa Lucia Highlands,Central Coast,California,14.3%,750 ml,...,6/1/2019,Matt Kettmann,California,1990,"22,000 acres (8,903 ha)",,Cabernet Franc|Cabernet Sauvignon|Chardonnay|C...,Central Coast and Santa Cruz Mountains,"This is the great, noble grape of Burgundy. Di...",
3,Gary Farrell 2016 Rochioli Allen Vineyards Pin...,Gary Farrell,2016,Rochioli Allen Vineyards,Pinot Noir,Russian River Valley,Sonoma,California,14.1%,750 ml,...,6/1/2019,Virginie Boone,California,"1983, amended in 1987, 2003, and 2005","10,000 acres (40 km2)",,Alicante Bouschet|Arneis|Barbera|Black Muscat|...,North Coast,"This is the great, noble grape of Burgundy. Di...",
4,Gary Farrell 2016 Lancel Creek Vineyard Pinot ...,Gary Farrell,2016,Lancel Creek Vineyard,Pinot Noir,Russian River Valley,Sonoma,California,13.9%,750 ml,...,6/1/2019,Virginie Boone,California,"1983, amended in 1987, 2003, and 2005","10,000 acres (40 km2)",,Alicante Bouschet|Arneis|Barbera|Black Muscat|...,North Coast,"This is the great, noble grape of Burgundy. Di...",
5,Laetitia 2016 Les Galets Pinot Noir,Laetitia,2016,Les Galets,Pinot Noir,Arroyo Grande Valley,Central Coast,California,14.2%,750 ml,...,6/1/2019,Matt Kettmann,California,1990,"42,880 acres (17,353 ha)",,Chardonnay|Counoise|Grenache|Mourvedre|Petite ...,Central Coast and Santa Cruz Mountains,"This is the great, noble grape of Burgundy. Di...",
6,Carlisle 2016 Piner Olivet Ranches Zinfandel,Carlisle,2016,Piner Olivet Ranches,Zinfandel,Russian River Valley,Sonoma,California,15%,750 ml,...,6/1/2019,Virginie Boone,California,"1983, amended in 1987, 2003, and 2005","10,000 acres (40 km2)",,Alicante Bouschet|Arneis|Barbera|Black Muscat|...,North Coast,With predominant raspberry flavors and a spicy...,
7,Cobb 2016 Diane Cobb Coastlands Vineyard Pinot...,Cobb,2016,Diane Cobb Coastlands Vineyard,Pinot Noir,Sonoma Coast,Sonoma,California,13.6%,750 ml,...,6/1/2019,Virginie Boone,California,1987,"500,000 acres (2,000 km2)",,Cabernet Franc|Cabernet Sauvignon|Chardonnay|G...,North Coast,"This is the great, noble grape of Burgundy. Di...",
8,Styring 2015 Wit Reserve Estate Pinot Noir,Styring,2015,Wit Reserve Estate,Pinot Noir,Ribbon Ridge,Willamette Valley,Oregon,15%,750 ml,...,6/1/2019,Paul Gregutt,Oregon,2005,"3,350 acres (1,356 ha)",Maritime,Auxerrois Blanc|Chardonnay|Muscat Canelli|Pino...,,"This is the great, noble grape of Burgundy. Di...",
9,Freeman 2016 Yu Ki Estate Pinot Noir,Freeman,2016,Yu Ki Estate,Pinot Noir,Sonoma Coast,Sonoma,California,14.1%,750 ml,...,6/1/2019,Jim Gordon,California,1987,"500,000 acres (2,000 km2)",,Cabernet Franc|Cabernet Sauvignon|Chardonnay|G...,North Coast,"This is the great, noble grape of Burgundy. Di...",


## Create a table of variety types and then separate lists of varieties per type

In [8]:
# Distinct values of the "winetype" column, in order of first appearance.
wine_types = df_all_table.loc[:, "winetype"].unique()
wine_types

array(['Red', 'White', 'Sparkling', 'Rose', 'Fortified', 'Dessert'],
      dtype=object)

In [9]:
def unique_varieties(table, wine_type):
    """Return the distinct ``variety`` values for one wine type.

    Parameters
    ----------
    table : pandas.DataFrame
        Frame containing at least the ``winetype`` and ``variety`` columns.
    wine_type : str
        Value of ``winetype`` to select, matched exactly.

    Returns
    -------
    numpy.ndarray
        Unique varieties in order of first appearance; empty when no row
        has the requested type.
    """
    return table.loc[table["winetype"] == wine_type, "variety"].unique()


# One array of varieties per wine type. Despite the df_ prefix these are
# NumPy arrays, not DataFrames; the names are kept unchanged.
df_Red = unique_varieties(df_all_table, "Red")
df_White = unique_varieties(df_all_table, "White")
# 'Sparkling' is listed in wine_types but previously had no list of its own.
df_Sparkling = unique_varieties(df_all_table, "Sparkling")
df_Rose = unique_varieties(df_all_table, "Rose")
df_Fortified = unique_varieties(df_all_table, "Fortified")
df_Dessert = unique_varieties(df_all_table, "Dessert")


## Create a variable to hold the most recent publishing date

In [11]:
# dt_published holds text such as '6/1/2019'. Calling .max() on the raw column
# compares strings, so '9/1/2018' would beat '12/1/2019'. Parse the values as
# dates to find the latest one, then report it in its original text form.
# NOTE(review): assumes month/day/year order, as in the sample value -- confirm.
_published_raw = df_all_table["dt_published"].reset_index(drop=True)
_published_parsed = pd.to_datetime(_published_raw, format="%m/%d/%Y", errors="coerce")

if _published_parsed.notna().any():
    # idxmax skips unparseable (NaT) entries; after reset_index the label is
    # also the row position.
    LastPublished = _published_raw.iloc[_published_parsed.idxmax()]
else:
    # Nothing could be parsed: fall back to the plain column maximum.
    LastPublished = _published_raw.max()
LastPublished

'6/1/2019'

## Converting pandas df to replace sql tables

In [13]:
# SQLite database file, relative to the notebook's working directory.
database_path = "sqlite.sqlite"
# Engine that manages connections to that file.
engine = create_engine(f"sqlite:///{database_path}")
# Connection kept at notebook level so other cells can reuse it.
# It is never closed here; call conn.close() once all SQL work is finished.
conn = engine.connect()

# Write the merged frame as table "All_Table", replacing any earlier version.
df_all_table.to_sql("All_Table", conn, if_exists='replace', index=False)

# Sanity check: read the first stored row back.
# Engine.execute() was deprecated in SQLAlchemy 1.4 and removed in 2.0, so the
# query goes through the connection as an explicit text() construct.
conn.execute(text("SELECT * FROM All_Table")).first()


("Lucia 2017 Garys' Vineyard Pinot Noir", 'Lucia', '2017', "Garys' Vineyard", 'Pinot Noir', 'Santa Lucia Highlands', 'Central Coast', 'California', '14.2%', '750 ml', 'Red', '$65', 97, '6/1/2019', 'Matt Kettmann', 'California', '1990', '22,000 acres (8,903\xa0ha)', None, 'Cabernet Franc|Cabernet Sauvignon|Chardonnay|Chenin blanc|Malbec|Merlot|Petit Verdot|Pinot gris|Pinot noir|Riesling|Roussanne|Syrah|Viognier', 'Central Coast and Santa Cruz Mountains', 'This is the great, noble grape of Burgundy. Difficult to grow but at its best it is smooth & richer than Cabernet Sauvignon with less tannin. Raisin like flavors with undertones of black cherry, spice & raspberry. Widely used in the making of champagne sparkling wines.', None)