# PROJECT_VIVINO

In [2]:
# import libraries
import pandas as pd
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session, sessionmaker, registry

# open vivino.db from the data directory
# NOTE: echo=True logs every emitted SQL statement into the cell output;
# switch it to False for a quieter notebook.
vivino_engine = create_engine('sqlite:///../data/vivino.db', echo=True)

# build a session factory bound to the engine, then instantiate vivino_session;
# the factory keeps its own name so the session instance does not shadow it
VivinoSession = sessionmaker(bind=vivino_engine)
vivino_session = VivinoSession()

# create a mapper registry and the declarative Base generated from it
mapper_registry = registry()
Base = mapper_registry.generate_base()

# create tablenames_list that contains all table names in vivino.db
# Engine.execute() with a raw string is deprecated in SQLAlchemy 1.4 and
# removed in 2.0, so the statement is wrapped in text() and run on an explicit
# connection that the `with` block closes once the rows have been read.
with vivino_engine.connect() as connection:
    query_tablenames = connection.execute(
        text("SELECT name FROM sqlite_master WHERE type='table';")
    )
    tablenames_list = [row[0] for row in query_tablenames]

print("The tables in vivino.db are:")
for table in tablenames_list:
    print(table)


2023-08-30 08:12:14,137 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table';
2023-08-30 08:12:14,138 INFO sqlalchemy.engine.Engine [raw sql] ()
The tables in vivino.db are:
countries
grapes
wineries
flavor_groups
keywords
regions
most_used_grapes_per_country
toplists
wines
vintages
keywords_wine
vintage_toplists_rankings


In [6]:
# query schema for all tables in vivino.db
# A single explicit connection is reused for every PRAGMA call and closed by
# the `with` block; Engine.execute() is deprecated in SQLAlchemy 1.4 and
# removed in 2.0.
with vivino_engine.connect() as connection:
    for table in tablenames_list:
        # PRAGMA arguments cannot be bound parameters, so the table name is
        # interpolated; it is double-quoted so names that contain spaces or
        # clash with SQL keywords are still accepted.
        query_schema = connection.execute(
            text(f'PRAGMA table_info("{table}");')
        )
        print(f"\n{table} schema:")
        # each row describes one column of the table
        for row in query_schema:
            print(row)


2023-08-30 08:16:15,784 INFO sqlalchemy.engine.Engine PRAGMA table_info(countries);
2023-08-30 08:16:15,786 INFO sqlalchemy.engine.Engine [raw sql] ()

countries schema:
(0, 'code', 'VARCHAR', 1, None, 1)
(1, 'name', 'VARCHAR', 0, None, 0)
(2, 'regions_count', 'INTEGER', 0, None, 0)
(3, 'users_count', 'INTEGER', 0, None, 0)
(4, 'wines_count', 'INTEGER', 0, None, 0)
(5, 'wineries_count', 'INTEGER', 0, None, 0)
2023-08-30 08:16:15,788 INFO sqlalchemy.engine.Engine PRAGMA table_info(grapes);
2023-08-30 08:16:15,789 INFO sqlalchemy.engine.Engine [raw sql] ()

grapes schema:
(0, 'id', 'INTEGER', 1, None, 1)
(1, 'name', 'VARCHAR', 0, None, 0)
2023-08-30 08:16:15,792 INFO sqlalchemy.engine.Engine PRAGMA table_info(wineries);
2023-08-30 08:16:15,793 INFO sqlalchemy.engine.Engine [raw sql] ()

wineries schema:
(0, 'id', 'INTEGER', 1, None, 1)
(1, 'name', 'VARCHAR', 0, None, 0)
2023-08-30 08:16:15,794 INFO sqlalchemy.engine.Engine PRAGMA table_info(flavor_groups);
2023-08-30 08:16:15,796 INFO sq

## QUESTION 1

We want to highlight 10 wines to increase our sales. Which ones should we choose and why?
- Those with good ratings but not a big amount of ratings
- Natural wines?

#### CATEGORY 1 - BEST RETAIL WINE

In [14]:
## join wines with regions and countries to create df_wines

# selection thresholds, named so they can be tuned in one place
MIN_RATINGS_AVERAGE = 4.5
MIN_RATINGS_COUNT = 650

# natural wines at or above both thresholds, with their region and country;
# the thresholds are passed as bound parameters instead of literals
wines_sql = text(
    """SELECT wines.id AS wine_id,
    wines.name AS wine_name,
    wines.is_natural AS is_natural,
    wines.ratings_average,
    wines.ratings_count,
    regions.name AS region_name,
    countries.name AS country_name
    FROM wines
    LEFT JOIN regions ON wines.region_id = regions.id
    LEFT JOIN countries ON regions.country_code = countries.code
    WHERE wines.ratings_average >= :min_ratings_average
    AND wines.ratings_count >= :min_ratings_count
    AND wines.is_natural = 1;
    """
)

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; use an
# explicit connection that is closed as soon as the rows are fetched.
with vivino_engine.connect() as connection:
    query_wines = connection.execute(
        wines_sql,
        {
            "min_ratings_average": MIN_RATINGS_AVERAGE,
            "min_ratings_count": MIN_RATINGS_COUNT,
        },
    )

    # fetch query_wines
    fetch_query_wines = query_wines.fetchall()
    # column names via the public keys() API rather than the private _metadata
    wines_columns = query_wines.keys()

print(wines_columns)

# initialize fetch_query_wines as pandas dataframe
df_wines = pd.DataFrame(
    fetch_query_wines,
    columns=wines_columns
)

# display df_wines
display(df_wines)


2023-08-30 08:33:41,892 INFO sqlalchemy.engine.Engine SELECT wines.id AS wine_id,
    wines.name AS wine_name,
    wines.is_natural AS is_natural,
    wines.ratings_average,
    wines.ratings_count,
    regions.name AS region_name,
    countries.name AS country_name
    FROM wines
    LEFT JOIN regions ON wines.region_id = regions.id
    LEFT JOIN countries ON regions.country_code = countries.code
    WHERE wines.ratings_average >= 4.5
    AND wines.ratings_count >= 650
    AND wines.is_natural = 1;
    
2023-08-30 08:33:41,893 INFO sqlalchemy.engine.Engine [raw sql] ()
RMKeyView(['wine_id', 'wine_name', 'is_natural', 'ratings_average', 'ratings_count', 'region_name', 'country_name'])


Unnamed: 0,wine_id,wine_name,is_natural,ratings_average,ratings_count,region_name,country_name
0,1101360,Le Bourg Saumur Champigny,1,4.5,1264,Saumur-Champigny,France
1,1101361,Les Poyeux Saumur Champigny,1,4.5,2005,Saumur-Champigny,France
2,1219509,Camí Pesseroles,1,4.5,1083,Priorat,Espagne
3,1627127,Magma,1,4.5,748,Terre Siciliane,Italie
4,1680438,Il San Lorenzo Bianco,1,4.5,673,Marche,Italie


We recommend the wines listed above because they are natural European wines backed by solid customer reviews and ratings.

#### CATEGORY 2 - BEST VINTAGE WINE

In [18]:
# query vintage wines

# selection thresholds, named so they can be tuned in one place
MIN_VINTAGE_RATINGS_COUNT = 3000
MIN_VINTAGE_RATINGS_AVERAGE = 4.5
MAX_TOPLIST_RANK = 10

# Toplist entries whose rank equals their previous rank and is within the top
# MAX_TOPLIST_RANK, for vintages at or above both rating thresholds, averaged
# per wine. Because rank = previous_rank is required, a single rank bound
# covers what the former "rank <= 10 OR previous_rank <= 10" test expressed.
#
# NOTE(review): the query groups by vintages.wine_id but selects several
# non-aggregated columns (vintage name, ratings, price, volume, rank,
# previous_rank). SQLite accepts this and takes those values from an arbitrary
# row of each group, so they may not describe the same toplist entry as
# avg_rank (e.g. avg_rank 5.5 next to rank 5). Decide which vintage/ranking row
# should represent a wine and aggregate explicitly - TODO confirm intent.
vintages_sql = text("""SELECT avg(vintage_toplists_rankings.rank) AS avg_rank,
vintages.name as vintage_wine_name,
vintages.wine_id AS vintage_wine_id,
vintages.ratings_average AS ratings_average,
vintages.ratings_count AS ratings_count,
vintages.price_euros AS retail_price_euros,
vintages.bottle_volume_ml AS bottle_volume_ml,
vintage_toplists_rankings.rank AS rank,
vintage_toplists_rankings.previous_rank AS previous_rank,
regions.name AS region_name,
countries.name AS country_name
FROM vintages
LEFT JOIN wines ON vintages.wine_id = wines.id
LEFT JOIN regions ON wines.region_id = regions.id
LEFT JOIN countries ON regions.country_code = countries.code
INNER JOIN vintage_toplists_rankings ON vintages.id = vintage_toplists_rankings.vintage_id
WHERE vintages.ratings_count >= :min_ratings_count
    AND vintages.ratings_average >= :min_ratings_average
    AND vintage_toplists_rankings.rank = vintage_toplists_rankings.previous_rank
    AND vintage_toplists_rankings.rank <= :max_rank
GROUP BY vintages.wine_id 
ORDER BY avg_rank ASC;
""")

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; use an
# explicit connection that is closed as soon as the rows are fetched.
with vivino_engine.connect() as connection:
    query_vintages = connection.execute(
        vintages_sql,
        {
            "min_ratings_count": MIN_VINTAGE_RATINGS_COUNT,
            "min_ratings_average": MIN_VINTAGE_RATINGS_AVERAGE,
            "max_rank": MAX_TOPLIST_RANK,
        },
    )
    fetch_query_vintages = query_vintages.fetchall()
    # column names via the public keys() API rather than the private _metadata
    vintages_columns = query_vintages.keys()

# initialize best_vintages_df
print(vintages_columns)
best_vintages_df = pd.DataFrame(
    fetch_query_vintages,
    columns=vintages_columns
)

# display best_vintages_df
display(best_vintages_df)



2023-08-30 08:37:04,927 INFO sqlalchemy.engine.Engine SELECT avg(rank) AS avg_rank,
vintages.name as vintage_wine_name,
vintages.wine_id AS vintage_wine_id,
vintages.ratings_average AS ratings_average,
vintages.ratings_count AS ratings_count,
vintages.price_euros AS retail_price_euros,
vintages.bottle_volume_ml AS bottle_volume_ml,
vintage_toplists_rankings.rank AS rank,
vintage_toplists_rankings.previous_rank AS previous_rank,
regions.name AS region_name,
countries.name AS country_name
FROM vintages
LEFT JOIN wines ON vintages.wine_id = wines.id
LEFT JOIN regions ON wines.region_id = regions.id
LEFT JOIN countries ON regions.country_code = countries.code
INNER JOIN vintage_toplists_rankings ON vintages.id = vintage_toplists_rankings.vintage_id
WHERE vintages.ratings_count >= 3000
    AND vintages.ratings_average >= 4.5
    AND vintage_toplists_rankings.rank = vintage_toplists_rankings.previous_rank
    AND (vintage_toplists_rankings.rank <= 10 OR vintage_toplists_rankings.previous_ran

Unnamed: 0,avg_rank,vintage_wine_name,vintage_wine_id,ratings_average,ratings_count,retail_price_euros,bottle_volume_ml,rank,previous_rank,region_name,country_name
0,5.5,Château Pontet-Canet Pauillac (Grand Cru Class...,14362,4.7,3275,597.5,750,5,5,Pauillac,France
1,6.0,Krug Rosé,79632,4.6,3605,349.0,750,6,6,Champagne,France
2,6.0,Krug Grande Cuvée,7122486,4.6,28513,245.0,750,6,6,Champagne,France
3,9.5,Laurent-Perrier Grand Siècle Champagne (Grande...,1238419,4.5,9824,183.7,750,9,9,Champagne,France
4,10.0,Antinori Tignanello 2016,1652,4.6,13709,420.0,750,10,10,Toscana,Italie


We recommend these vintage wines because they have consistently held their place in our `vintage_toplists_rankings` table throughout multiple years, and their positions are substantiated by a significant number of ratings and a high overall average rating.

## QUESTION 2
We have a marketing budget for this year. Which country should we prioritise and why?
- Those with high user count?
- Those with low user count?
- Those with an upcoming user popularity

In [20]:
# CRITERIA 1 - COUNTRIES WITH HIGH USER COUNTS

## query countries table to yield top 5 countries based on user counts

# number of countries to keep, passed to the query as a bound parameter
TOP_N_COUNTRIES = 5

top_user_count_sql = text(
    """SELECT countries.name AS country_name,
    countries.users_count AS user_count
    FROM countries
    ORDER BY user_count DESC
    LIMIT :top_n;
    """
)

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; use an
# explicit connection that is closed as soon as the rows are fetched.
with vivino_engine.connect() as connection:
    query_top_5_user_count = connection.execute(
        top_user_count_sql,
        {"top_n": TOP_N_COUNTRIES},
    )
    fetch_query_top_5_user_count = query_top_5_user_count.fetchall()
    top_5_user_count_columns = query_top_5_user_count.keys()

# initialize top_5_user_count_df
top_5_user_count_df = pd.DataFrame(
    fetch_query_top_5_user_count,
    columns=top_5_user_count_columns
)

# display top_5_user_count_df
display(top_5_user_count_df)

2023-08-30 08:46:58,419 INFO sqlalchemy.engine.Engine SELECT countries.name AS country_name,
    countries.users_count AS user_count
    FROM countries
    ORDER BY user_count DESC
    LIMIT 5;
    
2023-08-30 08:46:58,420 INFO sqlalchemy.engine.Engine [raw sql] ()


Unnamed: 0,country_name,user_count
0,États-Unis,12273684
1,France,5973301
2,Italie,4270717
3,Allemagne,2549989
4,Espagne,2264396


In [22]:
# CRITERIA 3 - COUNTRIES WITH UPCOMING USER POPULARITY
# What the query actually measures: the total number of ratings received by
# wines from each country of origin (wines -> regions -> countries), sorted
# from most to least rated. It is a snapshot of engagement per producing
# country; it contains no time dimension, so it does not show growth by itself.
# Because of the LEFT JOINs, wines without a matching region or country are
# grouped under a NULL country_name.
upcoming_popular_country_sql = text(
    """SELECT countries.name AS country_name,
    SUM(wines.ratings_count) AS ratings_count
    FROM wines
    LEFT JOIN regions ON wines.region_id = regions.id
    LEFT JOIN countries ON regions.country_code = countries.code
    GROUP BY country_name
    ORDER BY ratings_count DESC;
"""
)

# Engine.execute() is deprecated in SQLAlchemy 1.4 and removed in 2.0; use an
# explicit connection that is closed as soon as the rows are fetched.
with vivino_engine.connect() as connection:
    query_upcoming_popular_country = connection.execute(
        upcoming_popular_country_sql
    )
    fetch_query_upcoming_popular_country = query_upcoming_popular_country.fetchall()
    upcoming_popular_country_columns = query_upcoming_popular_country.keys()

# initialize upcoming_popular_country_df
upcoming_popular_country_df = pd.DataFrame(
    fetch_query_upcoming_popular_country,
    columns=upcoming_popular_country_columns
)

# display upcoming_popular_country_df
display(upcoming_popular_country_df)


2023-08-30 09:33:38,681 INFO sqlalchemy.engine.Engine SELECT countries.name AS country_name,
    SUM(wines.ratings_count) AS ratings_count
    FROM wines
    LEFT JOIN regions ON wines.region_id = regions.id
    LEFT JOIN countries ON regions.country_code = countries.code
    GROUP BY country_name
    ORDER BY ratings_count DESC;

2023-08-30 09:33:38,682 INFO sqlalchemy.engine.Engine [raw sql] ()


Unnamed: 0,country_name,ratings_count
0,Italie,2135839
1,France,2124809
2,États-Unis,834263
3,Espagne,746710
4,Argentine,283673
5,Chili,159256
6,Portugal,62454
7,Afrique du Sud,59995
8,Australie,52138
9,Hongrie,30564


Based on the information above, we suggest focusing the marketing resources on establishing a customer base in the following countries:
 1. Italy (listed as Italie)
 2. France (listed as France)
 3. Spain (listed as Espagne)
 4. United States of America (listed as États-Unis)
 5. Germany (listed as Allemagne)

 The first 3 countries are those whose wines are highly sought after by customers in general, based on customer engagement with each country's wines as measured by ratings count. On top of that, they are also countries with a substantial number of customers.
 The last 2 countries are those whose wines are still gaining popularity; nevertheless, they have a significant number of customers who use Vivino.
 