# Connection to MySQL Database

In [10]:
import mysql.connector
import pandas as pd
from IPython.display import display

# Establish a connection to the MySQL server
connection = mysql.connector.connect(
    host="localhost",
    user="root",
    password="rootroot"
)

# Create a cursor object
cursor = connection.cursor()

# Execute a query to create the database if it does not exist
cursor.execute("CREATE DATABASE IF NOT EXISTS wines")

# Use the newly created database
cursor.execute("USE wines")

# Execute a query to create the Countries table
cursor.execute("""
CREATE TABLE IF NOT EXISTS Countries (
    country_id INT AUTO_INCREMENT PRIMARY KEY,
    country_name VARCHAR(100) NOT NULL,
    country_code CHAR(3) NOT NULL,
    latitude DECIMAL(10,6) NOT NULL,
    longitude DECIMAL(10,6) NOT NULL
)
""")

# Execute a query to create the Products table
cursor.execute("""
CREATE TABLE IF NOT EXISTS Products (
    product_id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    price DECIMAL(10,2) NOT NULL,
    quantity_in_cl INT NOT NULL,
    price_per_10cl DECIMAL(10,2) NOT NULL,
    value VARCHAR(50),
    wine_category VARCHAR(50),
    price_category VARCHAR(50),
    bio BOOLEAN NOT NULL,
    non_alcoholic BOOLEAN NOT NULL,
    premium BOOLEAN NOT NULL,
    country_id INT NOT NULL,
    FOREIGN KEY (country_id) REFERENCES Countries(country_id)
)
""")

# Execute a query to create the Consumption table
cursor.execute("""
CREATE TABLE IF NOT EXISTS Consumption (
    consumption_id INT AUTO_INCREMENT PRIMARY KEY,
    country_id INT NOT NULL,
    consumption_in_l DECIMAL(10,2) NOT NULL,
    FOREIGN KEY (country_id) REFERENCES Countries(country_id)
)
""")

# Execute a query to create the ProductSources table
cursor.execute("""
CREATE TABLE IF NOT EXISTS ProductSources (
    source_id INT AUTO_INCREMENT PRIMARY KEY,
    product_id INT NOT NULL,
    web_scraper_order VARCHAR(50) NOT NULL,
    web_scraper_start_url TEXT NOT NULL,
    FOREIGN KEY (product_id) REFERENCES Products(product_id)
)
""")

# Close the cursor and connection
cursor.close()
connection.close()

# Import Data into Database

In [11]:
# Read the CSV file
wine_data = pd.read_csv('../data/enriched/wine_data_enriched.csv')

# Establish a connection to the MySQL server
connection = mysql.connector.connect(
    host="localhost",
    user="root",
    password="rootroot",
    database="wines"
)

cursor = connection.cursor()

try:
    for _, row in wine_data.iterrows():
        # Insert into Countries
        cursor.execute("""
            INSERT INTO Countries (country_name, country_code, latitude, longitude)
            VALUES (%s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE country_id=LAST_INSERT_ID(country_id)
        """, (row['country'], row['country_code'], row['latitude'], row['longitude']))
        country_id = cursor.lastrowid

        # Insert into Products
        cursor.execute("""
            INSERT INTO Products (name, price, quantity_in_cl, price_per_10cl, value, wine_category, 
                       price_category, bio, non_alcoholic, premium, country_id)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE product_id=LAST_INSERT_ID(product_id)
        """, (row['name'], row['price'], row['quantity_in_cl'], row['price_per_10cl'], row['value'], row['wine_category'], 
              row['price_category'], row['bio'], row['non_alcoholic'], row['premium'], country_id))
        product_id = cursor.lastrowid

        # Insert into Consumption
        cursor.execute("""
            INSERT INTO Consumption (country_id, consumption_in_l)
            VALUES (%s, %s)
            ON DUPLICATE KEY UPDATE consumption_in_l = VALUES(consumption_in_l)
        """, (country_id, row['consumption_in_l']))

        # Insert into ProductSources
        cursor.execute("""
            INSERT INTO ProductSources (product_id, web_scraper_order, web_scraper_start_url)
            VALUES (%s, %s, %s)
        """, (product_id, row['web-scraper-order'], row['web-scraper-start-url']))

    # Commit transaction
    connection.commit()
except mysql.connector.Error as err:
    print(f"Error: {err}")
    connection.rollback()
finally:
    cursor.close()
    connection.close()

# Read Data from Database

In [13]:
# Establish a connection to the MySQL server
connection = mysql.connector.connect(
    host="localhost",
    user="root",
    password="rootroot",
    database="wines"
)

# Create a cursor object
cursor = connection.cursor()

# Function to read a table into a DataFrame
def read_table(table_name):
    query = f"SELECT * FROM {table_name}"
    cursor.execute(query)
    columns = [col[0] for col in cursor.description]
    data = cursor.fetchall()
    return pd.DataFrame(data, columns=columns)

# Read all tables
countries_df = read_table("Countries")
products_df = read_table("Products")
consumption_df = read_table("Consumption")
product_sources_df = read_table("ProductSources")

# Close the cursor and connection
cursor.close()
connection.close()

# Merge the dataframes
merged_df = products_df.merge(countries_df, on='country_id', how='left')
merged_df = merged_df.merge(consumption_df, on='country_id', how='left')
merged_df = merged_df.merge(product_sources_df, on='product_id', how='left')

# Display the merged dataframe
display(merged_df)

Unnamed: 0,product_id,name,price,quantity_in_cl,price_per_10cl,value,wine_category,price_category,bio,non_alcoholic,...,country_id,country_name,country_code,latitude,longitude,consumption_id,consumption_in_l,source_id,web_scraper_order,web_scraper_start_url
0,1,"NATURAPLAN BIO-PROSECCO DOC RAPHAEL DAL BO, EX...",11.95,75,1.59,1.59/10cl,SCHAUMWEIN,Mid-Range,1,0,...,1,Italien,ITA,41.871940,12.567380,1,4.83,1,1734709733-1,https://www.coop.ch/de/weine/alle-weine/c/m_25...
1,2,ZÜRICH AOC STAATSSCHREIBER CUVÉE BLANC PRESTIGE,12.70,75,1.69,1.69/10cl,WEISSWEIN,Mid-Range,0,0,...,2,Schweiz,CHE,46.818188,8.227512,2,4.35,2,1734709733-3,https://www.coop.ch/de/weine/alle-weine/c/m_25...
2,3,PROSECCO SUPERIORE DI VALDOBBIADENE CONEGLIANO...,10.95,75,1.46,1.46/10cl,SCHAUMWEIN,Mid-Range,0,0,...,3,Italien,ITA,41.871940,12.567380,3,4.83,3,1734709733-4,https://www.coop.ch/de/weine/alle-weine/c/m_25...
3,4,AIGLE LES MURAILLES CHABLAIS AOC H. BADOUX,22.50,70,3.21,3.21/10cl,UNCATEGORIZED,Mid-Range,0,0,...,4,Schweiz,CHE,46.818188,8.227512,4,4.35,4,1734709733-5,https://www.coop.ch/de/weine/alle-weine/c/m_25...
4,5,"CHAMPAGNE AOC CHARLES BERTIN, BRUT",19.50,75,2.60,2.60/10cl,SCHAUMWEIN,Mid-Range,0,0,...,5,Frankreich,FRA,46.227638,2.213749,5,6.44,5,1734709733-6,https://www.coop.ch/de/weine/alle-weine/c/m_25...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2155,2156,BOURGOGNE AOC CHARDONNAY LOUIS JADOT,18.95,75,2.53,2.53/10cl,WEISSWEIN,Mid-Range,0,0,...,2156,Frankreich,FRA,46.227638,2.213749,2156,6.44,2156,1734709816-576,https://www.coop.ch/de/weine/alle-weine/c/m_25...
2156,2157,VALAIS AOC ROSÉ OEIL DE PERDRIX LE ROSEL 6X 75CL,42.60,6,0.95,0.95/10cl,ROSÉWEIN,Premium,0,0,...,2157,Schweiz,CHE,46.818188,8.227512,2157,4.35,2157,1734709816-577,https://www.coop.ch/de/weine/alle-weine/c/m_25...
2157,2158,LEGARIS CRIANZA RIBERA DEL DUERO DO,25.50,75,3.40,3.40/10cl,UNCATEGORIZED,Mid-Range,0,0,...,2158,Spanien,ESP,40.463667,-3.749220,2158,3.52,2158,1734709816-578,https://www.coop.ch/de/weine/alle-weine/c/m_25...
2158,2159,RIPASSO DELLA VALPOLICELLA DOC SUPERIORE VIGNE...,9.85,75,1.31,1.31/10cl,UNCATEGORIZED,Budget,0,0,...,2159,Italien,ITA,41.871940,12.567380,2159,4.83,2159,1734709816-579,https://www.coop.ch/de/weine/alle-weine/c/m_25...
