# Connect to MySQL and Create the Database Schema

In [1]:
import mysql.connector

import os

# Establish a connection to the MySQL server.
# Host/user/password can be overridden through environment variables so the
# notebook does not have to be edited (or credentials committed) to run it
# against another server; the defaults keep the original local-dev behaviour.
connection = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", "rootroot")
)

# Create a cursor object
cursor = connection.cursor()

# Run all DDL inside try/finally so the cursor and connection are released
# even when one of the statements fails.
try:
    # Create the database if it does not exist, then switch to it
    cursor.execute("CREATE DATABASE IF NOT EXISTS wines")
    cursor.execute("USE wines")

    # Countries: one row per country.
    # The UNIQUE key on country_code is what lets
    # "INSERT ... ON DUPLICATE KEY UPDATE" in the import step reuse an existing
    # country instead of inserting a new row for every product.
    # NOTE: CREATE TABLE IF NOT EXISTS does not modify a table that already
    # exists; drop the table (or ALTER TABLE) to add the key to an old schema.
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS Countries (
        country_id INT AUTO_INCREMENT PRIMARY KEY,
        country_name VARCHAR(100) NOT NULL,
        country_code CHAR(3) NOT NULL,
        latitude DECIMAL(10,6) NOT NULL,
        longitude DECIMAL(10,6) NOT NULL,
        UNIQUE KEY uq_countries_code (country_code)
    )
    """)

    # Products: one row per wine, linked to its country of origin
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS Products (
        product_id INT AUTO_INCREMENT PRIMARY KEY,
        name VARCHAR(255) NOT NULL,
        price DECIMAL(10,2) NOT NULL,
        quantity_in_cl INT NOT NULL,
        price_per_10cl DECIMAL(10,2) NOT NULL,
        value VARCHAR(50),
        wine_category VARCHAR(50),
        price_category VARCHAR(50),
        bio BOOLEAN NOT NULL,
        non_alcoholic BOOLEAN NOT NULL,
        premium BOOLEAN NOT NULL,
        country_id INT NOT NULL,
        FOREIGN KEY (country_id) REFERENCES Countries(country_id)
    )
    """)

    # Consumption: one figure per country.
    # UNIQUE (country_id) makes the import step's ON DUPLICATE KEY UPDATE
    # overwrite the existing figure instead of appending duplicates.
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS Consumption (
        consumption_id INT AUTO_INCREMENT PRIMARY KEY,
        country_id INT NOT NULL,
        consumption_in_l DECIMAL(10,2) NOT NULL,
        UNIQUE KEY uq_consumption_country (country_id),
        FOREIGN KEY (country_id) REFERENCES Countries(country_id)
    )
    """)

    # ProductSources: web-scraper provenance for each product
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS ProductSources (
        source_id INT AUTO_INCREMENT PRIMARY KEY,
        product_id INT NOT NULL,
        web_scraper_order VARCHAR(50) NOT NULL,
        web_scraper_start_url TEXT NOT NULL,
        FOREIGN KEY (product_id) REFERENCES Products(product_id)
    )
    """)
finally:
    # Close the cursor and connection
    cursor.close()
    connection.close()

# Import Data into Database

In [2]:
import pandas as pd
import mysql.connector

import os

# Read the CSV file
wine_data = pd.read_csv('../data/enriched/wine_data_enriched.csv')

# Missing CSV values are loaded as float NaN, which is not a valid SQL value.
# Cast to object and map them to None so the connector sends NULL instead.
wine_records = wine_data.astype(object).where(wine_data.notna(), None)

# Establish a connection to the MySQL server.
# Host/user/password can be overridden through environment variables; the
# defaults keep the original local-dev behaviour.
connection = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", "rootroot"),
    database="wines"
)

cursor = connection.cursor()

# (country_name, country_code) -> country_id for countries already written in
# this run. Without it every product row would insert its country (and the
# country's consumption figure) again whenever the tables have no UNIQUE key
# for ON DUPLICATE KEY UPDATE to act on.
country_ids = {}

try:
    for _, row in wine_records.iterrows():
        country_key = (row['country'], row['country_code'])
        country_id = country_ids.get(country_key)

        if country_id is None:
            # Insert into Countries; LAST_INSERT_ID(country_id) makes
            # cursor.lastrowid return the existing id when a unique key matches
            cursor.execute("""
                INSERT INTO Countries (country_name, country_code, latitude, longitude)
                VALUES (%s, %s, %s, %s)
                ON DUPLICATE KEY UPDATE country_id=LAST_INSERT_ID(country_id)
            """, (row['country'], row['country_code'], row['latitude'], row['longitude']))
            country_id = cursor.lastrowid
            country_ids[country_key] = country_id

            # Insert into Consumption, once per country.
            # NOTE(review): assumes consumption_in_l is a per-country figure
            # repeated on every row of that country - confirm against the CSV.
            cursor.execute("""
                INSERT INTO Consumption (country_id, consumption_in_l)
                VALUES (%s, %s)
                ON DUPLICATE KEY UPDATE consumption_in_l = VALUES(consumption_in_l)
            """, (country_id, row['consumption_in_l']))

        # Insert into Products
        cursor.execute("""
            INSERT INTO Products (name, price, quantity_in_cl, price_per_10cl, value, wine_category, price_category, bio, non_alcoholic, premium, country_id)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE product_id=LAST_INSERT_ID(product_id)
        """, (row['name'], row['price'], row['quantity_in_cl'], row['price_per_10cl'], row['value'], row['wine_category'], row['price_category'], row['bio'], row['non_alcoholic'], row['premium'], country_id))
        product_id = cursor.lastrowid

        # Insert into ProductSources
        cursor.execute("""
            INSERT INTO ProductSources (product_id, web_scraper_order, web_scraper_start_url)
            VALUES (%s, %s, %s)
        """, (product_id, row['web-scraper-order'], row['web-scraper-start-url']))

    # Commit transaction: all rows are written together or not at all
    connection.commit()
except mysql.connector.Error as err:
    # Report the database error and undo the partial import
    print(f"Error: {err}")
    connection.rollback()
finally:
    cursor.close()
    connection.close()

# Read Data from Database

In [3]:
import pandas as pd

import mysql.connector

import os

# Establish a connection to the MySQL server.
# Host/user/password can be overridden through environment variables so the
# credentials do not have to live in the notebook; the defaults keep the
# original local-dev behaviour.
connection = mysql.connector.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", "rootroot"),
    database="wines"
)

# Create a cursor object
cursor = connection.cursor()

# Function to read a table into a DataFrame
def read_table(table_name):
    """Load every row of ``table_name`` into a pandas DataFrame.

    Runs ``SELECT * FROM <table_name>`` on the module-level ``cursor`` and
    labels the resulting columns with the names reported in
    ``cursor.description``.

    The table name is interpolated directly into the SQL text, so it must be
    a trusted identifier.
    """
    cursor.execute(f"SELECT * FROM {table_name}")
    # The first item of each description entry is the column name
    header = [field[0] for field in cursor.description]
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=header)

# Read all tables; try/finally guarantees the cursor and connection are
# released even if one of the reads fails.
try:
    countries_df = read_table("Countries")
    products_df = read_table("Products")
    consumption_df = read_table("Consumption")
    product_sources_df = read_table("ProductSources")
finally:
    # Close the cursor and connection
    cursor.close()
    connection.close()

# Merge the dataframes: start from Products and attach, via left joins, the
# country attributes, the country's consumption figure and the scraper source.
merged_df = products_df.merge(countries_df, on='country_id', how='left')
merged_df = merged_df.merge(consumption_df, on='country_id', how='left')
merged_df = merged_df.merge(product_sources_df, on='product_id', how='left')

# Display the merged dataframe
print(merged_df)

     product_id                                               name  price  \
0             1  NATURAPLAN BIO-PROSECCO DOC RAPHAEL DAL BO, EX...  11.95   
1             2    ZÜRICH AOC STAATSSCHREIBER CUVÉE BLANC PRESTIGE  12.70   
2             3  PROSECCO SUPERIORE DI VALDOBBIADENE CONEGLIANO...  10.95   
3             4         AIGLE LES MURAILLES CHABLAIS AOC H. BADOUX  22.50   
4             5                 CHAMPAGNE AOC CHARLES BERTIN, BRUT  19.50   
..          ...                                                ...    ...   
535         536               BOURGOGNE AOC CHARDONNAY LOUIS JADOT  18.95   
536         537  VALAIS AOC ROSÉ OEIL DE PERDRIX LE ROSEL 6X  75CL  42.60   
537         538                LEGARIS CRIANZA RIBERA DEL DUERO DO  25.50   
538         539  RIPASSO DELLA VALPOLICELLA DOC SUPERIORE VIGNE...   9.85   
539         540   VINO NOBILE DI MONTEPULCIANO DOCG TENUTA TREROSE  16.95   

     quantity_in_cl price_per_10cl      value  wine_category price_category