In [7]:
import sqlite3
import pandas as pd

In [69]:
order_product = pd.read_csv('Daten/order_products_denormalized.csv')
orders = pd.read_parquet('Daten/orders.parquet')
tips = pd.read_csv('Daten/tips_public.csv')

In [19]:
conn = sqlite3.connect('dabi2_projekt.db')
cursor = conn.cursor()

## Creating the tables for the SQLite Database

In [97]:
cursor.execute('DROP TABLE IF EXISTS user')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS user (
        user_id INTEGER PRIMARY KEY
    )
''')

cursor.execute("DROP TABLE IF EXISTS orders")

cursor.execute('''
    CREATE TABLE IF NOT EXISTS orders (
        order_id INTEGER PRIMARY KEY,
        date DATE,
        tip INTEGER,
        user_id INTEGER,       
        FOREIGN KEY (user_id) REFERENCES user(user_id)
    )
''')

cursor.execute('DROP TABLE IF EXISTS department')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS department (
        department_id INTEGER PRIMARY KEY,
        department_name VARCHAR(160)
    )
''')

cursor.execute("DROP TABLE IF EXISTS aisle")

cursor.execute('''
    CREATE TABLE IF NOT EXISTS aisle (
        aisle_id INTEGER PRIMARY KEY,
        aisle_name VARCHAR(160),
        department_id INTEGER,
        FOREIGN KEY (department_id) REFERENCES department(department_id)
    )
''')

cursor.execute('DROP TABLE IF EXISTS product')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS product (
        product_id INTEGER PRIMARY KEY,
        product_name VARCHAR(160),
        aisle_id INTEGER,
        FOREIGN KEY (aisle_id) REFERENCES aisle(aisle_id)
    )
''')

cursor.execute('DROP TABLE IF EXISTS invoice')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS invoice (
        order_id INTEGER,
        product_id INTEGER,
        add_to_cart_order,
        FOREIGN KEY (order_id) REFERENCES orders(order_id),
        FOREIGN KEY (product_id) REFERENCES product(product_id)
    )
''')

conn.commit()
#conn.close()


## Extracting all relevant information for each table and creating new DFs

In [89]:
m_tip_order = pd.merge(orders, tips, on= 'order_id', how= 'left')

In [98]:
aisles = order_product[['aisle_id', 'aisle', 'department_id']].drop_duplicates(subset=['aisle_id']).rename(columns={'aisle': 'aisle_name'})
department = order_product[['department_id', 'department']].drop_duplicates(subset=['department_id']).rename(columns={'department': 'department_name'})
product = order_product[['product_id', 'product_name', 'aisle_id']].drop_duplicates(subset=['product_id'])
user = orders[['user_id']].drop_duplicates(subset=['user_id'])
order = m_tip_order[['order_id', 'order_date', 'user_id', 'tip']].drop_duplicates(subset=['order_id']).rename(columns={'order_date': 'date'})
invoice = order_product[['order_id', 'product_id', 'add_to_cart_order']]

## Loading all the Data from the new DFs into the tables

In [99]:
aisles.to_sql('aisle', conn, if_exists='append', index=False)
department.to_sql('department', conn, if_exists='append', index=False)
product.to_sql('product', conn, if_exists='append', index=False)
user.to_sql('user', conn, if_exists='append', index=False)
order.to_sql('orders', conn, if_exists='append', index=False)
invoice.to_sql('invoice', conn, if_exists='append', index=False)

14857353

## Check to see if the tables have been created correctly

In [105]:
# Abfrage der Spalteninformationen für die Tabelle 'invoice'
cursor.execute("PRAGMA table_info(invoice);")
columns = cursor.fetchall()

# Spaltennamen und Primärschlüssel anzeigen
print("Spalteninformationen für Tabelle 'invoice':")
for column in columns:
    print(f"Spalte: {column[1]}, Typ: {column[2]}, Primärschlüssel: {column[5]}")

# Abfrage der Fremdschlüssel für die Tabelle 'invoice'
cursor.execute("PRAGMA foreign_key_list(invoice);")
foreign_keys = cursor.fetchall()

# Fremdschlüssel anzeigen
print("\nFremdschlüssel für Tabelle 'invoice':")
if foreign_keys:
    for fk in foreign_keys:
        print(f"Spalte: {fk[3]}, Referenziert Tabelle: {fk[2]}, Referenzierte Spalte: {fk[4]}")
else:
    print("Keine Fremdschlüssel definiert.")


Spalteninformationen für Tabelle 'invoice':
Spalte: order_id, Typ: INTEGER, Primärschlüssel: 0
Spalte: product_id, Typ: INTEGER, Primärschlüssel: 0
Spalte: add_to_cart_order, Typ: , Primärschlüssel: 0

Fremdschlüssel für Tabelle 'invoice':
Spalte: product_id, Referenziert Tabelle: product, Referenzierte Spalte: product_id
Spalte: order_id, Referenziert Tabelle: orders, Referenzierte Spalte: order_id


## Check to see if the data was loaded correctly into the tables

In [88]:
cursor.execute("SELECT * FROM user")
rows = cursor.fetchall()

print("Spalten:", columns)
for row in rows:
    print(row)

Spalten: [(0, 'order_id', 'INTEGER', 0, None, 1), (1, 'date', 'DATE', 0, None, 0), (2, 'user_id', 'INTEGER', 0, None, 0)]
(3,)
(5,)
(6,)
(13,)
(15,)
(18,)
(19,)
(27,)
(29,)
(31,)
(36,)
(40,)
(42,)
(43,)
(47,)
(48,)
(50,)
(51,)
(53,)
(54,)
(56,)
(57,)
(63,)
(64,)
(67,)
(69,)
(71,)
(75,)
(76,)
(80,)
(88,)
(89,)
(90,)
(103,)
(104,)
(106,)
(112,)
(114,)
(116,)
(122,)
(124,)
(132,)
(133,)
(135,)
(136,)
(140,)
(141,)
(143,)
(144,)
(146,)
(151,)
(152,)
(153,)
(156,)
(157,)
(160,)
(162,)
(163,)
(171,)
(172,)
(175,)
(177,)
(186,)
(188,)
(191,)
(195,)
(197,)
(201,)
(204,)
(210,)
(211,)
(221,)
(222,)
(223,)
(224,)
(228,)
(231,)
(232,)
(233,)
(234,)
(236,)
(238,)
(248,)
(249,)
(254,)
(255,)
(257,)
(258,)
(261,)
(262,)
(263,)
(278,)
(281,)
(286,)
(290,)
(291,)
(295,)
(296,)
(298,)
(301,)
(303,)
(304,)
(307,)
(309,)
(310,)
(313,)
(315,)
(316,)
(318,)
(319,)
(320,)
(321,)
(323,)
(326,)
(328,)
(332,)
(336,)
(339,)
(340,)
(347,)
(350,)
(357,)
(360,)
(365,)
(367,)
(371,)
(387,)
(391,)
(397,)
(398,)
(399

In [None]:
conn.close()