In [1]:
import csv
import os
import pandas as pd
from configparser import ConfigParser
from mysql.connector import MySQLConnection, Error

In [2]:
def read_config(config_file = 'config.ini', section = 'mysql'):
    """
    Load one section of an INI-style configuration file.

    config_file -- path of the configuration file to read
    section     -- name of the section whose options are wanted

    Return a dictionary that maps every option of the section to its
    string value. Raise an exception when the file does not exist or
    when it has no such section.
    """
    ini = ConfigParser()

    # Guard: there must be a real file on disk before we try to parse it.
    if not os.path.isfile(config_file):
        raise Exception(f"Configuration file '{config_file}' "
                        "doesn't exist.")
    ini.read(config_file)

    # Guard: the requested section has to be present in that file.
    if not ini.has_section(section):
        raise Exception(f'Section [{section}] missing ' + \
                        f'in config file {config_file}')

    # items() yields (option, value) pairs, which dict() consumes directly.
    return dict(ini.items(section))

In [3]:
def make_connection(config_file = 'config.ini', section = 'mysql'):
    """
    Make a database connection with the configuration file config_file
    with the given section. If successful, return the connection,
    else raise an exception.

    The options of the section are passed unchanged as keyword
    arguments to MySQLConnection. An exception is raised both when the
    connector reports an error and when the connection object is
    created but is_connected() reports that it is not connected, so
    this function never returns None.
    """
    try:
        db_config = read_config(config_file, section)
        conn = MySQLConnection(**db_config)
        connected = conn.is_connected()

    except Error as e:
        # Keep the connector's error as the cause for easier debugging.
        raise Exception(f'Connection failed: {e}') from e

    if not connected:
        raise Exception('Connection failed: the connection object was '
                        'created but is not connected.')

    return conn

In [4]:
def dataframe_query(conn, sql):
    """
    Use the database connection conn to execute
    the SQL code. Return the resulting row count
    and the rows as a dataframe or (0, None)
    if there were no rows. If the query failed,
    raise an exception.

    The row count is the number of rows fetched. The dataframe's
    column names come from the cursor description. The cursor is
    always closed, whether the query succeeded or failed.
    """
    cursor = None

    try:
        cursor = conn.cursor()
        cursor.execute(sql)

        rows  = cursor.fetchall()
        count = len(rows)

        if count == 0:
            return 0, None

        # The first field of each description entry is the column name.
        column_names = [column_info[0]
                        for column_info in cursor.description]

        # Return the query results in a dataframe.
        return count, pd.DataFrame(rows, columns=column_names)

    except Error as e:
        raise Exception(f'Query failed: {e}') from e

    finally:
        # Release the cursor on every path, including failures.
        if cursor is not None:
            cursor.close()

In [5]:
# Open the shared connection and cursor that every later cell reuses.
# The default [mysql] section of movies.ini supplies the settings.
conn = make_connection('movies.ini')
cursor = conn.cursor()


In [6]:
# Start from a clean slate so this cell can be re-run safely.
cursor.execute('DROP TABLE IF EXISTS customers')

# customers: three text columns, keyed by cCode.
sql = """
        CREATE TABLE customers
        (
            cCode    varchar(255),
            cName   varchar(255),
            cType    varchar(255),
            PRIMARY KEY(cCode)
        )
        """

cursor.execute(sql)

In [7]:
# Start from a clean slate so this cell can be re-run safely.
cursor.execute('DROP TABLE IF EXISTS markets')

# markets: three text columns, keyed by mCode.
sql = """
        CREATE TABLE markets
        (
            mCode    varchar(255),
            mName   varchar(255),
            zone    varchar(255),
            PRIMARY KEY(mCode)
        )
        """

cursor.execute(sql)

In [8]:
# Start from a clean slate so this cell can be re-run safely.
cursor.execute('DROP TABLE IF EXISTS products')

# products: two text columns, keyed by pCode.
sql = """
        CREATE TABLE products
        (
            pCode    varchar(255),
            pType    varchar(255),
            PRIMARY KEY(pCode)
        )
        """

cursor.execute(sql)

In [9]:
# Start from a clean slate so this cell can be re-run safely.
cursor.execute('DROP TABLE IF EXISTS dates')

# dates: keyed by the dates column, which shares its name with the table.
sql = """
        CREATE TABLE dates
        (
            dates    datetime,
            cy_date   datetime,
            year    int,
            month   char(255), 
            date_y   varchar(255),
            PRIMARY KEY(dates)
        )
        """

cursor.execute(sql)

In [10]:
# Start from a clean slate so this cell can be re-run safely.
cursor.execute('DROP TABLE IF EXISTS transaction')

# transaction: keyed by the auto-incremented ID_column. The pCode, cCode
# and mCode columns hold product, customer and market codes, but no
# foreign-key constraints are declared.
sql = """
        CREATE TABLE transaction
        (
            ID_column int AUTO_INCREMENT,
            pCode    varchar(255),
            cCode   varchar(255),
            mCode    varchar(255),
            date   datetime, 
            salesQ   double,
            salesA double,
            currency char(255),
            PRIMARY KEY(ID_column)
        )
        """

cursor.execute(sql)

In [11]:
# Start from a clean slate so this cell can be re-run safely.
cursor.execute('DROP TABLE IF EXISTS link')

# link: (pCode, cCode) pairs; the composite key keeps each pair unique.
sql = """
        CREATE TABLE link
        (
            pCode    varchar(255),
            cCode    varchar(255),
            PRIMARY KEY(pCode,cCode)
        )
        """

cursor.execute(sql)

In [12]:
# Parameterized INSERT statements, one per table. Every statement names
# its target columns explicitly, in the order the CREATE TABLE cells
# declare them, so a row's values are bound by column name rather than
# by the table's column order.

# ID_column is omitted on purpose: it is AUTO_INCREMENT.
sql_trans = ("""
                INSERT INTO transaction(pCode, cCode,mCode,date,salesQ,salesA,currency)
                VALUES (%s,%s,%s,%s,%s,%s,%s)
                """
               )
sql_dates = ("""
                INSERT INTO dates(dates, cy_date, year, month, date_y)
                VALUES (%s,%s,%s,%s,%s)
                """
               )
sql_cust = ("""
                INSERT INTO customers(cCode, cName, cType)
                VALUES (%s,%s,%s)
                """
               )
sql_prod = ("""
                INSERT INTO products(pCode, pType)
                VALUES (%s,%s)
                """
               )
sql_mark = ("""
                INSERT INTO markets(mCode, mName, zone)
                VALUES (%s,%s,%s)
                """
               )

# Creating Link Table for Many to Many Query
sql_link = ("""
            INSERT INTO link(pCode, cCode)
            VALUES(%s, %s)""")

In [13]:
# Insert the first 100 data rows of date.csv into the dates table.
with open('date.csv', newline='') as csv_file:
    reader = csv.reader(csv_file, delimiter=',', quotechar='"')
    for line_no, record in enumerate(reader):
        if line_no == 0:
            continue    # line 0 is the header row
        if line_no > 100:
            break       # sample limit reached
        cursor.execute(sql_dates, record)
conn.commit()

In [14]:
# Insert the first 100 data rows of transactions.csv into the
# transaction table.
with open('transactions.csv', newline='') as csv_file:
    reader = csv.reader(csv_file, delimiter=',', quotechar='"')
    for line_no, record in enumerate(reader):
        if line_no == 0:
            continue    # line 0 is the header row
        if line_no > 100:
            break       # sample limit reached
        cursor.execute(sql_trans, record)
conn.commit()

In [15]:
# Insert the first 100 data rows of customers.csv into the customers table.
with open('customers.csv', newline='') as csv_file:
    reader = csv.reader(csv_file, delimiter=',', quotechar='"')
    for line_no, record in enumerate(reader):
        if line_no == 0:
            continue    # line 0 is the header row
        if line_no > 100:
            break       # sample limit reached
        cursor.execute(sql_cust, record)
conn.commit()

In [16]:
# Insert the first 100 data rows of products.csv into the products table.
with open('products.csv', newline='') as csv_file:
    reader = csv.reader(csv_file, delimiter=',', quotechar='"')
    for line_no, record in enumerate(reader):
        if line_no == 0:
            continue    # line 0 is the header row
        if line_no > 100:
            break       # sample limit reached
        cursor.execute(sql_prod, record)
conn.commit()

In [17]:
# Insert the first 100 data rows of markets.csv into the markets table.
with open('markets.csv', newline='') as csv_file:
    reader = csv.reader(csv_file, delimiter=',', quotechar='"')
    for line_no, record in enumerate(reader):
        if line_no == 0:
            continue    # line 0 is the header row
        if line_no > 100:
            break       # sample limit reached
        cursor.execute(sql_mark, record)
conn.commit()

In [18]:
# Creating Link Table for Many to Many Query
# Collect every distinct (product_code, customer_code) pair found in
# transactions.csv and store the pairs in the link table.
# Note: this reads the whole CSV, not only the rows loaded into the
# transaction table.
link_df = (
    pd.read_csv('transactions.csv', usecols=['product_code', 'customer_code'])
    .drop_duplicates()
    .reset_index()
)

# Select the columns by name so the parameter order always matches
# sql_link (product first, customer second), whatever order the CSV uses.
link_rows = link_df[['product_code', 'customer_code']].to_numpy().tolist()

# One executemany call replaces the per-row index loop.
cursor.executemany(sql_link, link_rows)
conn.commit()
    

One-to-one query


In [19]:
# Pair each transaction whose salesA equals 41241 with its customer row.
oto = """
        SELECT t.pCode, c.cCode, c.cName, t.date 
        FROM transaction as t, customers as c
        WHERE t.salesA = 41241
        and t.cCode = c.cCode"""

cursor.execute(oto)
results = cursor.fetchall()

for record in results:
    print(record)

conn.commit()

('Prod001', 'Cus001', 'Surge Stores', datetime.datetime(2017, 10, 10, 0, 0))


One-to-many query

In [20]:
# List every transaction recorded for the product 'Prod001'.
otm = """
        SELECT t.pCode,t.cCode, t.date
        FROM transaction as t, products as p
        WHERE p.pCode = 'Prod001'
        AND p.pCode = t.pCode"""

cursor.execute(otm)
results = cursor.fetchall()

for record in results:
    print(record)

conn.commit()

('Prod001', 'Cus001', datetime.datetime(2017, 10, 10, 0, 0))
('Prod001', 'Cus002', datetime.datetime(2018, 5, 8, 0, 0))


Many-to-many query

In [21]:
# Customers and products are related through the link table. List the
# transactions of every product linked to the customer 'Nomad Stores'.
# NOTE(review): transactions are matched to link rows on pCode only, so
# the result appears to include transactions of those products made by
# other customers as well; confirm that this is the intended output.
mtm = """
        SELECT c.cCode, c.cName, t.pCode,t.date
        FROM customers c, transaction t, link l
        WHERE c.cCode = l.cCode
        AND t.pCode = l.pCode
        AND c.cName = 'Nomad Stores'"""

cursor.execute(mtm)
results = cursor.fetchall()

for record in results:
    print(record)

conn.commit()

('Cus002', 'Nomad Stores', 'Prod001', datetime.datetime(2017, 10, 10, 0, 0))
('Cus002', 'Nomad Stores', 'Prod001', datetime.datetime(2018, 5, 8, 0, 0))
