In [100]:
import os
import re

import mysql.connector as mysql
import pandas as pd

# Import data

In [101]:
# Read mpg.csv (path is relative to the current working directory) into a DataFrame.
df = pd.read_csv('mpg.csv')

In [102]:
def mpg_kml(x):
    """Convert a miles-per-gallon figure to kilometres per litre.

    The input is scaled by 0.425144 and the result is rounded to two
    decimal places.
    """
    km_per_litre = x * 0.425144
    return round(km_per_litre, 2)


# Cleaning Data

In [103]:
# Derive city fuel economy in km/l from the `cty` column, one value at a time.
df['cty_kml'] = df['cty'].map(mpg_kml)

In [104]:
# Collapse each transmission code to 'auto' or 'manual'. A value counts as
# automatic when any word in it starts with 'a' or 'A'; everything else is manual.
df['trans'] = df['trans'].apply(
    lambda code: 'manual' if re.search(r'\b[Aa]', code) is None else 'auto'
)

In [105]:
# Inspect the distinct drive-train codes before recoding them.
df['drv'].unique()

array(['f', '4', 'r'], dtype=object)

In [106]:
# Spell out the single-character drive-train codes.
# `replace` leaves values that are not keys of the mapping untouched, so this
# cell can be re-run safely; `map` would turn the already-expanded labels
# (and any unexpected code) into NaN.
df['drv'] = df['drv'].replace({
    'f': 'FRONT',
    '4': '4WD',
    'r': 'REAR',
})

# Create the MySQL table and load the data

In [107]:
def create_table(host: str, user: str, password: str, database_name: str):
    """Create the ``mpg`` table in a MySQL database if it does not exist yet.

    The statement uses ``CREATE TABLE IF NOT EXISTS`` so the function can be
    called repeatedly (for example on a notebook re-run) without failing on
    an already-existing table.

    Args:
        host: MySQL server host name.
        user: MySQL user name.
        password: Password for ``user``.
        database_name: Database (schema) in which the table is created.

    Returns:
        None. Connector errors (``mysql.Error``) are printed, not raised;
        any other exception propagates to the caller.
    """
    sql_cmd = """
    CREATE TABLE IF NOT EXISTS mpg (
        manufacturer VARCHAR(255),
        model VARCHAR(255),
        displ FLOAT,
        year INT,
        cyl INT,
        trans VARCHAR(255),
        drv VARCHAR(10),
        cty INT,
        hwy INT,
        fl VARCHAR(10),
        class VARCHAR(255),
        cty_kml FLOAT
    );
    """
    try:
        with mysql.connect(
            host=host,
            user=user,
            password=password,
            database=database_name
        ) as con:
            if con.is_connected():
                print("Connection to DIT_EPD successfully, \nWELCOME TO EPD DATABASE!")

                with con.cursor() as cur:
                    cur.execute(sql_cmd)
                    con.commit()

    # Same error handling as insert_data: report connector failures only,
    # so genuine programming errors are not silently swallowed.
    except mysql.Error as e:
        print(f'Error => {e}')

In [108]:
def insert_data(host: str, user: str, password: str, database_name: str, df):
    """Insert every row of ``df`` into the ``mpg`` table of a MySQL database.

    All rows are sent with a single ``executemany`` call and committed
    together. Missing values (NaN / NA / None) are converted to ``None`` so
    they are stored as SQL NULL.

    Args:
        host: MySQL server host name.
        user: MySQL user name.
        password: Password for ``user``.
        database_name: Database (schema) that contains the ``mpg`` table.
        df: DataFrame with the columns manufacturer, model, displ, year, cyl,
            trans, drv, cty, hwy, fl, class and cty_kml.

    Returns:
        None. Connector errors (``mysql.Error``) are printed, not raised.

    Raises:
        KeyError: if ``df`` lacks one of the expected columns.
    """
    columns = [
        'manufacturer', 'model', 'displ', 'year', 'cyl', 'trans',
        'drv', 'cty', 'hwy', 'fl', 'class', 'cty_kml',
    ]
    sql_cmd = """
    INSERT INTO mpg (manufacturer, model, displ, year, cyl, trans, drv, cty, hwy, fl, class, cty_kml)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
    """
    # Build the parameter tuples up front, in the same order as the INSERT
    # column list, replacing missing values with None (-> NULL).
    rows = [
        tuple(None if pd.isna(value) else value for value in record)
        for record in df[columns].itertuples(index=False, name=None)
    ]

    try:
        # Establish connection to MySQL database
        with mysql.connect(
            host=host,
            user=user,
            password=password,
            database=database_name
        ) as con:
            if con.is_connected():
                print("Connection to DIT_EPD successfully, \nWELCOME TO EPD DATABASE!")

                with con.cursor() as cur:
                    if rows:  # nothing to send for an empty DataFrame
                        cur.executemany(sql_cmd, rows)

                # Commit the transaction
                con.commit()
                print('Data has been inserted successfully')

    except mysql.Error as e:
        print(f'Error => {e}')

In [109]:
# The password is read from the MYSQL_PASSWORD environment variable so that it
# is not stored in the notebook; set the variable before starting the kernel.
create_table('localhost', 'root', os.environ['MYSQL_PASSWORD'], 'learning')

Connection to DIT_EPD successfully, 
WELCOME TO EPD DATABASE!


In [110]:
# The password is read from the MYSQL_PASSWORD environment variable so that it
# is not stored in the notebook; set the variable before starting the kernel.
insert_data('localhost', 'root', os.environ['MYSQL_PASSWORD'], 'learning', df)

Connection to DIT_EPD successfully, 
WELCOME TO EPD DATABASE!
Data has been inserted successfully
