In [18]:
from urllib.parse import quote_plus
import mysql.connector
import pandas as pd
from sqlalchemy import create_engine

In [19]:
def connection(hostname, username, password, dbname):
    """Open a MySQL connection through ``mysql.connector``.

    Args:
        hostname: Host name or address of the MySQL server.
        username: Account name used to log in.
        password: Plain-text password for that account. It is handed to the
            driver unchanged; percent-encoding is only meaningful inside a
            connection URL and would corrupt the value here.
        dbname: Database (schema) selected for the session.

    Returns:
        The connection object returned by ``mysql.connector.connect``, or
        ``None`` when the driver raises ``mysql.connector.Error`` (the error
        is printed instead of being propagated).
    """
    # Initialize connection to None so a failed attempt still returns a value
    con = None

    # Create connection while checking for any errors
    try:
        # Connection settings are passed by keyword, as the driver documents.
        con = mysql.connector.connect(
            host=hostname,
            user=username,
            password=password,
            database=dbname,
        )
        print("Connection successful!")
    except mysql.connector.Error as e:
        # Best-effort behaviour: report the driver error and return None.
        print(f"The error {e} has occured.")

    return con

def engine(hostname, username, password, dbname, port):
    """Create a SQLAlchemy engine for a MySQL database using the PyMySQL driver.

    Args:
        hostname: Host name or address of the MySQL server.
        username: Account name; percent-encoded before being placed in the URL.
        password: Plain-text password; percent-encoded before being placed in
            the URL.
        dbname: Database (schema) name used as the URL path.
        port: Server port, interpolated into the URL as given.

    Returns:
        The object returned by ``create_engine`` for the assembled URL.
    """
    from urllib.parse import quote

    # Percent-encode the credentials: a raw '@', ':' or '/' would otherwise be
    # read as a URL delimiter. safe='' also encodes '/', and spaces become
    # '%20' rather than '+', so the value decodes back exactly.
    user = quote(username, safe='')
    secret = quote(password, safe='')

    # Create engine
    url = f'mysql+pymysql://{user}:{secret}@{hostname}:{port}/{dbname}'
    return create_engine(url)

In [21]:
def csv_to_staging(datafile):
    """Load a CSV file into the ``feedback_source.feedback`` staging table.

    The file is parsed with the literal two-character separator ``', '``
    using pandas' Python parsing engine, and the resulting frame replaces any
    existing ``feedback`` table.

    Args:
        datafile: Path (or other input accepted by ``pd.read_csv``) of the CSV
            file to load.

    Returns:
        None.
    """
    # Create engine using the pre defined engine function
    # NOTE(review): the connection settings, including the password, are
    # hard-coded here; consider reading them from the environment instead.
    eng = engine("localhost", "root", "Layaldbroot1997", "feedback_source", "3306")

    try:
        # Load csv to dataframe
        # NOTE(review): a multi-character separator is treated as a regular
        # expression, so quote characters in the file are presumably kept as
        # part of the data -- confirm against the input file.
        df = pd.read_csv(datafile, sep=', ', engine='python')

        # Load dataframe to staging schema
        df.to_sql(
            name="feedback",
            con=eng,
            schema="feedback_source",
            if_exists="replace",
            index=False,
        )
    finally:
        # Release the engine's pooled connections even if the load fails.
        eng.dispose()

# Run the staging load for the sentiment CSV as soon as this cell executes
# (the path is relative to the current working directory).
csv_to_staging("Data/sentiment-analysis.csv")

def extract_source():
    """Read every row of ``feedback_source.feedback`` into a DataFrame.

    Returns:
        pandas.DataFrame: One row per fetched record, with column names taken
        from the first field of each ``cursor.description`` entry.

    Raises:
        ConnectionError: If ``connection`` returned ``None``, i.e. the
            database connection could not be established.
    """
    # Create connection and cursor
    # NOTE(review): the connection settings, including the password, are
    # hard-coded here; consider reading them from the environment instead.
    conn = connection("localhost", "root", "Layaldbroot1997", "feedback_source")
    if conn is None:
        # connection() reports failures by returning None; fail with a clear
        # message instead of an AttributeError on `None.cursor()`.
        raise ConnectionError("Could not connect to the feedback_source database.")

    try:
        cursor = conn.cursor()
        try:
            # The query extracts all data in the staging schema
            cursor.execute("Select * from feedback_source.feedback")
            rows = cursor.fetchall()

            # Column names must be read before the cursor is closed
            column_names = [col[0] for col in cursor.description]
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        conn.close()

    # Load cursor data to a dataframe
    return pd.DataFrame(rows, columns=column_names)

In [None]:
def _clean_feedback(raw):
    """Return a cleaned copy of a raw staging frame; ``raw`` is not modified."""
    # Drop duplicates, then rename columns properly.
    # `columns=` is required: a bare mapping passed to rename() is applied to
    # the index labels, which would leave the quoted column names in place.
    cleaned = raw.drop_duplicates().rename(
        columns={'"Text': 'Text', 'Confidence Score"': 'Confidence Score'}
    )

    # Remove stray characters from values (literal matches, no regex needed)
    cleaned = cleaned.assign(**{
        'Text': cleaned['Text'].str.replace('"', '', regex=False),
        'Confidence Score': cleaned['Confidence Score'].str.replace('"', '', regex=False),
        'User ID': cleaned['User ID'].str.replace('@', '', regex=False),
    })

    # Change some data types
    cleaned = cleaned.assign(**{
        'Date/Time': pd.to_datetime(cleaned['Date/Time']),
        'Confidence Score': cleaned['Confidence Score'].astype('float64'),
    })

    return cleaned


def transform_feedback():
    """Extract the staged feedback rows and return them cleaned.

    The data is fetched with ``extract_source`` and then:

    * duplicate rows are dropped;
    * the ``"Text`` and ``Confidence Score"`` columns are renamed to ``Text``
      and ``Confidence Score``;
    * double quotes are removed from ``Text`` and ``Confidence Score`` values
      and ``@`` is removed from ``User ID`` values;
    * ``Date/Time`` is converted with ``pd.to_datetime`` and
      ``Confidence Score`` is cast to ``float64``.

    Returns:
        pandas.DataFrame: The cleaned frame. The index is not reset after
        duplicates are dropped.
    """
    # Extract the data from staging area
    return _clean_feedback(extract_source())