# Citibike Project - Creating the SQL Database

Creating the SQL database from the dataframes in `03_normalizing_datasets.ipynb`.

Before running, activate an environment that has `mysql-connector-python` (imported as `mysql.connector`), `SQLAlchemy`, `pandas`, and `pyarrow` installed.

In [1]:
import mysql.connector
import os
import pandas as pd
import pyarrow.parquet as pq

from sqlalchemy import create_engine

The `biketype` table has three rows:
- `classic_bike`
- `electric_bike`
- `docked_bike`

First, a handy function to check if a table already exists:

In [16]:
def mysql_table_checker(
    your_host: str,
    your_user: str,
    your_password: str,
    your_database: str,
    your_table_name: str,
    verbose: bool = False,
):
    """Report whether a table exists in a MySQL database.

    Opens a connection to ``your_database``, runs ``SHOW TABLES LIKE`` for
    ``your_table_name``, and closes the cursor and connection again before
    returning, including when the query raises.

    Parameters
    ----------
    your_host : str
        Host name passed to ``mysql.connector.connect``.
    your_user : str
        User name passed to ``mysql.connector.connect``.
    your_password : str
        Password passed to ``mysql.connector.connect``.
    your_database : str
        Database to connect to and look in.
    your_table_name : str
        Table name to look for. It is used as a ``LIKE`` pattern, so ``_``
        and ``%`` in the name act as wildcards.
    verbose : bool, default False
        If truthy, return a human-readable sentence instead of a boolean.

    Returns
    -------
    bool or str
        With ``verbose`` falsy: ``True`` if at least one table matched,
        otherwise ``False``. With ``verbose`` truthy: a sentence stating
        whether the table exists in the database.

    Notes
    -----
    Exceptions raised while connecting or querying are not caught here and
    propagate to the caller.
    """
    conn = mysql.connector.connect(
        host=your_host,
        user=your_user,
        password=your_password,
        database=your_database,
    )
    try:
        cursor = conn.cursor()
        try:
            # Bind the table name as a parameter instead of formatting it into
            # the statement, so quotes in the name cannot break the query.
            cursor.execute("SHOW TABLES LIKE %s", (your_table_name,))
            # Read every row: a wildcard pattern may match several tables, and
            # no unread rows should be left behind when the cursor is closed.
            table_found = len(cursor.fetchall()) > 0
        finally:
            cursor.close()
    finally:
        conn.close()

    if not verbose:
        return table_found
    if table_found:
        return f"Table '{your_table_name}' exists in the database {your_database}."
    return f"Table '{your_table_name}' does not exist in the database {your_database}."

In [17]:
# Quick check of the helper against the local database; the cell displays
# the value it returns.
mysql_table_checker(
    'localhost',         # your_host
    'root',              # your_user
    'rootroot',          # your_password
    'citibike_project',  # your_database
    'biketype',          # your_table_name
)

True

### Initialize MySQL Database

We'll call the database `citibike_project`

In [18]:
# Name of the MySQL database this notebook creates.
database_name = "citibike_project"

In [19]:
# Marker directory: its presence records that the database has already been
# created, so re-running this cell is a no-op.
flow_control='/Users/sra/files/projects/citibike_project/combined/group1_combined/flow_control/database_made'

# Defined here as well so this cell does not depend on another cell having run.
database_name = 'citibike_project'

if not os.path.exists(flow_control):

    # Connect to the MySQL server (no database selected yet)
    conn = mysql.connector.connect(
        host='localhost',
        user='root',  # Replace with your MySQL username
        password='rootroot'  # Replace with your MySQL password
    )
    try:
        cursor = conn.cursor()
        try:
            # Create the "citibike_project" database
            create_db_query = f"CREATE DATABASE IF NOT EXISTS {database_name}"
            cursor.execute(create_db_query)
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the statement failed
        conn.close()

    # Write the marker only after the statement succeeded, so a failed attempt
    # can simply be re-run. makedirs also creates the parent `flow_control`
    # directory when it does not exist yet.
    os.makedirs(flow_control, exist_ok=True)
    flag=True

    print(f"Database '{database_name}' created successfully.")

else:

    flag=False
    print(f"Error: Database '{database_name}' is already created.")

Error: Database 'citibike_project' is already created.


### Biketype Table:

In [20]:
# Parent directory that holds all "<name>_made" marker directories.
flow_control='/Users/sra/files/projects/citibike_project/combined/group1_combined/flow_control'
os.makedirs(flow_control, exist_ok=True)

table_name='biketype'

# Marker directory: its presence records that this table was already created
# and populated, so re-running the cell does not insert the rows twice.
flow_control='/Users/sra/files/projects/citibike_project/combined/group1_combined/flow_control/biketype_made'

if not os.path.exists(flow_control):
    # Establish a connection to the MySQL database
    conn = mysql.connector.connect(
        host='localhost',
        user='root',
        password='rootroot',
        database='citibike_project'
    )
    try:
        cursor = conn.cursor()
        try:
            # Create the table in the MySQL database with the desired schema
            create_table_query = f'''
        CREATE TABLE IF NOT EXISTS {table_name} (
            id TINYINT,
            type VARCHAR(255)
        )
    '''
            cursor.execute(create_table_query)

            # Rows to insert, one (id, type) tuple per bike type
            data = [
                ('0', 'classic_bike'),
                ('1', 'electric_bike'),
                ('2', 'docked_bike')
            ]

            # Insert the data into the table
            insert_query = f'''
        INSERT INTO {table_name} (id, type)
        VALUES (%s, %s)
    '''
            cursor.executemany(insert_query, data)

            # Commit the inserted rows
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if a statement failed
        conn.close()

    # Write the marker only after the rows were committed, so a failed attempt
    # can simply be re-run.
    os.makedirs(flow_control, exist_ok=True)
    flag=True

    print(f"Table '{table_name}' created successfully.")

else:

    flag=False
    print(f"Error: Table '{table_name}' is already created.")

Error: Table 'biketype' is already created.


### Ridertype Table:

In [21]:
table_name='ridertype'

# Marker directory: its presence records that this table was already created
# and populated, so re-running the cell does not insert the rows twice.
flow_control='/Users/sra/files/projects/citibike_project/combined/group1_combined/flow_control/ridertype_made'

if not os.path.exists(flow_control):
    # Establish a connection to the MySQL database
    conn = mysql.connector.connect(
        host='localhost',
        user='root',
        password='rootroot',
        database='citibike_project'
    )
    try:
        cursor = conn.cursor()
        try:
            # Create the table in the MySQL database with the desired schema
            create_table_query = f'''
        CREATE TABLE IF NOT EXISTS {table_name} (
            id TINYINT,
            type VARCHAR(255)
        )
    '''
            cursor.execute(create_table_query)

            # Rows to insert, one (id, type) tuple per rider type
            data = [
                ('0', 'member'),
                ('1', 'casual')
            ]

            # Insert the data into the table
            insert_query = f'''
        INSERT INTO {table_name} (id, type)
        VALUES (%s, %s)
    '''
            cursor.executemany(insert_query, data)

            # Commit the inserted rows
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if a statement failed
        conn.close()

    # Write the marker only after the rows were committed, so a failed attempt
    # can simply be re-run. makedirs also creates the parent `flow_control`
    # directory when it does not exist yet.
    os.makedirs(flow_control, exist_ok=True)
    flag=True

    print(f"Table '{table_name}' created successfully.")

else:

    flag=False
    print(f"Error: Table '{table_name}' is already created.")

Error: Table 'ridertype' is already created.


### Ride Table:

The following code takes the data from `group1_pl_ridenorm.parquet` and loads it into a SQL table.

**Note**: This will take a long time as the code has to convert the `parquet` file to `pandas` before moving it to SQL.

The kernel kept crashing at the step "writing the DataFrame to a MySQL table using SQLAlchemy". Because of this, I will import the data manually, and the code below is left disabled (`start_this_code=False`).

In [2]:
# Safety switch: the load below is slow and memory-hungry, so it only runs
# when this is set to True.
start_this_code=False

if start_this_code:

    table_name='rides'
    your_host='localhost'
    your_username='root'
    your_password='rootroot'
    your_database='citibike_project'

    # Marker directory: its presence records that the table was already loaded.
    flow_control=f'/Users/sra/files/projects/citibike_project/combined/group1_combined/flow_control/{table_name}_made'

    if not os.path.exists(flow_control):

        # load the Parquet data into a pandas DataFrame
        parquet_file = '/Users/sra/files/projects/citibike_project/combined/group1_combined/group1_pl_ridenorm.parquet'
        parquet_table = pq.read_table(parquet_file)
        print('read parquet table')

        df = parquet_table.to_pandas()
        # Drop the Arrow copy so it does not stay in memory next to the DataFrame
        del parquet_table
        print('parquet table is now in pandas')

        # connect to the MySQL database using SQLAlchemy engine
        engine = create_engine(f'mysql+mysqlconnector://{your_username}:{your_password}@{your_host}/{your_database}')
        print('engine is created')

        # write the DataFrame to a MySQL table using SQLAlchemy, in batches
        # rather than all rows at once, to keep memory use during the write bounded
        df.to_sql(name=table_name, con=engine, if_exists='replace', index=False, chunksize=100_000)
        print('written table to MySQL')

        # Write the marker only after the load finished, so a crashed attempt
        # can be re-run. makedirs also creates missing parent directories.
        os.makedirs(flow_control, exist_ok=True)
        flag=True

        print(f"Created table '{table_name}' successfully.")

    else:

        flag=False
        print(f"Error: Table '{table_name}' is already created.")

else:

    print(f'start_this_code={start_this_code}. The code was not run.')

start_this_code=False. The code was not run.


Save the `.parquet` files as `.csv` files so they can be imported with MySQL Workbench:

In [5]:
# Convert each normalized dataset from Parquet to CSV: read the Parquet file
# into pandas, then write it back out next to the original with a .csv suffix.
base_dir = '/Users/sra/files/projects/citibike_project/combined/group1_combined'

for dataset in ('ridenorm', 'stationnorm'):

    # group1_<dataset>: Parquet -> pandas
    parquet_file = f'{base_dir}/group1_pl_{dataset}.parquet'
    parquet_table = pq.read_table(parquet_file)
    df = parquet_table.to_pandas()
    print(f'{dataset}: parquet table to pandas done')

    # group1_<dataset>: pandas -> CSV (without the index column)
    csv_file = f'{base_dir}/group1_pl_{dataset}.csv'
    df.to_csv(csv_file, index=False)
    print(f'{dataset}: pandas table to csv done')

ridenorm: parquet table to pandas done
ridenorm: pandas table to csv done
stationnorm: parquet table to pandas done
stationnorm: pandas table to csv done
