# Fake data creation

In [1]:
# Install the `ipynb` package; the import cell below needs it for
# `from ipynb.fs.full.oceanbears_fakedata import *`.
%pip install ipynb

Note: you may need to restart the kernel to use updated packages.


In [2]:
import datetime
import getpass
import logging
import os
import traceback

import ipynb
import mysql.connector
import numpy as np
import pandas as pd
from faker import Faker

# Import the useful functions
from ipynb.fs.full.oceanbears_fakedata import *

2020-07-30 09:51:48.279923 2020-07-30 12:51:48.279923


### Connect Python to the database

In [3]:
# Preview ten generated person records as a table.
pd.DataFrame(data=create_person(num_rows=10))

Unnamed: 0,person_id,p_name,p_phone_number,p_address
0,1,Kyle Spencer,452-466-6346x7747,test
1,2,Nathan Turner,+1-193-099-2840x858,test
2,3,Thomas Ayala,1792485839,test
3,4,Tammy Rogers,+1-573-751-3330x781,test
4,5,Dr. David Yang,3256410696,test
5,6,Hector Hayes,370.483.0176x604,test
6,7,Kimberly Ingram,126.113.7640x1860,test
7,8,Thomas Dunlap,(578)473-1114,test
8,9,Jeremy Allen,281.796.7375x3485,test
9,10,Nicholas Harris,+1-397-481-3995x205,test


In [4]:
# Function to automatically create a connection to the database
def MySQL_Connection(hostname, username, password, dbname_string):
    """Open a MySQL connection and display the tables of the database.

    Parameters
    ----------
    hostname : str
        Host of the MySQL server.
    username : str
        User to authenticate as.
    password : str
        Password for ``username``.
    dbname_string : str
        Name of the database to connect to and list tables from.

    Returns
    -------
    The open connection on success, or ``None`` when connecting or listing
    the tables failed (the error is printed rather than raised).
    """
    connection = None
    try:
        connection = mysql.connector.connect(host = hostname,
                                             user = username,
                                             passwd = password,
                                             db = dbname_string)
        print("Connection successfully established. The tables from the database are listed below.")
        # Double any backtick in the name so it cannot break out of the quoted identifier.
        quoted_db = "`" + str(dbname_string).replace("`", "``") + "`"
        tables = pd.read_sql_query('SHOW TABLES FROM ' + quoted_db, connection)
        display(tables)
        return connection
    except Exception as e:
        # The caller only receives None on failure, so a connection opened
        # before the error would otherwise be leaked.
        if connection is not None:
            try:
                connection.close()
            except Exception:
                pass  # already reporting the original error below
        print(e.__doc__)
        print(e)
        return None

In [5]:
# Connection settings are read from the environment so that credentials are
# not stored in the notebook. The non-secret defaults match the previous
# hardcoded values.
host = os.environ.get('MYSQL_HOST', '127.0.0.1')
user = os.environ.get('MYSQL_USER', 'root')
db = os.environ.get('MYSQL_DATABASE', 'mydb')

# Set MYSQL_PASSWORD for unattended runs; otherwise the password is prompted
# for interactively and never echoed or saved in the notebook.
pwd = os.environ.get('MYSQL_PASSWORD') or getpass.getpass(
    'MySQL password for {}@{}: '.format(user, host))

connection = MySQL_Connection(host, user, pwd, db)

Connection successfully established. The tables from the database are listed below.


Unnamed: 0,Tables_in_mydb
0,Event
1,Event_staff
2,Gear
3,Inventory
4,Inventory_for_rent
5,Inventory_for_sell
6,Is_component_of
7,Item
8,Item_color
9,Lesson


### Generate data for each table

In [6]:
# Map each table name to its freshly generated rows. Every generator is
# called once, here, in the order listed; the same order is used later when
# iterating over the keys.
col_rows_match = dict(
    Person=create_person(),
    Surf_teacher=create_surf_teacher(),
    Surfer=create_surfer(),
    Event_staff=create_event_staffer(),
    Other_person=create_other_person(),
    Item=create_item(),
    Paddle=create_paddle(),
    Gear=create_gear(),
    Surfboard=create_surfboard(),
    Wetsuit=create_wetsuit(),
    Other_item=create_other_item(),
    Shop=create_shop(),
    Manufacturer=create_manufacturer(),
    Location=create_location(),
    Inventory=create_inventory(),
    Inventory_for_rent=create_inventory_for_rent(),
    Inventory_for_sell=create_inventory_for_sell(),
    Event=create_event(),
    Lesson=create_lesson(),
    Takes_lesson=create_takes_lesson(),
    Registers_for_event=create_registers_for_event(),
    # Currently disabled:
    # Manufacturer_sponsors_event=create_manufacturer_sponsors_event(),
    Manufacturer_sponsors_teacher=create_manufacturer_sponsors_teacher(),
    Shop_sponsors_event=create_shop_sponsors_event(),
    Produces=create_produces(),
    Makes_transaction=create_makes_transaction(),
    Is_component_of=create_is_component_of(),
    Item_color=create_item_color(),
    Plant_location=create_plant_location(),
    Teacher_diploma=create_teacher_diploma(),
    Person_email_address=create_person_email_address(),
    Type_of_event=create_type_of_event(),
)


In [7]:
# Read the current contents of the Person table and show its first rows.
test_person = pd.read_sql_query(sql="SELECT * FROM Person", con=connection)
test_person.head(5)

Unnamed: 0,person_id,p_name,p_phone_number,p_address
0,1,Joshua Evans,440.618.1276x74433,test
1,2,Joseph Ellis,888-363-2403x7622,test
2,3,Robin Rodriguez,374.586.7706x19584,test
3,4,Holly Ingram,115-380-4836x47118,test
4,5,Timothy Barnes,(765)521-4160x5743,test


In [8]:
# Reuse the Person rows that were already generated in col_rows_match.
new_person = col_rows_match['Person']
# These rows are appended to test_person in the next cell.

In [9]:
# DataFrame.append was removed in pandas 2.0; pd.concat gives the same result
# (existing rows first, original index labels kept).
# Note: re-running this cell appends the generated rows again.
test_person = pd.concat([test_person, pd.DataFrame(new_person)])

In [10]:
# Show the combined frame: rows read from the database followed by the generated ones.
test_person

Unnamed: 0,person_id,p_name,p_phone_number,p_address
0,1,Joshua Evans,440.618.1276x74433,test
1,2,Joseph Ellis,888-363-2403x7622,test
2,3,Robin Rodriguez,374.586.7706x19584,test
3,4,Holly Ingram,115-380-4836x47118,test
4,5,Timothy Barnes,(765)521-4160x5743,test
...,...,...,...,...
95,96,Jeffrey Mcdaniel,361-477-5611,test
96,97,Renee Pugh,(243)550-5259,test
97,98,Michael Wilson,128-259-3771x6625,test
98,99,Martin Patterson,(098)561-9648x9316,test


### Push new data to the MySQL database

In [11]:
def DataFrame_to_MySQL(dataframe, table_name_string, conn=None):
    """Insert every row of ``dataframe`` into a MySQL table and commit.

    One parameterised ``INSERT`` statement is built from the dataframe's
    column labels, printed, and executed for all rows.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Rows to insert; its column labels are used as the column names.
    table_name_string : str
        Name of the target table.
    conn : optional
        Connection to write through. When omitted, the notebook-level
        ``connection`` variable is used, as before.

    Raises
    ------
    Any exception raised while executing or committing is re-raised after
    the transaction has been rolled back.
    """
    if conn is None:
        conn = connection  # fall back to the notebook-level connection

    def _quote(identifier):
        # Backtick-quote an identifier; str() tolerates non-string labels and
        # doubling embedded backticks keeps the quoting intact.
        return "`" + str(identifier).replace("`", "``") + "`"

    df_columns = list(dataframe.columns)
    column_list = ", ".join(_quote(col) for col in df_columns)
    placeholders = ", ".join(["%s"] * len(df_columns))
    sql_insert_query = ("INSERT INTO " + _quote(table_name_string)
                        + " (" + column_list + ") VALUES (" + placeholders + ")")
    print(sql_insert_query)

    # itertuples keeps each column's own dtype and yields plain Python
    # scalars, whereas iterrows upcasts a row to one dtype (an integer id
    # next to a float column would be sent as a float).
    rows = list(dataframe.itertuples(index=False, name=None))

    cursor = conn.cursor()
    try:
        if rows:
            cursor.executemany(sql_insert_query, rows)
        # Commit changes
        conn.commit()
    except Exception:
        # Do not leave a half-inserted table behind.
        conn.rollback()
        raise
    finally:
        cursor.close()

In [12]:
def clean_tables(conn=None, tables=None):
    """Truncate the given tables with foreign-key checks temporarily disabled.

    Parameters
    ----------
    conn : optional
        Connection to use. Defaults to the notebook-level ``connection``.
    tables : iterable of str, optional
        Table names to truncate. Defaults to the keys of ``col_rows_match``.

    A table that cannot be truncated is reported and skipped, so the
    remaining tables are still processed. Foreign-key checks are always
    switched back on, and the cursor is always closed.
    """
    if conn is None:
        conn = connection  # fall back to the notebook-level connection
    if tables is None:
        tables = col_rows_match.keys()

    cursor = conn.cursor()
    try:
        cursor.execute("SET FOREIGN_KEY_CHECKS=0")
        try:
            for table in tables:
                # Backtick-quote the name, doubling any embedded backtick.
                quoted = "`" + str(table).replace("`", "``") + "`"
                try:
                    cursor.execute("TRUNCATE TABLE {}".format(quoted))
                except Exception as exc:
                    # Best effort: keep going, but say which table was skipped
                    # instead of hiding the failure.
                    print("Could not truncate {}: {}".format(table, exc))
        finally:
            cursor.execute("SET FOREIGN_KEY_CHECKS=1")
    finally:
        cursor.close()

In [13]:
# Destructive: issues TRUNCATE TABLE for every table named in col_rows_match.
clean_tables()

In [14]:
# Load each generated dataset into its table, in the order of col_rows_match.
for table in col_rows_match.keys():
    # Pull the table to learn its column layout
    init_df = pd.read_sql_query('SELECT * FROM {}'.format(table), connection)

    # Generate new data (thanks Loulou)
    new_data = col_rows_match[table]

    # Combine the table's columns with the new rows. DataFrame.append was
    # removed in pandas 2.0, so pd.concat is used instead. Only the column
    # layout of init_df is kept (iloc[:0]) so that rows already present in the
    # table are never re-inserted if a truncate was skipped.
    new_df = pd.concat([init_df.iloc[:0], pd.DataFrame(new_data)])

    # Commit and push it to the database
    DataFrame_to_MySQL(new_df, table)

INSERT INTO `Person` (`person_id`, `p_name`, `p_phone_number`, `p_address`) VALUES (%s, %s, %s, %s)
INSERT INTO `Surf_teacher` (`hourly_rate`, `person_id`) VALUES (%s, %s)
INSERT INTO `Surfer` (`person_id`) VALUES (%s)
INSERT INTO `Event_staff` (`role`, `person_id`) VALUES (%s, %s)
INSERT INTO `Other_person` (`type`, `person_id`) VALUES (%s, %s)
INSERT INTO `Item` (`item_id`, `i_name`, `brand`, `description`) VALUES (%s, %s, %s, %s)
INSERT INTO `Paddle` (`item_id`, `material`, `weight`, `size`) VALUES (%s, %s, %s, %s)
INSERT INTO `Gear` (`item_id`, `category`, `gender`, `size`) VALUES (%s, %s, %s, %s)
INSERT INTO `Surfboard` (`item_id`, `shape`, `dimensions`, `material`, `level`, `fins`) VALUES (%s, %s, %s, %s, %s, %s)
INSERT INTO `Wetsuit` (`item_id`, `thickness`, `size`, `gender`) VALUES (%s, %s, %s, %s)
INSERT INTO `Other_item` (`item_id`) VALUES (%s)
INSERT INTO `Shop` (`shop_id`, `s_name`, `s_address`, `s_phone_number`, `opening_time`, `closing_time`) VALUES (%s, %s, %s, %s, %s, %