# Create Fake Customers Data

In [1]:
import pandas as pd
import numpy as np
from faker import Faker
from sqlalchemy import create_engine
#import sql_queries as sql
import configparser
import sys 
import os

In [2]:
script_dir = os.path.abspath('')
mymodule_dir = os.path.join( script_dir, '..', '..', 'database', 'table_setup' )
sys.path.append( mymodule_dir )

import sql_queries

In [4]:
# =============================================================================
# ### SQL CONNECTION
# =============================================================================
encoding = 'utf-16'

config_file_path = os.path.join( script_dir, '..', '..', 'database', 'connection' )
config_file_path += '/config.ini'
config = configparser.ConfigParser()
config.read(config_file_path, encoding = encoding)

HOST=config.get('jj_furniture','host')
USER=config.get('jj_furniture','user')   
PASSWORD=config.get('jj_furniture','password')
DATABASE=config.get('jj_furniture','database')

try:
    db_conn_string = 'postgres://{}:{}@{}/{}'.format(USER,PASSWORD,HOST,DATABASE)
    db_engine = create_engine(db_conn_string)

except Exception as e:
    print(e)
    
   

In [5]:
fake = Faker()

In [6]:
store_type = ['Store', 'Web']

customers = [
    {'customer_id': x+1,
     'first_name': fake.first_name(),
     'last_name': fake.last_name(),
     'email_address': fake.email(),
     'dob': fake.date_between(start_date='-70y', end_date='-18y'),
     'gender': fake.profile()['sex'],
     'street_address': fake.street_address(),
     'state': fake.random_element(elements = ('NC', 'SC', 'TN', 'FL', 'GA', 'MS', 'AL')),
     'date_created': fake.date_between(start_date='-12y'),
     'create_source': np.random.choice(store_type, p = [.8, .2]) # can give probability of selection to Store Type
    }
    for x in range(1000)]


In [7]:
df_customers = pd.DataFrame(customers)

In [8]:
df_customers.head() 

Unnamed: 0,create_source,customer_id,date_created,dob,email_address,first_name,gender,last_name,state,street_address
0,Store,1,2011-06-06,1954-11-08,mccoychristopher@example.com,Lori,F,Lee,FL,7665 Walker Expressway Apt. 666
1,Store,2,2019-03-10,1971-06-07,brownbrian@example.net,Alyssa,M,Schmidt,FL,9902 Keith Mountain
2,Store,3,2018-05-06,1973-10-11,hughesjonathan@example.com,Noah,F,Glenn,FL,1482 Melinda Views
3,Web,4,2020-11-28,2000-06-18,williamsallen@example.com,Daniel,F,Carrillo,SC,120 Jack Avenue Apt. 028
4,Store,5,2018-09-04,1959-02-11,carolpowell@example.net,Shannon,M,Jackson,TN,96254 James Glens


In [9]:
print(sql_queries.insert_customer_table)


INSERT INTO customers (customer_id, first_name, last_name, email_address, dob, gender, street_address, state, date_created, create_source)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)



In [10]:
### insert customer records
for i, row in df_customers.iterrows():
    if i % 100 == 0:
        print(i)
    db_engine.execute(sql_queries.insert_customer_table, 
                        (row['customer_id'],
                        row['first_name'],
                        row['last_name'],
                        row['email_address'],
                        row['dob'],
                        row['gender'],
                        row['street_address'],
                        row['state'],
                        row['date_created'],
                        row['create_source']))

0
100
200
300
400
500
600
700
800
900
