# Create Fake Customers Data

In [1]:
import pandas as pd
import numpy as np
from faker import Faker
from sqlalchemy import create_engine
#import sql_queries as sql
import configparser
import sys 
import os

In [2]:
script_dir = os.path.abspath('')
mymodule_dir = os.path.join( script_dir, '..', '..', 'database', 'table_setup' )
sys.path.append( mymodule_dir )

import sql_queries

In [3]:
# =============================================================================
# ### SQL CONNECTION
# =============================================================================
encoding = 'utf-16'

config_file_path = os.path.join( script_dir, '..', '..', 'database', 'connection' )
config_file_path += '/config.ini'
config = configparser.ConfigParser()
config.read(config_file_path, encoding = encoding)

HOST=config.get('jj_furniture','host')
USER=config.get('jj_furniture','user')   
PASSWORD=config.get('jj_furniture','password')
DATABASE=config.get('jj_furniture','database')

try:
    db_conn_string = 'postgres://{}:{}@{}/{}'.format(USER,PASSWORD,HOST,DATABASE)
    db_engine = create_engine(db_conn_string)

except Exception as e:
    print(e)
    
   

In [4]:
fake = Faker()

In [5]:
store_type = ['Store', 'Web']
num_customers = 1000

customers = [
    {'customer_id': x+1,
     'first_name': fake.first_name(),
     'last_name': fake.last_name(),
     'email_address': fake.email(),
     'dob': fake.date_between(start_date='-70y', end_date='-18y'),
     'gender': fake.profile()['sex'],
     'street_address': fake.street_address(),
     'state': fake.random_element(elements = ('NC', 'SC', 'TN', 'FL', 'GA', 'MS', 'AL')),
     'date_created': fake.date_between(start_date='-12y'),
     'create_source': np.random.choice(store_type, p = [.8, .2]) # can give probability of selection to Store Type
    }
    for x in range(num_customers)]


In [6]:
df_customers = pd.DataFrame(customers)

In [7]:
df_customers.head() 

Unnamed: 0,create_source,customer_id,date_created,dob,email_address,first_name,gender,last_name,state,street_address
0,Store,1,2012-11-07,1976-11-01,john56@example.net,Joseph,F,Stone,SC,6940 Turner Junctions Suite 849
1,Store,2,2017-07-26,1975-09-25,marcus06@example.com,Monica,F,Jacobson,NC,2323 Sean Wells Suite 840
2,Store,3,2020-11-25,1967-01-13,daniel86@example.net,Lauren,M,Smith,SC,88563 Pamela Parkway
3,Store,4,2021-12-12,1988-03-31,martinerin@example.com,Norman,F,Hodge,NC,3033 Sullivan Avenue
4,Store,5,2020-01-18,1994-08-18,bryanbarnes@example.com,Andrea,F,Hunter,NC,68768 Wanda Locks


In [8]:
print(sql_queries.insert_customer_table)


INSERT INTO customers (customer_id, first_name, last_name, email_address,
dob, gender, street_address, state, date_created, create_source)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)



In [9]:
### insert customer records
for i, row in df_customers.iterrows():
    if i % 100 == 0:
        print(i)
    db_engine.execute(sql_queries.insert_customer_table, 
                        (row['customer_id'],
                        row['first_name'],
                        row['last_name'],
                        row['email_address'],
                        row['dob'],
                        row['gender'],
                        row['street_address'],
                        row['state'],
                        row['date_created'],
                        row['create_source']))

0
100
200
300
400
500
600
700
800
900
