# Import and Initialize

In [106]:
from datetime import datetime
import random

from faker import Faker
import pandas as pd

# Seed both random sources so that "Restart Kernel -> Run All" regenerates the
# same dataset. Faker draws from its own generator, independent of the stdlib
# `random` module, so each of them has to be seeded explicitly.
SEED = 42
random.seed(SEED)

fake = Faker()
fake.seed_instance(SEED)

# GLOBAL PARAMETERS
# Number of rows to generate.
DATA_LENGTH = 10000
# Bounds of the window the row timestamps are drawn from.
start_date = datetime(year=2018, month=11, day=27)
end_date = datetime(year=2021, month=12, day=27)
# Sizes of the customer pool and the city pool that rows are sampled from.
cust_qty = 300
cities_qty = 15

# Make Dates

In [107]:
# One random timestamp per row, drawn between start_date and end_date.
dates = [
    fake.date_time_between_dates(start_date, end_date)
    for _ in range(DATA_LENGTH)
]

# Make Locations

In [108]:
# Fictional provinces; every generated city is assigned to one of them at random.
province_list = [
    'Metropolis', 'Gotham', 'Atlantis', 'Nevandaar', 'Bree', 'Wales', 'Mordor'
]

# Map city name -> (province, postcode). Faker may return the same city name
# more than once, which would silently shrink a dict comprehension, so keep
# drawing until there are exactly `cities_qty` distinct cities.
city_dict = {}
while len(city_dict) < cities_qty:
    city_name = fake.city()
    if city_name not in city_dict:
        city_dict[city_name] = (random.choice(province_list), fake.postcode())

# Pick a city for every row, uniformly over the pool. random.randint includes
# both endpoints, so an index computed as `randint(0, n) - 1` spans -1..n-1 and
# (because -1 wraps around) selects the last city twice as often as the others;
# random.choice avoids that. The name list is built once instead of once per row.
city_names = list(city_dict)
cities = [random.choice(city_names) for _ in range(DATA_LENGTH)]

# Make Customers

In [109]:
# Customer id -> (first_name, last_name) for a pool of `cust_qty` customers.
customer_dict = {
    cust_id: (fake.first_name(), fake.last_name())
    for cust_id in range(cust_qty)
}

# Assign a customer id to every row, uniformly over the pool. random.randint
# includes both endpoints, so indexing with `randint(0, n) - 1` spans -1..n-1
# and picks the last customer twice as often as the others; random.choice does
# not. The id list is built once instead of once per row.
customer_ids = list(customer_dict)
customer_list = [random.choice(customer_ids) for _ in range(DATA_LENGTH)]

# Make Referring Sites

In [110]:
# Pool of referring sites that an order can be attributed to.
ref_sites = [
    'https://www.facebook.com',
    'https://www.google.com',
    'https://www.instagram.com',
    'https://www.baidu.com',
    'https://www.merch.com',
    'https://www.jewels.com',
    'https://www.whatever.com',
]

# Pick a referring site for every row, uniformly over the pool. random.randint
# includes both endpoints, so indexing with `randint(0, n) - 1` spans -1..n-1
# and picks the last site twice as often as the others; random.choice does not.
ref_sites_list = [random.choice(ref_sites) for _ in range(DATA_LENGTH)]

# Make Order Prices

In [111]:
# Order totals: log-normal draws (mu=3, sigma=1.5), each rounded to an integer.
raw_prices = (random.lognormvariate(3, 1.5) for _ in range(DATA_LENGTH))
log_normal = [round(price) for price in raw_prices]

# Initialize DataFrame

In [112]:
# Per-row columns produced by the cells above.
data_dict = {
    'created_at': dates,
    'city': cities,
    'id': customer_list,
    'referring_site': ref_sites_list,
    'current_total_price': log_normal
}

# Column layout of the final frame.
column_order = [
    'created_at',
    'billing_address',
    'city',
    'province',
    'zip',
    'customer',
    'id',
    'first_name',
    'last_name',
    'name',
    'referring_site',
    'current_total_price'
]

df = (
    pd.DataFrame(data_dict)
    # Look up each row's city attributes and customer attributes in the pools.
    .assign(
        province=lambda frame: frame['city'].map(lambda city: city_dict[city][0]),
        zip=lambda frame: frame['city'].map(lambda city: city_dict[city][1]),
        first_name=lambda frame: frame['id'].map(lambda cid: customer_dict[cid][0]),
        last_name=lambda frame: frame['id'].map(lambda cid: customer_dict[cid][1]),
        name=lambda frame: frame['first_name'] + " " + frame['last_name']
    )
    # Nest the flat columns into one dict per row for the two record-style fields.
    .assign(
        billing_address=lambda frame: frame[["city", "province", "zip", "name"]].to_dict('records'),
        customer=lambda frame: frame[["id", "first_name", "last_name"]].to_dict('records')
    )
    .reindex(columns=column_order)
)
df

Unnamed: 0,created_at,billing_address,city,province,zip,customer,id,first_name,last_name,name,referring_site,current_total_price
0,2021-02-12 21:06:21,"{'city': 'South Kylemouth', 'province': 'Nevan...",South Kylemouth,Nevandaar,25179,"{'id': 69, 'first_name': 'Christina', 'last_na...",69,Christina,Payne,Christina Payne,https://www.whatever.com,60
1,2019-11-14 13:43:27,"{'city': 'Chadton', 'province': 'Gotham', 'zip...",Chadton,Gotham,21477,"{'id': 238, 'first_name': 'Lisa', 'last_name':...",238,Lisa,Nunez,Lisa Nunez,https://www.jewels.com,7
2,2019-01-09 08:06:04,"{'city': 'Julieborough', 'province': 'Metropol...",Julieborough,Metropolis,25212,"{'id': 156, 'first_name': 'Judy', 'last_name':...",156,Judy,Willis,Judy Willis,https://www.whatever.com,88
3,2020-07-12 22:47:09,"{'city': 'Chadton', 'province': 'Gotham', 'zip...",Chadton,Gotham,21477,"{'id': 190, 'first_name': 'William', 'last_nam...",190,William,Porter,William Porter,https://www.facebook.com,37
4,2021-04-01 18:13:08,"{'city': 'South Kylemouth', 'province': 'Nevan...",South Kylemouth,Nevandaar,25179,"{'id': 181, 'first_name': 'Kim', 'last_name': ...",181,Kim,Smith,Kim Smith,https://www.merch.com,12
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2019-12-11 07:48:34,"{'city': 'East Maryville', 'province': 'Nevand...",East Maryville,Nevandaar,30237,"{'id': 157, 'first_name': 'Michael', 'last_nam...",157,Michael,Weaver,Michael Weaver,https://www.instagram.com,47
9996,2020-01-26 10:24:40,"{'city': 'Lake Karenstad', 'province': 'Mordor...",Lake Karenstad,Mordor,37642,"{'id': 69, 'first_name': 'Christina', 'last_na...",69,Christina,Payne,Christina Payne,https://www.facebook.com,14
9997,2021-03-20 23:47:01,"{'city': 'South Christophermouth', 'province':...",South Christophermouth,Metropolis,73670,"{'id': 132, 'first_name': 'Colin', 'last_name'...",132,Colin,Kim,Colin Kim,https://www.whatever.com,7
9998,2020-08-25 07:47:32,"{'city': 'Mannfort', 'province': 'Bree', 'zip'...",Mannfort,Bree,45583,"{'id': 241, 'first_name': 'Abigail', 'last_nam...",241,Abigail,Montgomery,Abigail Montgomery,https://www.whatever.com,12


In [113]:
# Write the generated frame to a CSV file in the working directory.
output_path = 'fake_data.csv'
df.to_csv(output_path)

In [114]:
# Spot-check the first row: each billing_address cell is a plain dict with the
# keys city, province, zip and name.
df.billing_address.iloc[0]

{'city': 'South Kylemouth',
 'province': 'Nevandaar',
 'zip': '25179',
 'name': 'Christina Payne'}