In [17]:
import faker
import pandas as pd
import faker_commerce
import random
from datetime import date, timedelta
# Shared Faker instance used by the generator cells in this notebook.
fake = faker.Faker()
# Register the faker_commerce provider so the fake.ecommerce_*() methods exist.
fake.add_provider(faker_commerce.Provider)
# NOTE(review): no seed is set for Faker or `random` in the cells shown here,
# so each run presumably produces different data — confirm whether that is intended.

## Create fake companies

In [22]:
## Utilities

In [5]:
def get_unique_fakes(provider, num_records, oversample_factor=2):
    """Return ``num_records`` distinct values produced by ``provider``.

    The provider is called repeatedly until enough distinct values have been
    collected, using at most ``num_records * oversample_factor`` calls.
    Values are returned in the order they were first generated, so the result
    is reproducible whenever the provider itself is reproducible.

    Args:
        provider: Zero-argument callable returning a hashable value.
        num_records: Number of distinct values to return.
        oversample_factor: Integer cap on provider calls, expressed as a
            multiple of ``num_records``. The default of 2 keeps the original
            "generate at most twice as many as needed" budget.

    Returns:
        A list of exactly ``num_records`` distinct values, or an empty list
        when ``num_records`` is not positive.

    Raises:
        Exception: If the call budget is exhausted before ``num_records``
            distinct values were seen.
    """
    if num_records <= 0:
        return []

    # A dict is used as an ordered set: keys stay in first-seen order, unlike
    # set(), whose iteration order for strings can change between runs.
    unique_records = {}
    for _ in range(num_records * oversample_factor):
        unique_records[provider()] = None
        if len(unique_records) == num_records:
            # Stop as soon as we have enough instead of always burning the
            # whole call budget.
            return list(unique_records)

    raise Exception('Not enough unique records. Try generating fewer records.')


## Create fake companies

In [21]:
num_companies = 10_000


def company_name_provider():
    # Appending a suffix gives more unique names than fake.company() alone.
    return f'{fake.company()} {fake.company_suffix()}'


# Each column is drawn independently and then paired up positionally.
company_names = get_unique_fakes(company_name_provider, num_companies)
company_slogans = get_unique_fakes(fake.catch_phrase, num_companies)
company_purposes = get_unique_fakes(fake.bs, num_companies)

companies = zip(company_names, company_slogans, company_purposes)
company_records = [
    {'name': name, 'slogan': slogan, 'purpose': purpose}
    for name, slogan, purpose in companies
]

# The default integer index is written out as the `id` column of the seed file.
companies_df = pd.DataFrame(company_records).rename_axis('id')
companies_df.to_csv('../seeds/sources/fake_companies.csv')


## Create fake people

In [23]:
# Scratch probe: sample one value from Faker's zipcode_in_state() with no arguments.
fake.zipcode_in_state()

'90852'

In [17]:
# Display the generated companies frame (the rendered output elides the middle rows).
companies_df

Unnamed: 0_level_0,name,slogan,purpose
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,"Lopez, Lynn and Davis Inc",Up-sized actuating circuit,morph open-source initiatives
1,Norris-Cook Inc,Adaptive optimal policy,disintermediate B2C portals
2,Pena-Montgomery Group,Extended bandwidth-monitored open system,benchmark B2B architectures
3,Kent-Humphrey LLC,Reduced optimal capacity,unleash vertical ROI
4,Parker-Anderson Group,Up-sized neutral utilization,morph strategic models
...,...,...,...
9995,Allison Ltd PLC,Universal demand-driven encryption,re-intermediate magnetic e-business
9996,Allen Inc and Sons,Managed web-enabled extranet,iterate rich markets
9997,Hays LLC LLC,Re-contextualized bi-directional budgetary man...,drive revolutionary partnerships
9998,Li PLC Ltd,Digitized homogeneous Local Area Network,scale end-to-end mindshare


## Create dates 

In [16]:
# Calendar spine: one row per day from start_date through end_date, inclusive.
start_date = date(2020, 1, 1)
end_date = date(2029, 12, 31)

# +1 so that end_date itself is part of the range.
num_added_days = (end_date - start_date).days + 1

first_ordinal = start_date.toordinal()
dates = [
    date.fromordinal(first_ordinal + offset)
    for offset in range(num_added_days)
]

dates_df = pd.DataFrame(dates, columns=['date'])
dates_df.to_csv('../seeds/sources/fake_dates.csv', index=False)

## Create number range

In [24]:
# Each value k in 1..100 is repeated k times (5,050 rows in total).
numbers = []
for number in range(1, 101):
    numbers += [number for _ in range(number)]

numbers_df = pd.DataFrame(numbers, columns=['number'])
numbers_df.to_csv('../seeds/sources/fake_numbers.csv', index=False)

## Create fake products

In [3]:
# Scratch probe: sample one product name from the faker_commerce provider.
fake.ecommerce_name()

'For repair Car'

In [4]:
# Scratch probe: sample one product category from the faker_commerce provider.
fake.ecommerce_category()

'Books'

In [15]:
# Scratch probe: the provider's price comes back as a large integer (see the output),
# presumably why the product generator below builds its own price instead.
fake.ecommerce_price()

36653069

In [111]:
# Prototype of the price formula: whole dollars in 3..2499 plus a cents fraction.
# NOTE(review): randrange excludes its stop value, so the cents part is 0..98 and
# a price ending in .99 is never produced — confirm whether that is intended.
random.randrange(3, 2500) + random.randrange(0, 99) / 100

2040.17

In [117]:
def generate_product():
    """Build one fake product record.

    Returns:
        A dict with three keys:
            ``category``: value of ``fake.ecommerce_category()``.
            ``name``: value of ``fake.ecommerce_name()``.
            ``price``: float from 3.00 to 2499.99 inclusive, with at most
                two decimal places.
    """
    return {
        'category': fake.ecommerce_category(),
        'name': fake.ecommerce_name(),
        # Draw the price as whole cents and divide once. The previous form,
        # randrange(3, 2500) + randrange(0, 99) / 100, could never yield .99
        # (randrange excludes its stop value) and added an int to an already
        # rounded float, which can leave artifacts beyond two decimals.
        'price': random.randrange(300, 250_000) / 100,
    }
            
# Generate 10,000 product records and persist them; the default integer index
# is written out as the `id` column of the seed file.
products = [generate_product() for _ in range(10_000)]
products_df = pd.DataFrame(products).rename_axis('id')
products_df.to_csv('../seeds/sources/fake_products.csv', index=True)


Unnamed: 0_level_0,category,name,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Computers,Chicken,118.39
1,Games,Bike,1140.05
2,Tools,Chips,1178.45
3,Automotive,Sleek Cheese,553.00
4,Home,Gently Used Metal Car,1569.84
...,...,...,...
9995,Outdoors,Car,2467.67
9996,Movies,Chicken,1651.63
9997,Music,Bacon,1351.71
9998,Garden,Gorgeous Concrete Fish,645.49
