In [2]:
import datajoint as dj

# Bind to the 'app' database schema; the `@schema` decorator used in the cells
# below declares each table class inside it.
schema = dj.Schema('app')

[2024-09-08 01:40:45,905][INFO]: Connecting root@localhost:3306
[2024-09-08 01:40:45,953][INFO]: Connected root@localhost:3306


In [3]:
@schema
class Account(dj.Manual):
    """Manually populated table of user accounts, keyed by phone number.

    `dob` defaults to NULL and `sex` defaults to the empty string, so both
    attributes may be omitted when inserting a row.
    """
    definition = """
    phone : bigint unsigned 
    ---
    first_name : varchar(30)
    last_name : varchar(30)
    dob=null : date
    sex='' : enum('F', 'M', '')
    """

In [4]:
@schema
class CreditCard(dj.Manual):
    """Manually populated table of credit cards, keyed by card number.

    Each card references its owning `Account` through a foreign key declared
    below the `---` separator, i.e. as a secondary (non-key) attribute, so one
    account may hold several cards.
    """
    definition = """
    card_number :  bigint unsigned 
    ---
    exp_date : date 
    cvv      : smallint unsigned
    zipcode  : int unsigned       
    -> Account
    """

In [5]:
@schema
class AddOn(dj.Lookup):
    """Lookup table of purchasable add-ons, keyed by `addon_id`.

    The rows are supplied through `contents` as
    (addon_id, addon_name, price) tuples.
    """
    definition = """
    addon_id : int
    ---
    addon_name : varchar(30)
    price : decimal(5, 2) unsigned
    """
    contents = ((1, "Track & Field", 13.99), (2, "Marathon", 26.2), (3, "Sprint", 100.00))

In [6]:
# Preview the lookup table to confirm that its `contents` rows are present.
AddOn()

addon_id,addon_name,price
1,Track & Field,13.99
2,Marathon,26.2
3,Sprint,100.0


In [7]:
@schema
class Purchase(dj.Manual):
    """Manually populated table recording an account's purchase of an add-on.

    The primary key is the pair (Account, AddOn), so a given account can hold
    at most one purchase row per add-on. The paying `CreditCard` and the
    purchase date are secondary attributes.

    NOTE(review): the foreign key to `CreditCard` is independent of the one to
    `Account`; nothing in this definition requires the card to belong to the
    purchasing account -- confirm whether that constraint is intended.
    """
    definition = """
    -> Account
    -> AddOn
    ---
    -> CreditCard
    purchase_date : date 
    """

In [8]:
# Preview the freshly declared table; it has no rows yet.
Purchase()

phone,addon_id,card_number,purchase_date
,,,


In [None]:
# TODO: dj.Diagram(schema) does not work here; it raises a FileNotFoundError
# (possibly because the Graphviz `dot` executable is not installed -- to be confirmed).

In [13]:
import random
# NOTE(review): tqdm does not appear to be used in the cells shown here -- confirm before removing.
from tqdm import tqdm
from faker import Faker
# Shared Faker instance used by the data-generation cells below to produce
# fake names, dates and card numbers.
fake = Faker()

In [14]:
# Insert a single randomly generated male account.
# `phone` is the primary key and is drawn at random as an 11-digit number.
Account.insert1({
    "phone": fake.random_int(1_000_000_0000, 9_999_999_9999),
    "first_name": fake.first_name_male(),
    "last_name": fake.last_name(),
    "sex": "M",
    "dob": fake.date_of_birth(),
})

In [15]:
# Bulk-insert 5000 randomly generated male accounts.
# The rows are produced lazily by a generator expression, one dict per account.
Account.insert(
    {
        "phone": fake.random_int(1_000_000_0000, 9_999_999_9999),
        "first_name": fake.first_name_male(),
        "last_name": fake.last_name(),
        "sex": 'M',
        "dob": fake.date_of_birth(),
    }
    for _ in range(5000)
)


In [16]:
# Bulk-insert 5000 randomly generated female accounts.
# The rows are produced lazily by a generator expression, one dict per account.
Account.insert(
    {
        "phone": fake.random_int(1_000_000_0000, 9_999_999_9999),
        "first_name": fake.first_name_female(),
        "last_name": fake.last_name(),
        "sex": 'F',
        "dob": fake.date_of_birth(),
    }
    for _ in range(5000)
)

# Bulk-insert 500 accounts that omit `sex` and `dob`, so those attributes
# fall back to the defaults declared in the Account definition.
Account.insert(
    {
        "phone": fake.random_int(1_000_000_0000, 9_999_999_9999),
        "first_name": fake.first_name(),
        "last_name": fake.last_name(),
    }
    for _ in range(500)
)

In [17]:
# Preview the populated table; the notebook renders only the first rows.
Account()

phone,first_name,last_name,dob,sex
10007851915,John,Williams,2024-02-20,M
10026658401,Heather,Campbell,1918-10-11,F
10036285947,Kathleen,Jackson,1910-05-06,F
10039254519,Anthony,Allen,2015-03-23,M
10051296134,Adam,Foley,1970-03-18,M
10052458966,Alison,Park,1982-09-20,F
10062775205,Chad,Wilson,2007-06-14,M
10069191984,Kyle,Serrano,1950-02-25,M
10080048678,Robert,Jenkins,1910-05-31,M
10095315699,Brenda,Simon,1976-06-03,F


In [18]:
# get account ids:
# Fetch the primary key of every Account row; each entry is used below as a
# mapping that is merged into a new CreditCard row.
keys = Account.fetch("KEY")

In [19]:
# Insert a single credit card owned by a randomly chosen account.
# The chosen account key is unpacked first, then the card attributes are added.
CreditCard.insert1({
    **random.choice(keys),
    "zipcode": random.randint(10000, 99999),
    "card_number": int(fake.credit_card_number()),
    "cvv": random.randint(1, 999),
    "exp_date": fake.future_date(),
})

In [20]:
# Bulk-insert 15000 credit cards, each owned by an independently chosen
# random account (so an account may end up with zero, one or several cards).
CreditCard.insert(
    {
        **random.choice(keys),
        "zipcode": random.randint(10000, 99999),
        "card_number": int(fake.credit_card_number()),
        "cvv": random.randint(1, 999),
        "exp_date": fake.future_date(),
    }
    for _ in range(15000)
)

In [21]:
# Get all possible valid purchases: every (account, add-on) pair that has not
# been purchased yet, joined with each credit card held by that account.
purchases = (Account * CreditCard * AddOn - Purchase.proj()).fetch("KEY", order_by=('phone', 'addon_id'))

# Purchase's primary key is (phone, addon_id), so keep only one candidate per
# pair (the first card in fetch order) and give every kept row a purchase date.
# Tracking seen pairs in a set ensures that no pair is emitted twice and that
# no row is left without `purchase_date`; it also handles an empty candidate list.
unique_purchases = []
seen_pairs = set()
for purchase in purchases:
    pair = (purchase['phone'], purchase['addon_id'])
    if pair not in seen_pairs:
        seen_pairs.add(pair)
        unique_purchases.append(dict(purchase, purchase_date=fake.past_date()))

# insert a random subset, capped at the number of candidates actually available
# so that random.sample does not raise when fewer than 5000 exist
Purchase.insert(random.sample(unique_purchases, min(5000, len(unique_purchases))))

In [22]:
# Preview the populated table; the notebook renders only the first rows.
Purchase()

phone,addon_id,card_number,purchase_date
10062775205,1,38393794132550,2024-09-05
10069191984,2,3538422437370902,2024-08-27
10080048678,1,343630738497312,2024-09-04
10107822104,2,30025798019627,2024-08-26
10123330121,3,379573308396331,2024-08-09
10130185085,1,4592143147579,2024-08-21
10140722444,1,4360853925068676,2024-08-17
10140722444,3,4360853925068676,2024-08-21
10149241063,1,3554058618176897,2024-08-31
10155883476,2,4960178160785,2024-08-31
