In [8]:
import random
import pandas as pd
from faker import Faker
from google.cloud import storage
from datetime import datetime

In [9]:
# Initialize Faker for generating fake data
fake = Faker()

# List of possible product categories and products
categories = ['Electronics', 'Clothing', 'Home Appliances', 'Books', 'Sports']
products = {
    'Electronics': ['Phone', 'Laptop', 'Headphones', 'Smartwatch'],
    'Clothing': ['T-shirt', 'Jeans', 'Sweater', 'Jacket'],
    'Home Appliances': ['Refrigerator', 'Washing Machine', 'Microwave', 'Vacuum Cleaner'],
    'Books': ['Novel', 'Biography', 'Science Fiction', 'Fantasy'],
    'Sports': ['Football', 'Basketball', 'Tennis Racket', 'Yoga Mat']
}

# Payment methods
payment_methods = ['Credit Card', 'Debit Card', 'PayPal', 'Cash', 'Gift Card']


In [10]:
# Generate Retail Dataset
retail_data = []
for transaction_id in range(1, 101):
    customer_id = random.randint(1, 50)
    category = random.choice(categories)
    product = random.choice(products[category])
    quantity = random.randint(1, 5)
    price = random.uniform(5.99, 499.99)
    total = round(price * quantity, 2)
    date = fake.date_this_year()
    payment_method = random.choice(payment_methods)
    retail_data.append([transaction_id, customer_id, product, category, quantity, round(price, 2), total, date, payment_method])

# Create Retail DataFrame
retail_df = pd.DataFrame(retail_data, columns=["Transaction_ID", "Customer_ID", "Product", "Category", "Quantity", "Price", "Total", "Date", "Payment_Method"])

retail_df.head(5)

Unnamed: 0,Transaction_ID,Customer_ID,Product,Category,Quantity,Price,Total,Date,Payment_Method
0,1,2,Tennis Racket,Sports,4,440.48,1761.94,2024-05-07,Credit Card
1,2,35,Vacuum Cleaner,Home Appliances,5,194.6,972.98,2024-03-15,PayPal
2,3,13,Fantasy,Books,3,397.63,1192.9,2024-08-29,Gift Card
3,4,26,Microwave,Home Appliances,5,64.3,321.51,2024-04-05,Cash
4,5,49,Fantasy,Books,3,360.48,1081.43,2024-12-01,Credit Card


In [11]:
# Generate Customer Dataset
customer_data = []
for customer_id in range(1, 51):
    first_name = fake.first_name()
    last_name = fake.last_name()
    email = fake.email()
    phone = fake.phone_number()
    address = fake.address().replace("\n", ", ")
    state = fake.state()
    country = "USA"
    customer_data.append([customer_id, first_name, last_name, email, phone, address, state, country])

# Create Customer DataFrame
customer_df = pd.DataFrame(customer_data, columns=["Customer_ID", "First_Name", "Last_Name", "Email", "Phone", "Address", "State", "Country"])
customer_df.head(5)

Unnamed: 0,Customer_ID,First_Name,Last_Name,Email,Phone,Address,State,Country
0,1,Cindy,Hull,nharper@example.org,+1-423-731-0321x11054,"96443 Sean Neck, Lake Christopher, AK 48440",Alabama,USA
1,2,Lisa,Hart,joseph04@example.net,(693)673-0226x5211,"62918 Traci Cliff Suite 201, Wilsonview, PW 73290",Minnesota,USA
2,3,Meagan,Spence,larsenkristen@example.net,493.980.6667,"822 Henderson Estate, East Elizabethberg, MH 6...",Texas,USA
3,4,Christina,Davis,maryortiz@example.com,396-443-0570x9485,"5610 Kristen Port Suite 771, Danielside, VA 54962",Illinois,USA
4,5,Sheri,Ward,mzimmerman@example.com,(656)557-7724,"994 Nancy Burg Suite 597, Nelsonshire, UT 39659",Wyoming,USA


In [12]:
# Inner join on Customer_ID
merged_df = pd.merge(retail_df, customer_df, on='Customer_ID', how='inner')

# Show the first few rows of the merged dataset
merged_df.head()

Unnamed: 0,Transaction_ID,Customer_ID,Product,Category,Quantity,Price,Total,Date,Payment_Method,First_Name,Last_Name,Email,Phone,Address,State,Country
0,1,2,Tennis Racket,Sports,4,440.48,1761.94,2024-05-07,Credit Card,Lisa,Hart,joseph04@example.net,(693)673-0226x5211,"62918 Traci Cliff Suite 201, Wilsonview, PW 73290",Minnesota,USA
1,2,35,Vacuum Cleaner,Home Appliances,5,194.6,972.98,2024-03-15,PayPal,Donald,Cooper,jose69@example.com,001-842-487-7078x44298,"4225 Jason Dale, Port Patriciastad, AL 25059",Indiana,USA
2,3,13,Fantasy,Books,3,397.63,1192.9,2024-08-29,Gift Card,Caitlin,Copeland,gdavis@example.net,703-781-0352x816,"USNV Silva, FPO AA 53615",West Virginia,USA
3,4,26,Microwave,Home Appliances,5,64.3,321.51,2024-04-05,Cash,Thomas,Travis,brownnathan@example.com,001-765-501-7094x0728,"970 Jennifer Cape, Ryanshire, WY 67711",North Carolina,USA
4,5,49,Fantasy,Books,3,360.48,1081.43,2024-12-01,Credit Card,Jessica,Strickland,angela42@example.org,(360)432-8709x85375,"532 Natasha Ramp, Erictown, WY 98877",Hawaii,USA
