In [28]:
import sqlite3
import random
import json
import pandas as pd
import numpy as np

### Simulate Source 1: Customer database (`company_data.db`)

In [29]:
# Name pools for synthetic customers, keyed by country.
# Every country maps to a "first" pool and a "last" pool of ten names each;
# the insertion order of the countries is preserved.
data = {
    "USA": dict(
        first=["James", "Emily", "Michael", "Sarah", "David", "Ashley", "Robert", "Jessica", "William", "Olivia"],
        last=["Smith", "Johnson", "Brown", "Williams", "Jones", "Miller", "Davis", "Garcia", "Taylor", "Anderson"],
    ),
    "England": dict(
        first=["Oliver", "Amelia", "Harry", "Isla", "George", "Sophia", "Jack", "Charlotte", "Charlie", "Ella"],
        last=["Wilson", "Thompson", "Evans", "Roberts", "Walker", "White", "Lewis", "Hall", "Allen", "Young"],
    ),
    "Sri Lanka": dict(
        first=["Kasun", "Nadeesha", "Sanduni", "Isuru", "Chathura", "Hansika", "Tharindu", "Kavindi", "Supun", "Rashmi"],
        last=["Perera", "Fernando", "Silva", "Wijesinghe", "Jayasinghe", "Ekanayake", "Rathnayake", "Gunasekara", "Wickramasinghe", "Bandara"],
    ),
    "Australia": dict(
        first=["Liam", "Chloe", "Noah", "Mia", "Ethan", "Zoe", "Lucas", "Grace", "Jack", "Sophie"],
        last=["Smith", "Jones", "Williams", "Taylor", "Brown", "Wilson", "Johnson", "Martin", "Lee", "Walker"],
    ),
    "South Africa": dict(
        first=["Thabo", "Naledi", "Sipho", "Lerato", "Kagiso", "Palesa", "Sibusiso", "Ayanda", "Karabo", "Boitumelo"],
        last=["Nkosi", "Naidoo", "Petersen", "Botha", "Mthembu", "Mokoena", "Van Wyk", "Jansen", "De Villiers", "Khumalo"],
    ),
    "Germany": dict(
        first=["Lukas", "Mia", "Leon", "Hannah", "Jonas", "Lea", "Paul", "Emma", "Felix", "Sophie"],
        last=["Müller", "Schmidt", "Schneider", "Fischer", "Weber", "Meyer", "Wagner", "Becker", "Hoffmann", "Koch"],
    ),
    "France": dict(
        first=["Lucas", "Emma", "Hugo", "Chloé", "Louis", "Camille", "Gabriel", "Manon", "Nathan", "Léa"],
        last=["Martin", "Bernard", "Dubois", "Thomas", "Robert", "Richard", "Petit", "Durand", "Leroy", "Moreau"],
    ),
    "Italy": dict(
        first=["Luca", "Giulia", "Marco", "Francesca", "Matteo", "Chiara", "Alessandro", "Sara", "Davide", "Martina"],
        last=["Rossi", "Russo", "Ferrari", "Esposito", "Bianchi", "Romano", "Colombo", "Ricci", "Marino", "Greco"],
    ),
}

In [30]:
# Generate the synthetic customer table: one row per customer with a
# sequential ID, a country, a country-appropriate name, and an age in 18..40.
NUM_CUSTOMERS = 120       # IDs run FIRST_CUST_ID .. FIRST_CUST_ID + NUM_CUSTOMERS - 1
FIRST_CUST_ID = 45000

# A dedicated, seeded generator makes "Restart & Run All" reproduce the same
# table, and leaves the global `random` state untouched for other cells.
customer_rng = random.Random(42)

countries = list(data.keys())
customer_data = []
for cust_id in range(FIRST_CUST_ID, FIRST_CUST_ID + NUM_CUSTOMERS):
    # Pick the country first so the names are drawn from the matching pools.
    country = customer_rng.choice(countries)
    customer_data.append({
        "CustId": cust_id,
        "FirstName": customer_rng.choice(data[country]["first"]),
        "LastName": customer_rng.choice(data[country]["last"]),
        "Age": customer_rng.randint(18, 40),  # randint is inclusive on both ends
        "Country": country,
    })

df = pd.DataFrame(customer_data)
df

Unnamed: 0,CustId,FirstName,LastName,Age,Country
0,45000,Sipho,Naidoo,20,South Africa
1,45001,Michael,Smith,37,USA
2,45002,Palesa,Petersen,40,South Africa
3,45003,David,Smith,20,USA
4,45004,Noah,Taylor,27,Australia
...,...,...,...,...,...
115,45115,Sophie,Jones,32,Australia
116,45116,Sipho,Naidoo,39,South Africa
117,45117,Amelia,Wilson,30,England
118,45118,Charlotte,Evans,32,England


In [31]:
# Open the SQLite file that acts as the customer source system
# (sqlite3 creates the file if it does not exist yet).
conn = sqlite3.connect('company_data.db')
cursor = conn.cursor()

# DDL for the "customers" table; IF NOT EXISTS keeps this cell re-runnable.
# NOTE(review): the following cell writes the DataFrame under the name
# "Customers" with if_exists="replace", and the DataFrame's columns
# (CustId, FirstName, ...) do not match the columns declared here —
# confirm which schema is actually meant to end up in the database.
create_customers_sql = '''
CREATE TABLE IF NOT EXISTS customers (
    customer_id INTEGER PRIMARY KEY,
    first_name TEXT,
    last_name TEXT,
    age INTEGER,
    country TEXT
)
'''
cursor.execute(create_customers_sql)
conn.commit()

In [32]:
# Save the DataFrame as the "Customers" table, replacing any previous copy.
# SQLite treats table names case-insensitively, so "Customers" and the
# "customers" table created earlier are the same table. pandas, however,
# appears to check for an existing table with a case-sensitive name match on
# raw sqlite3 connections, so on a fresh database if_exists="replace" can miss
# the existing table and fail with "table already exists". Dropping it
# explicitly first makes the write succeed regardless of the stored casing.
conn.execute("DROP TABLE IF EXISTS Customers")
df.to_sql("Customers", conn, if_exists="replace", index=False)
conn.commit()

In [33]:
# Close connection
# The preceding cells commit their writes before this point; after closing,
# `conn` and `cursor` can no longer be used, so any later cell that needs the
# database has to open a new connection.
conn.close()

### Simulate Source 2: CSV File with Transactions

In [34]:

# Create 300 random transactions (one per day from 2025-01-01) for the
# simulated customers and persist them as the CSV source.
# A seeded generator keeps the file identical across "Restart & Run All".
transaction_rng = np.random.default_rng(42)

transaction_data = {
    'transaction_id': range(1, 301),
    # The upper bound is exclusive, so 45120 yields IDs 45000..45119 — the
    # customer IDs generated earlier (45000 + range(120)). The previous bound
    # of 45121 could emit 45120, an ID with no matching customer row.
    'customer_id': transaction_rng.integers(45000, 45120, 300),
    'product': transaction_rng.choice(['Laptop', 'Phone', 'Headphones', 'Camera', 'Tablet'], 300),
    # Amounts fall in 50..1199 (upper bound exclusive, as before).
    'amount': transaction_rng.integers(50, 1200, 300),
    'date': pd.date_range('2025-01-01', periods=300, freq='D')
}

df_transactions = pd.DataFrame(transaction_data)
df_transactions.to_csv('transactions.csv', index=False)

print("CSV Transactions Sample:")
print(df_transactions.head(), "\n")

CSV Transactions Sample:
   transaction_id  customer_id     product  amount       date
0               1        45049      Tablet     672 2025-01-01
1               2        45091      Laptop     300 2025-01-02
2               3        45095  Headphones     429 2025-01-03
3               4        45062       Phone     930 2025-01-04
4               5        45014      Laptop     486 2025-01-05 



### Simulate Source 3: JSON File with Feedback

In [35]:

# Build one feedback record per simulated customer and write them to the
# JSON source. A fixed seed makes the file reproducible across re-runs without
# touching the global `random` state.
feedback_rng = random.Random(43)

FEEDBACK_COMMENTS = [
    "Excellent service", "Good experience", "Average",
    "Delivery was late", "Product quality issue"
]

# range() excludes its stop value, so this covers 45000..45119 — the customer
# IDs generated earlier (45000 + range(120)). The previous stop of 45121
# produced an extra record for 45120, an ID with no matching customer row.
feedback_data = [
    {
        "custID": cid,
        "rating": feedback_rng.choice([1, 2, 3, 4, 5]),
        "feedback": feedback_rng.choice(FEEDBACK_COMMENTS),
    }
    for cid in range(45000, 45120)
]

with open('feedback.json', 'w', encoding='utf-8') as f:
    json.dump(feedback_data, f, indent=4)