In [None]:
import random
from pathlib import Path

import numpy as np
import pandas as pd
from faker import Faker

# Seed every random source up front so that "Restart Kernel -> Run All"
# regenerates the same dataset instead of a new one on each run.
SEED = 42
random.seed(SEED)      # stdlib RNG: drives the random.choice / random.uniform draws
np.random.seed(SEED)   # NumPy global RNG: used by DataFrame.sample when no random_state is given
Faker.seed(SEED)       # RNG shared by Faker instances (names, cities, UUIDs, dates)
# NOTE(review): Faker's date_this_year()/date_this_decade() are relative to the
# current date, so generated dates may still differ between runs on different days.

# Initialize Faker
fake = Faker()

# Define product categories and products
# Maps each category name to the list of product names that can be sold under it.
categories = {
    "Electronics": ["Laptop", "Smartphone", "Headphones", "Camera"],
    "Clothing": ["Shirt", "Jeans", "Jacket", "Sweater"],
    "Home": ["Vacuum Cleaner", "Blender", "Toaster", "Microwave"],
    "Books": ["Fiction", "Non-Fiction", "Comics", "Biography"],
    "Beauty": ["Lipstick", "Perfume", "Lotion", "Makeup Kit"]
}

# Generate dataset: build the synthetic sales records one row at a time.
# Option lists that never change are created once, outside the loop.
category_names = list(categories)
quantity_options = [1, 2, 3, None]  # None simulates a missing quantity
payment_methods = ["Credit Card", "PayPal", "Cash"]

data = []
for _ in range(1000):  # one iteration per generated transaction (1000 rows)
    category = random.choice(category_names)
    product = random.choice(categories[category])
    price = round(random.uniform(10, 1000), 2)

    # Quantity is drawn uniformly from the options, so about a quarter are missing.
    quantity = random.choice(quantity_options)

    # A candidate discount is always drawn, then kept or replaced by None
    # with equal odds to simulate missing values.
    candidate_discount = round(random.uniform(0, 50), 2)
    discount = random.choice([None, candidate_discount])

    # A missing quantity counts as a single unit when computing the total.
    units = quantity or 1
    total_amount = round(price * units, 2)

    # Both candidate dates are generated on every iteration; the row gets
    # one of them, or None to simulate a missing date.
    date_in_year = fake.date_this_year()
    date_in_decade = fake.date_this_decade()
    purchase_date = random.choice([date_in_year, date_in_decade, None])

    record = {
        "Transaction ID": fake.uuid4(),
        "Customer Name": fake.name(),
        "Product Name": product,
        "Category": category,
        "Price": price,
        "Quantity": quantity,
        "Total Amount": total_amount,
        "Discount": discount,
        "Purchase Date": purchase_date,
        "Customer Location": fake.city() + ", " + fake.state(),
        "Payment Method": random.choice(payment_methods),
    }
    data.append(record)

# Create duplicates
df = pd.DataFrame(data)
# Append 50 rows sampled with replacement from the frame itself to simulate
# duplicate records. ignore_index=True gives the combined frame a fresh
# 0..n-1 index instead of repeating the sampled rows' original labels.
df = pd.concat([df, df.sample(50, replace=True)], ignore_index=True)

# Write to CSV
output_path = Path("dataset/sales_data.csv")
# to_csv does not create missing folders, so make sure the target directory exists.
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)
df.head()

## Load the CSV Data into a MySQL Table

In [None]:
import pandas as pd
from sqlalchemy import create_engine

def csv_to_sql(database_name, table_name, file_name,
               user=None, password=None, host="localhost", port=3306):
    """Load a CSV file into a MySQL table, replacing the table if it exists.

    Parameters
    ----------
    database_name : str
        Name of the MySQL database to connect to.
    table_name : str
        Name of the table to write. It is dropped and recreated on every call
        (``if_exists='replace'``).
    file_name : str or path-like
        Path of the CSV file to read.
    user, password : str, optional
        MySQL credentials. When omitted they are read from the ``MYSQL_USER``
        and ``MYSQL_PASSWORD`` environment variables, falling back to
        ``"root"`` / ``"root"``. The fallback is intended for a local demo
        database only; do not rely on it for a shared server.
    host : str, optional
        Database host, ``"localhost"`` by default.
    port : int, optional
        Database port, ``3306`` by default.

    Returns
    -------
    None

    Raises
    ------
    Whatever ``pandas.read_csv`` raises when the file cannot be read, and
    whatever the database driver raises when the connection or write fails.
    """
    import os
    from urllib.parse import quote_plus

    # Explicit arguments win, then environment variables, then the demo defaults.
    if user is None:
        user = os.environ.get("MYSQL_USER", "root")
    if password is None:
        password = os.environ.get("MYSQL_PASSWORD", "root")

    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # in a password cannot corrupt the connection URL.
    url = (
        f"mysql+mysqlconnector://{quote_plus(user)}:{quote_plus(password)}"
        f"@{host}:{port}/{database_name}"
    )
    engine = create_engine(url)
    try:
        # Read the CSV file into a pandas DataFrame
        df = pd.read_csv(file_name)

        # Write the DataFrame to MySQL (this will create the table if it doesn't exist).
        # 'if_exists' can be 'fail', 'replace', or 'append'; 'replace' drops and
        # recreates the table.
        df.to_sql(table_name, engine, if_exists='replace', index=False)
    finally:
        # Release the engine's pooled connections even if the load failed.
        engine.dispose()

# Load the generated sales CSV into the `sales` table of the `demo_data` database.
csv_to_sql(
    database_name='demo_data',
    table_name='sales',
    file_name="dataset/sales_data.csv",
)