In [1]:
# Importing libraries

import sqlite3
import pandas as pd
import numpy as np
import json
import random


In [3]:
# Close any connection left over from an earlier run of this notebook.
# The previous line was the bare attribute `conn.close` (no parentheses), which
# only echoed the bound method and left the connection open.
# The guard keeps the cell runnable on a fresh kernel, where `conn` does not
# exist yet; closing an already-closed sqlite3 connection is harmless.
if 'conn' in globals():
    conn.close()

<function Connection.close()>

In [None]:

# -----------------------------------------------------------
# 1. Simulate Source 1: SQLite Customer Database
# -----------------------------------------------------------

# Seed the stdlib RNG so the simulated data is reproducible across runs.
random.seed(42)

# Open the database and (re)create the customers table inside this cell so it
# works after Restart & Run All instead of relying on `conn` / `cursor` left
# over from a cell that is no longer part of the notebook.
# NOTE(review): the original connection setup is not in the notebook. The
# database path and the `name` / `age` column names are assumptions; only
# `customer_id` and `country` are referenced later (merge key and query).
conn = sqlite3.connect('customers.db')
cursor = conn.cursor()

# Drop first so re-running the cell does not hit primary-key conflicts.
cursor.execute("DROP TABLE IF EXISTS customers")
cursor.execute("""
CREATE TABLE customers (
    customer_id INTEGER PRIMARY KEY,
    name TEXT,
    age INTEGER,
    country TEXT
)
""")

# Insert random customers
names = ['Alice', 'Bob', 'Charlie', 'Diana', 'Ethan', 'Fiona', 'George', 'Hannah', 'Ivan', 'Julia']
countries = ['USA', 'UK', 'Canada', 'Germany', 'France', 'Australia']

# One tuple per customer: (id, "<name><id>", age in [18, 65], country).
customers_data = [(i+1,
                   random.choice(names) + str(i+1),
                   random.randint(18, 65),
                   random.choice(countries)) for i in range(120)]

cursor.executemany("INSERT INTO customers VALUES (?, ?, ?, ?)", customers_data)
conn.commit()

# Load customers into DataFrame
df_customers = pd.read_sql_query("SELECT * FROM customers", conn)
print("SQLite Customers Table:")
print(df_customers.head(), "\n")

# -----------------------------------------------------------
# 2. Simulate Source 2: CSV File with Transactions
# -----------------------------------------------------------

# 300 random purchases spread over the 120 customer ids, one per calendar day
# starting 2025-01-01. The random draws happen in the order customer_id,
# product, amount.
products = ['Laptop', 'Phone', 'Headphones', 'Camera', 'Tablet']
transaction_data = dict(
    transaction_id=range(1, 301),
    customer_id=np.random.randint(1, 121, 300),   # upper bound is exclusive
    product=np.random.choice(products, 300),
    amount=np.random.randint(50, 1200, 300),
    date=pd.date_range('2025-01-01', periods=300, freq='D'),
)

# Materialise the table and persist it as the CSV "source".
df_transactions = pd.DataFrame(transaction_data)
df_transactions.to_csv('transactions.csv', index=False)

print("CSV Transactions Sample:")
print(df_transactions.head(), "\n")

# -----------------------------------------------------------
# 3. Simulate Source 3: JSON File with Feedback
# -----------------------------------------------------------

# One feedback record per customer id (1..120). The key is deliberately
# named "custID" here; it is aligned with the other sources during the ETL step.
feedback_data = [
    {
        "custID": cid,
        "rating": random.choice([1, 2, 3, 4, 5]),
        "feedback": random.choice([
            "Excellent service", "Good experience", "Average",
            "Delivery was late", "Product quality issue"
        ]),
    }
    for cid in range(1, 121)
]

# Persist the records as the JSON "source" ...
with open('feedback.json', 'w') as fh:
    json.dump(feedback_data, fh, indent=4)

# ... and read them back from disk into a DataFrame.
with open('feedback.json') as fh:
    data = json.load(fh)
df_feedback = pd.DataFrame(data)

print("JSON Feedback Sample:")
print(df_feedback.head(), "\n")

# -----------------------------------------------------------
# 4. Integration Process (ETL)
# -----------------------------------------------------------

# Extract: the three source frames are already in memory.

# Transform: give the feedback key the same name as in the other sources.
df_feedback.rename(columns={'custID': 'customer_id'}, inplace=True)

# Merge: start from customers and left-join transactions, then feedback, so
# every customer is kept even when there is no matching row on the right.
df_merged = pd.merge(
    pd.merge(df_customers, df_transactions, on='customer_id', how='left'),
    df_feedback,
    on='customer_id',
    how='left',
)

# Clean: fill gaps in the feedback columns; rating 0 stands for "no rating".
df_merged = df_merged.assign(
    feedback=df_merged['feedback'].fillna('No feedback'),
    rating=df_merged['rating'].fillna(0).astype(int),
)

# -----------------------------------------------------------
# 5. Load Unified Data into SQLite
# -----------------------------------------------------------

# Write the merged frame to the `unified_data` table, replacing any previous
# copy, and leave the DataFrame index out of the table.
df_merged.to_sql(name='unified_data', con=conn, index=False, if_exists='replace')

print("Unified Data (first few rows):")
print(df_merged.head(n=5), "\n")

# -----------------------------------------------------------
# 6. Example Queries for Insights
# -----------------------------------------------------------

# Per country: number of distinct customers and mean transaction amount.
# AVG() skips NULL amounts, i.e. customers without any transaction.
query1 = """
SELECT country, COUNT(DISTINCT customer_id) as num_customers,
       ROUND(AVG(amount),2) as avg_spent
FROM unified_data
GROUP BY country
ORDER BY avg_spent DESC
"""
print("Average Spending by Country:")
print(pd.read_sql_query(query1, conn), "\n")

# Per rating: number of feedbacks and mean transaction amount.
# unified_data has one row per transaction, so COUNT(*) would count each
# customer's feedback once per purchase; counting distinct customers gives the
# real number of feedbacks behind every rating.
query2 = """
SELECT rating, COUNT(DISTINCT customer_id) as num_feedbacks,
       ROUND(AVG(amount),2) as avg_amount
FROM unified_data
GROUP BY rating
ORDER BY rating DESC
"""
print("Customer Ratings vs Average Transaction Amount:")
print(pd.read_sql_query(query2, conn), "\n")

# -----------------------------------------------------------
# 7. Optional: Export Integrated Dataset
# -----------------------------------------------------------

# Dump the merged frame to CSV without the DataFrame index.
df_merged.to_csv(path_or_buf='unified_dataset.csv', index=False)
print("Integrated dataset exported to unified_dataset.csv")

# -----------------------------------------------------------
# Close the connection
# -----------------------------------------------------------
# Nothing below this point uses the database.
conn.close()
