In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima.model import ARIMA
from mlxtend.frequent_patterns import apriori, association_rules

In [3]:
# Notebook-wide plotting defaults: seaborn's whitegrid theme with the muted
# palette, plus a 10x6 inch default figure size. The size is passed through
# `rc` so it is applied on top of the theme's own settings.
sns.set(
    style="whitegrid",
    palette="muted",
    color_codes=True,
    rc={"figure.figsize": (10, 6)},
)

### 1. Data Loading & Preprocessing

In [None]:
def _parse_date_columns(frame: pd.DataFrame) -> pd.DataFrame:
    """Convert text columns whose name contains 'date' to datetime64 when every value parses."""
    for column in frame.columns:
        if "date" not in str(column).lower():
            continue
        series = frame[column]
        is_text = (pd.api.types.is_object_dtype(series)
                   or pd.api.types.is_string_dtype(series))
        if not is_text:
            continue
        try:
            parsed = pd.to_datetime(series, errors="coerce")
        except (ValueError, TypeError):
            # e.g. mixed time zones; leave the column exactly as it was read.
            continue
        # Adopt the conversion only if no real value was coerced to NaT, so a
        # column that merely has "date" in its name is never corrupted.
        if parsed.notna().sum() == series.notna().sum():
            frame[column] = parsed
    return frame


def load_data(data_directory: str = "."):
    """
    Load CSV files generated from the e-commerce data generator into Pandas DataFrames.

    For every dataset the plain file name is tried first and, if it is absent,
    the same name with an ``ecommerce_`` prefix:
      - transactions.csv         / ecommerce_transactions.csv
      - competitor_data.csv      / ecommerce_competitor_data.csv
      - market_trends.csv        / ecommerce_market_trends.csv
      - marketing_campaigns.csv  / ecommerce_marketing_campaigns.csv
      - customers.csv            / ecommerce_customers.csv
      - products.csv             / ecommerce_products.csv

    Args:
        data_directory: Directory that contains the CSV files (default: the
            current working directory).

    Returns:
        dict mapping 'transactions', 'competitor_data', 'market_trends',
        'marketing_campaigns', 'customers' and 'products' to a DataFrame.
        Every key is always present; a dataset whose file is missing or has
        no content maps to an empty DataFrame and a warning is printed.
        Text columns whose name contains "date" are converted to datetime64
        when all of their values parse as dates.
    """
    file_names = {
        'transactions': 'transactions.csv',
        'competitor_data': 'competitor_data.csv',
        'market_trends': 'market_trends.csv',
        'marketing_campaigns': 'marketing_campaigns.csv',
        'customers': 'customers.csv',
        'products': 'products.csv'
    }

    data = {}
    for key, filename in file_names.items():
        candidates = (filename, f"ecommerce_{filename}")
        # isfile (not exists) so a directory with a matching name is not read.
        found = next(
            (name for name in candidates
             if os.path.isfile(os.path.join(data_directory, name))),
            None,
        )
        if found is None:
            print(f"Warning: {filename} not found in {data_directory}.")
            data[key] = pd.DataFrame()
            continue

        print(f"Loading {found}...")
        try:
            # read_csv(parse_dates=True) only attempts to parse the index, so
            # date columns are converted explicitly after loading instead.
            frame = pd.read_csv(os.path.join(data_directory, found), low_memory=False)
        except pd.errors.EmptyDataError:
            # A zero-byte file is treated like a missing one rather than
            # aborting the whole load.
            print(f"Warning: {found} is empty.")
            data[key] = pd.DataFrame()
            continue
        data[key] = _parse_date_columns(frame)

    return data

def _summarize_frame(label: str, frame) -> None:
    """Print structure, summary statistics and per-column missing counts for one dataset."""
    print(f"\n=== {label} DATA ===")
    if frame is None or frame.empty:
        # describe() raises on a frame without columns, and a missing dict
        # key yields None; report the gap instead of crashing.
        print(f"No {label.lower()} data available; skipping.")
        return

    # DataFrame.info() writes its report to stdout itself and returns None,
    # so wrapping it in print() would emit a stray "None" line.
    frame.info()
    print(frame.describe(include='all'))
    print("\nMissing values per column:")
    print(frame.isnull().sum())


def exploratory_data_analysis(dfs: dict):
    """
    Perform Exploratory Data Analysis (EDA) on the transactions, customers, and products datasets.

    Args:
        dfs: Mapping of dataset name to DataFrame. The keys 'transactions',
            'customers' and 'products' are read; a key that is absent or
            maps to an empty DataFrame is reported and skipped.

    Outputs (printed to stdout, per dataset):
      - Column/dtype overview (DataFrame.info).
      - Summary statistics (DataFrame.describe(include='all')).
      - Missing values count per column.

    No figures are produced by this function.
    """
    for key in ('transactions', 'customers', 'products'):
        _summarize_frame(key.upper(), dfs.get(key))

NameError: name 'merged_df' is not defined

NameError: name 'merged_df' is not defined