In [47]:
import json
import pandas as pd
from faker import Faker
from mimesis import Person, Address, Finance
from mimesis.enums import Gender
from uuid import uuid4
import random
from datetime import datetime, date, timedelta

class MockDataGenerator:
    """Build mock retail dimension and fact tables from a schema dictionary.

    The schema has the shape
    ``{"tables": {name: {"columns": {column: type}, "rows": int}}}``.
    Handled column types are ``date``, ``datetime``, ``float``, ``int``,
    ``string`` and ``enum:opt1,opt2,...``.  Any column whose name ends in
    ``_ID`` is filled with UUID strings regardless of its declared type.

    Some columns are generated together (start/end dates, session start/end,
    loyalty points, product name/subcategory/brand, the calendar attributes of
    ``time_dimension``).  The first column of each group produces the values
    and stores them on the instance; the following columns read them back, so
    the leading column must appear first in the table's column mapping.
    """

    # Dimension tables generated (in this order) right after "categories".
    DIMENSION_TABLES = (
        "customers", "products", "stores", "marketing_channels", "campaigns",
        "channel_types", "loyalty_programs", "device_types", "suppliers",
        "payment_methods", "promotion_types", "time_dimension",
    )

    # Fact tables, generated after every dimension so foreign keys can resolve.
    FACT_TABLES = (
        "SALES_TRANSACTIONS", "CUSTOMER_ENGAGEMENT", "LOYALTY_TRANSACTIONS",
        "INVENTORY_TRACKING", "WEBSITE_TRAFFIC", "PROMOTION_PERFORMANCE",
        "CUSTOMER_FEEDBACK", "HISTORICAL_FORECASTS", "CUSTOMER_ACTIVITY_TRACKING",
        "PRODUCT_ASSOCIATIONS", "ATTRIBUTION_MODELS", "FULFILLMENT_TRACKING",
        "CAMPAIGN_PRODUCT_LINK",
    )

    # Tables whose Start_Date/End_Date pair must form a valid (start < end) range.
    _DATE_RANGE_TABLES = ("PROMOTION_PERFORMANCE", "campaigns")

    # Spelled out explicitly so the output does not depend on the process locale.
    _DAY_NAMES = (
        "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday",
    )
    _MONTH_NAMES = (
        "January", "February", "March", "April", "May", "June", "July",
        "August", "September", "October", "November", "December",
    )

    def __init__(self, schema):
        """Store *schema* and prepare the lookup data used while generating."""
        self.schema = schema
        self.fake = Faker(['en_IN'])  # Using Indian locale for addresses
        # One shared provider instead of a new Person object for every cell.
        self.person = Person('en')
        self.generated_keys = {}   # key column name -> list of UUID strings
        self.generated_data = {}   # table name -> generated DataFrame
        self.key_owners = {}       # key column name -> table that owns it as primary key
        self.seasonal_dates = self._generate_seasonal_dates()

        # Pre-defined meaningful data
        self.parent_categories = ["Electronics", "Furniture", "Clothing", "Books", "Home Appliances", "Kitchenware", "Toys"]

        self.subcategories = {
            "Electronics": ["Smartphones", "Laptops", "Tablets", "Headphones", "Cameras", "Smart Watches"],
            "Furniture": ["Chairs", "Desks", "Tables", "Sofas", "Beds", "Wardrobes"],
            "Clothing": ["Shirts", "Pants", "Dresses", "Jackets", "Sportswear", "Accessories"],
            "Books": ["Fiction", "Non-Fiction", "Academic", "Children's", "Comics", "Self-Help"],
            "Home Appliances": ["Refrigerators", "Washing Machines", "Microwaves", "Air Conditioners", "Vacuum Cleaners"],
            "Kitchenware": ["Cookware", "Utensils", "Storage", "Small Appliances", "Dining"],
            "Toys": ["Educational Toys", "Board Games", "Action Figures", "Building Sets", "Outdoor Toys"]
        }

        self.brands = {
            "Electronics": {
                "Smartphones": ["Apple", "Samsung", "OnePlus", "Xiaomi", "Vivo", "Oppo"],
                "Laptops": ["Dell", "HP", "Lenovo", "Apple", "Asus"],
                "Tablets": ["Apple", "Samsung", "Lenovo"],
                "Headphones": ["Sony", "JBL", "Bose", "Apple"],
                "Cameras": ["Canon", "Nikon", "Sony"],
                "Smart Watches": ["Apple", "Samsung", "Fitbit", "Garmin"]
            },
            "Furniture": {
                "Chairs": ["IKEA", "Godrej", "Nilkamal", "Featherlite"],
                "Desks": ["IKEA", "Godrej", "Wipro Furniture"],
                "Tables": ["IKEA", "Godrej", "Nilkamal"],
                "Sofas": ["IKEA", "Godrej", "Urban Ladder"],
                "Beds": ["IKEA", "Godrej", "Sleepwell"],
                "Wardrobes": ["IKEA", "Godrej", "Nilkamal"]
            },
            "Clothing": {
                "Shirts": ["Levi's", "H&M", "Zara", "UCB"],
                "Pants": ["Levi's", "H&M", "Zara", "UCB"],
                "Dresses": ["H&M", "Zara", "UCB", "AND"],
                "Jackets": ["Levi's", "H&M", "Zara", "UCB"],
                "Sportswear": ["Nike", "Adidas", "Puma"],
                "Accessories": ["Fossil", "Hidesign", "Baggit"]
            }
        }

        self.loyalty_programs = [
            "Premium Plus", "Gold Rewards", "Silver Select", "Bronze Basic",
            "Diamond Elite", "Platinum Perks", "VIP Circle", "Exclusive Club",
            "Preferred Member", "Priority Pass"
        ]

        self.campaign_names = [
            "Back to School", "Summer Splash", "Winter Wonderland", "Spring Revival",
            "Diwali Dhamaka", "Christmas Special", "New Year New You", "Valentine's Day",
            "Anniversary Sale", "Monsoon Madness", "Independence Day Special", "Black Friday"
        ]

    def _generate_seasonal_dates(self):
        """Return every day from 2023-01-01 to 2024-12-31 as 'YYYY-MM-DD' strings."""
        start_date = date(2023, 1, 1)
        end_date = date(2024, 12, 31)
        total_days = (end_date - start_date).days + 1
        return [
            (start_date + timedelta(days=offset)).strftime('%Y-%m-%d')
            for offset in range(total_days)
        ]

    def _generate_date_range(self, min_days=1, max_days=90):
        """Return a ``(start, end)`` pair of date strings ``min_days``..``max_days`` apart."""
        start_date = random.choice(self.seasonal_dates)
        start_dt = datetime.strptime(start_date, '%Y-%m-%d')
        duration = random.randint(min_days, max_days)
        end_dt = start_dt + timedelta(days=duration)
        return start_dt.strftime('%Y-%m-%d'), end_dt.strftime('%Y-%m-%d')

    def _generate_product_name(self, category, subcategory, brand):
        """Compose a display name for a product from its category, subcategory and brand."""
        model_numbers = ['Pro', 'Plus', 'Max', 'Ultra', 'Lite', 'Basic', 'Premium']
        # Laptops are described by screen size only; clothing sizes do not apply.
        screen_sizes = ['13"', '15"', '17"']

        if category == "Electronics":
            if subcategory == "Smartphones":
                return f"{brand} {random.choice(['Galaxy', 'iPhone', 'Nord'])} {random.choice(model_numbers)}"
            elif subcategory == "Laptops":
                return f"{brand} {random.choice(['ThinkPad', 'Inspiron', 'Pavilion'])} {random.choice(screen_sizes)}"
        elif category == "Furniture":
            return f"{brand} {subcategory} {random.choice(['Classic', 'Modern', 'Executive', 'Comfort'])}"
        elif category == "Clothing":
            return f"{brand} {subcategory} {random.choice(['Casual', 'Formal', 'Party', 'Sports'])}"

        # Remaining electronics and every other category use the plain form.
        return f"{brand} {subcategory}"

    def _generate_indian_address(self):
        """Return a street address string assembled from fixed area/street/landmark lists."""
        street_types = ['Street', 'Road', 'Lane', 'Nagar', 'Colony', 'Layout']
        areas = ['Bandra', 'Andheri', 'Powai', 'Malad', 'Juhu', 'Worli']
        landmarks = ['Near Railway Station', 'Near Bus Stop', 'Near Market', 'Near Park', 'Near Temple', 'Near School']

        return (
            f"{random.randint(1, 999)}, "
            f"{random.choice(areas)} {random.choice(street_types)}, "
            f"{random.choice(landmarks)}"
        )

    def _generate_forecast_period(self):
        """Return a label such as ``'Q3 2024'`` for a random quarter of 2023 or 2024."""
        year = random.choice([2023, 2024])
        quarter = f"Q{random.randint(1, 4)}"
        return f"{quarter} {year}"

    def _generate_session_times(self):
        """Return ``(start, end)`` timestamps for a session lasting 1 to 180 minutes."""
        base_date = random.choice(self.seasonal_dates)
        base_time = datetime.strptime(
            f"{base_date} {random.randint(0, 23):02d}:{random.randint(0, 59):02d}:00",
            '%Y-%m-%d %H:%M:%S',
        )
        end_time = base_time + timedelta(minutes=random.randint(1, 180))
        return base_time.strftime('%Y-%m-%d %H:%M:%S'), end_time.strftime('%Y-%m-%d %H:%M:%S')

    def _generate_loyalty_points(self):
        """Return ``(earned, redeemed, balance)`` with ``balance == earned - redeemed``."""
        earned = round(random.uniform(100, 1000), 2)
        redeemed = round(random.uniform(0, earned * 0.8), 2)  # Ensure redeemed is less than earned
        # Derive the balance from the rounded figures so the three columns
        # always reconcile to the cent.
        balance = round(earned - redeemed, 2)
        return earned, redeemed, balance

    def _generate_supplier_contact(self):
        """Return a contact e-mail address built from a fake company name."""
        domains = ['gmail.com', 'yahoo.com', 'hotmail.com', 'outlook.com']
        # Company names can contain commas, ampersands, hyphens and spaces,
        # none of which belong in the host part of an address.
        company = ''.join(ch for ch in self.fake.company().lower() if ch.isalnum())
        if not company:
            company = "supplier"
        return f"contact@{company}.{random.choice(domains)}"

    def _generate_time_dimension_value(self, col_name):
        """Return a calendar attribute for the current ``time_dimension`` row.

        ``Date`` draws the row's date and remembers it; ``Day_Name``,
        ``Day_of_Week`` (ISO: Monday=1 .. Sunday=7), ``Month``, ``Quarter`` and
        ``Year`` are derived from that date.  Returns ``None`` for any other
        column, or when no ``Date`` has been generated yet, so the caller can
        fall back to the generic generators.
        """
        if col_name == "Date":
            self.temp_date = random.choice(self.seasonal_dates)
            return self.temp_date

        current = getattr(self, "temp_date", None)
        if current is None:
            return None

        day = datetime.strptime(current, '%Y-%m-%d')
        if col_name == "Day_Name":
            return self._DAY_NAMES[day.weekday()]
        if col_name == "Day_of_Week":
            return day.isoweekday()
        if col_name == "Month":
            return self._MONTH_NAMES[day.month - 1]
        if col_name == "Quarter":
            return (day.month - 1) // 3 + 1
        if col_name == "Year":
            return day.year
        return None

    def _generate_column_data(self, table_name, col_name, col_type, row_index, total_rows):
        """Return the value for one cell.

        Table-specific rules are tried first; anything not covered falls back
        to ``_generate_basic_data`` using the declared column type.
        """
        # Handle ID columns
        if col_name.endswith('_ID'):
            return self._handle_id_column(table_name, col_name, row_index, total_rows)

        # Session start/end are generated together so the end follows the start.
        if table_name == "WEBSITE_TRAFFIC":
            if col_name == "Session_Start_Time":
                start_time, self.temp_session_end = self._generate_session_times()
                return start_time
            if col_name == "Session_End_Time":
                return self.temp_session_end

        # Start/end dates are generated together so the range is never inverted.
        if table_name in self._DATE_RANGE_TABLES:
            if col_name == "Start_Date":
                start_date, self.temp_end_date = self._generate_date_range()
                return start_date
            if col_name == "End_Date":
                return self.temp_end_date

        # Named entities come from the curated lists rather than person names.
        if table_name == "campaigns" and col_name == "Campaign_Name":
            return random.choice(self.campaign_names)

        if table_name == "loyalty_programs" and col_name == "Program_Name":
            # Cycle through the list so programme names stay distinct while
            # the table has no more rows than there are names.
            return self.loyalty_programs[row_index % len(self.loyalty_programs)]

        if table_name == "suppliers" and col_name == "Contact_Info":
            return self._generate_supplier_contact()

        if table_name == "customers" and col_name == "Address":
            return self._generate_indian_address()

        if table_name == "LOYALTY_TRANSACTIONS":
            if col_name == "Loyalty_Points_Earned":
                earned, redeemed, balance = self._generate_loyalty_points()
                self.temp_earned = earned
                self.temp_redeemed = redeemed
                self.temp_balance = balance
                return earned
            if col_name == "Loyalty_Points_Redeemed":
                return self.temp_redeemed
            if col_name == "Loyalty_Balance":
                return self.temp_balance

        if table_name == "HISTORICAL_FORECASTS" and col_name == "Forecast_Period":
            return self._generate_forecast_period()

        if table_name == "time_dimension":
            derived = self._generate_time_dimension_value(col_name)
            if derived is not None:
                return derived

        if table_name == "products":
            if col_name == "Product_Name":
                category = random.choice(self.parent_categories)
                subcategory = random.choice(self.subcategories[category])
                brand_options = self.brands.get(category, {}).get(subcategory)
                brand = random.choice(brand_options) if brand_options else "Generic Brand"
                # Always refresh the per-row state; otherwise categories with
                # no brand list would leave the previous row's subcategory and
                # brand in place for this row.
                self.temp_category = category
                self.temp_subcategory = subcategory
                self.temp_brand = brand
                return self._generate_product_name(category, subcategory, brand)
            if col_name == "Subcategory":
                if hasattr(self, 'temp_subcategory'):
                    return self.temp_subcategory
                return random.choice(self.subcategories[random.choice(self.parent_categories)])
            if col_name == "Brand":
                return self.temp_brand if hasattr(self, 'temp_brand') else "Generic Brand"

        # Handle basic data types
        return self._generate_basic_data(col_type, col_name)

    @staticmethod
    def _primary_key_name(table_name):
        """Return the conventional key column for *table_name*.

        ``customers`` -> ``Customer_ID``, ``categories`` -> ``Category_ID``,
        ``LOYALTY_TRANSACTIONS`` -> ``Loyalty_Transaction_ID``.
        """
        name = table_name.lower()
        if name.endswith('ies'):
            name = name[:-3] + 'y'
        elif name.endswith('s'):
            name = name[:-1]
        return f"{name.title()}_ID"

    @staticmethod
    def _referenced_table_name(key_name):
        """Return the dimension table a key column conventionally points to.

        ``Customer_ID`` -> ``customers``, ``Category_ID`` -> ``categories``.
        """
        base = key_name[:-3] if key_name.endswith('_ID') else key_name
        base = base.lower()
        if base.endswith('y'):
            return base[:-1] + 'ies'
        return base + 's'

    def _owns_key(self, table_name, col_name):
        """Decide whether *col_name* is the primary key of *table_name*.

        A table owns a key when the column follows the naming convention of
        ``_primary_key_name``, or when the column is the table's first column
        and no keys exist for it yet (this covers e.g. ``Transaction_ID`` in
        ``SALES_TRANSACTIONS`` and ``Date_ID`` in ``time_dimension``).  The
        decision is remembered so it stays stable for every row of the table.
        """
        if col_name == self._primary_key_name(table_name):
            self.key_owners[col_name] = table_name
            return True

        owner = self.key_owners.get(col_name)
        if owner is not None:
            return owner == table_name

        columns = self.schema.get("tables", {}).get(table_name, {}).get("columns", {})
        if col_name not in self.generated_keys and next(iter(columns), None) == col_name:
            self.key_owners[col_name] = table_name
            return True
        return False

    def _handle_id_column(self, table_name, col_name, row_index, total_rows):
        """Return the UUID string for an ``*_ID`` cell.

        Primary keys get one unique value per row.  Every other ID column is a
        foreign key and receives a random value from the keys already generated
        for the column it refers to; if none exist, a pool of ``total_rows``
        new keys is created and sampled from.
        """
        if self._owns_key(table_name, col_name):
            keys = self.generated_keys.setdefault(col_name, [])
            shortfall = total_rows - len(keys)
            if shortfall > 0:
                # The pool may have been created earlier with fewer entries.
                keys.extend(str(uuid4()) for _ in range(shortfall))
            return keys[row_index]

        # "Product_1_ID" / "Product_2_ID" both refer to "Product_ID".
        key_name = '_'.join(part for part in col_name.split('_') if not part.isdigit())

        candidates = self.generated_keys.get(key_name)
        if not candidates:
            referenced = self.generated_data.get(self._referenced_table_name(key_name))
            if referenced is not None and key_name in referenced.columns:
                candidates = referenced[key_name].unique().tolist()
        if not candidates:
            candidates = [str(uuid4()) for _ in range(total_rows)]
            self.generated_keys[key_name] = candidates
        return random.choice(candidates)

    def _generate_basic_data(self, col_type, col_name):
        """Return a random value for *col_type*, refined by hints in *col_name*.

        Returns ``None`` for an unrecognised column type.
        """
        lowered = col_name.lower()
        if col_type == "date":
            return random.choice(self.seasonal_dates)
        elif col_type == "datetime":
            base_date = random.choice(self.seasonal_dates)
            hour = random.randint(0, 23)
            minute = random.randint(0, 59)
            return f"{base_date} {hour:02d}:{minute:02d}:00"
        elif col_type == "float":
            if "price" in lowered or "amount" in lowered:
                return round(random.uniform(10.0, 1000.0), 2)
            elif "rate" in lowered:
                return round(random.uniform(0.0, 100.0), 2)
            return round(random.uniform(0.0, 1000.0), 2)
        elif col_type == "int":
            if "quantity" in lowered:
                return random.randint(1, 50)
            return random.randint(1, 100)
        elif col_type == "string":
            if "name" in lowered and "product" not in lowered:
                return self.person.full_name()
            elif "email" in lowered:
                return self.person.email()
            return self.fake.word()
        elif "enum" in col_type:
            # Split on the first colon only, and trim each option so a schema
            # written as "A, B" does not yield values with leading spaces.
            raw_options = col_type.split(":", 1)[1].split(",")
            options = [option.strip() for option in raw_options if option.strip()]
            return random.choice(options)
        return None

    def generate_data(self):
        """Generate every table in the schema and return ``{table name: DataFrame}``.

        Order: ``categories``, the other known dimension tables, the known
        fact tables, then any schema table not in those lists.
        """
        tables = self.schema["tables"]

        plan = [("dimension", "categories")]
        plan.extend(("dimension", name) for name in self.DIMENSION_TABLES)
        plan.extend(("fact", name) for name in self.FACT_TABLES)
        planned = {name for _, name in plan}
        # Tables unknown to the fixed lists are generated last instead of
        # being dropped silently.
        plan.extend(("additional", name) for name in tables if name not in planned)

        for kind, table_name in plan:
            if table_name not in tables:
                continue
            print(f"Generating {kind} table: {table_name}")
            table_details = tables[table_name]
            self._generate_table_data(table_name, table_details["columns"], table_details["rows"])

        return self.generated_data

    def _generate_table_data(self, table_name, columns, rows):
        """Generate *rows* rows for *table_name*, store the DataFrame and return it."""
        data = {col_name: [] for col_name in columns}

        # Row-major order matters: paired columns read state left by an
        # earlier column of the same row.
        for row_index in range(rows):
            for col_name, col_type in columns.items():
                value = self._generate_column_data(table_name, col_name, col_type, row_index, rows)
                data[col_name].append(value)

        df = pd.DataFrame(data)
        self.generated_data[table_name] = df
        return df

    def save_to_csv(self, output_dir="output"):
        """Write each generated table to ``<output_dir>/<table>.csv`` (no index column)."""
        import os
        os.makedirs(output_dir, exist_ok=True)

        for table_name, df in self.generated_data.items():
            output_path = os.path.join(output_dir, f"{table_name}.csv")
            df.to_csv(output_path, index=False)
            print(f"Saved {table_name} to {output_path}")




In [48]:
# Schema for every table to generate: table name -> column types and row count.
# Column order matters within a table: paired columns (Start_Date/End_Date,
# Session_Start_Time/Session_End_Time, Loyalty_Points_*, Product_Name/
# Subcategory/Brand) are produced together by the generator, leading column first.
# Enum columns list their allowed values as "enum:value1,value2,...".
data = {
    "tables": {
        # FACT TABLES
        "SALES_TRANSACTIONS": {
            "columns": {
                "Transaction_ID": "uuid",
                "Customer_ID": "uuid",
                "Product_ID": "uuid",
                "Store_ID": "uuid",
                "Channel_Type_ID": "uuid",
                "Payment_Method_ID": "uuid",
                "Transaction_Date": "date",
                "Quantity": "int",
                "Unit_Price": "float",
                "Total_Amount": "float",
                "Discount_Applied": "float",
                "Tax_Amount": "float"
            },
            "rows": 1000
        },
        "CUSTOMER_ENGAGEMENT": {
            "columns": {
                "Engagement_ID": "uuid",
                "Customer_ID": "uuid",
                "Marketing_Channel_ID": "uuid",
                "Campaign_ID": "uuid",
                "Engagement_Date": "date",
                "Engagement_Type": "enum:Email,Social Media,Phone Call,Text Message(SMS),Survey Response,Referral Program,Ad Click,Push Notification,TV Advertisement",
                "Channel_Type_ID": "uuid",
                "Click_Through_Rate": "float",
                "Time_Spent": "int"
            },
            "rows": 1000
        },
        "LOYALTY_TRANSACTIONS": {
            "columns": {
                "Loyalty_Transaction_ID": "uuid",
                "Customer_ID": "uuid",
                "Store_ID": "uuid",
                "Transaction_ID": "uuid",
                "Loyalty_Program_ID": "uuid",
                "Loyalty_Points_Earned": "float",
                "Loyalty_Points_Redeemed": "float",
                "Loyalty_Balance": "float"
            },
            "rows": 1000
        },
        "INVENTORY_TRACKING": {
            "columns": {
                "Inventory_Tracking_ID": "uuid",
                "Product_ID": "uuid",
                "Store_ID": "uuid",
                "Supplier_ID": "uuid",
                "Stock_Level": "int",
                "Reorder_Level": "int",
                "Last_Updated_Date": "date"
            },
            "rows": 1000
        },
        "WEBSITE_TRAFFIC": {
            "columns": {
                "Session_ID": "uuid",
                "Customer_ID": "uuid",
                "Product_ID": "uuid",
                "Device_Type_ID": "uuid",
                "Page_Viewed": "enum:Homepage,Product Page,Category Page,Checkout Page,Search Results,About Us,Privacy Policy,Terms and Conditions",
                "Time_Spent": "int",
                "Session_Start_Time": "datetime",
                "Session_End_Time": "datetime"
            },
            "rows": 1000
        },
        "PROMOTION_PERFORMANCE": {
            "columns": {
                "Promotion_ID": "uuid",
                "Product_ID": "uuid",
                "Store_ID": "uuid",
                "Campaign_ID": "uuid",
                "Channel_Type_ID": "uuid",
                "Start_Date": "date",
                "End_Date": "date",
                "Regular_Price": "float",
                "Promotional_Price": "float",
                "Units_Sold": "int",
                "Revenue_Generated": "float",
                "Promotion_Cost": "float",
                "ROI": "float",
                "Conversion_Rate": "float"
            },
            "rows": 1000
        },
        "CUSTOMER_FEEDBACK": {
            "columns": {
                "Feedback_ID": "uuid",
                "Customer_ID": "uuid",
                "Product_ID": "uuid",
                "Store_ID": "uuid",
                "Transaction_ID": "uuid",
                "Order_Channel_ID": "uuid",
                "Feedback_Date": "date",
                "Rating": "enum:1 (Very Poor),2 (Poor),3 (Average),4 (Good),5 (Excellent)",
                "Feedback_Type": "enum:Product,Service,Store,Website,Delivery,Customer Support,Returns Process,Packaging,Others"
            },
            "rows": 1000
        },
        "HISTORICAL_FORECASTS": {
            "columns": {
                "Forecast_ID": "uuid",
                "Product_ID": "uuid",
                "Category_ID": "uuid",
                "Store_ID": "uuid",
                "Forecast_Date": "date",
                "Forecast_Period": "string",
                "Forecasted_Sales_Quantity": "int",
                "Actual_Sales_Quantity": "int",
                "Forecast_Error": "float"
            },
            "rows": 1000
        },
        "CUSTOMER_ACTIVITY_TRACKING": {
            "columns": {
                "Customer_ID": "uuid",
                "Last_Transaction_Date": "date",
                "Last_Engagement_Date": "date",
                "Churn_Risk_Score": "float"
            },
            "rows": 1000
        },
        "PRODUCT_ASSOCIATIONS": {
            "columns": {
                "Association_ID": "uuid",
                "Product_1_ID": "uuid",
                "Product_2_ID": "uuid",
                "Support": "float",
                "Confidence": "float",
                "Lift": "float"
            },
            "rows": 1000
        },
        "ATTRIBUTION_MODELS": {
            "columns": {
                "Attribution_ID": "uuid",
                "Customer_ID": "uuid",
                "Transaction_ID": "uuid",
                "Engagement_Channel": "enum:Email,Social Media,Phone Call,Text Message,Survey,Referral Program,Webinar,Push Notification,TV Advertisement,Direct Mail,Influencer Marketing,Online Reviews",
                "Attributed_Revenue": "float",
                "Engagement_Date": "date"
            },
            "rows": 1000
        },
        "FULFILLMENT_TRACKING": {
            "columns": {
                "Transaction_ID": "uuid",
                "Fulfillment_Status": "enum:Pending,Shipped,Delivered,Returned,Cancelled,In Transit,Out for Delivery,Processing,Awaiting Payment,Failed",
                "Fulfillment_Date": "date",
                "Order_Type": "enum:Online,In-Store,Pre-Order,Backorder,Subscription,Wholesale,Gift,Click-and-Collect,Drop-Shipping,Custom Order",
            },
            "rows": 1000
        },
        "CAMPAIGN_PRODUCT_LINK": {
            "columns": {
                "Campaign_Product_Link_ID": "uuid",
                "Campaign_ID": "uuid",
                "Product_ID": "uuid",
                "Target_Audience": "enum:New Customers,Returning Customers,All Customers,High-Value Customers,Seasonal Shoppers,Loyal Customers,Premium Members,Corporate Clients",
                "Revenue_Attributed": "float"
            },
            "rows": 1000
        },

        # DIMENSION TABLES
        "customers": {
            "columns": {
                "Customer_ID": "uuid",
                "Full_Name": "string",
                "Email": "string",
                "Gender": "enum:Male,Female,Other",
                "Date_of_Birth": "date",
                "Address": "string",
                "City": "enum:New Delhi,Mumbai,Kolkata,Bangalore,Chennai,Hyderabad,Pune,Ahmedabad,Jaipur,Chandigarh,Surat,Patna,Indore,Goa,Lucknow,Coimbatore,Madurai,Nashik,Vijayawada,Visakhapatnam",
                "State": "enum:Maharashtra,Karnataka,Tamil Nadu,Delhi,Uttar Pradesh,West Bengal,Andhra Pradesh,Bihar,Kerala,Telangana,Rajasthan,Punjab,Odisha,Chhattisgarh,Madhya Pradesh,Jharkhand,Assam,Goa,Uttarakhand",
                "Country": "enum:India",
                "Loyalty_Program_Member": "enum:Yes,No"
            },
            "rows": 1000
        },
        "products": {
            "columns": {
                "Product_ID": "uuid",
                "Product_Name": "string",
                "Category_ID": "uuid",
                "Subcategory": "enum:Smartphones,Laptops,Tablets,Headphones,Chairs,Desks,Shirts,Pants,Fiction,Non-Fiction",
                "Brand": "enum:Samsung,Apple,OnePlus,HP,Lenovo,Microsoft,Sony,JBL,Nike,Adidas,Puma,Levi's,Wrangler,Random House,Simon & Schuster",
                "Unit_Price": "float",
                "Online_Exclusive": "enum:Yes,No"
            },
            "rows": 1000
        },
        "stores": {
            "columns": {
                "Store_ID": "uuid",
                "Store_Name": "string",
                "City": "enum:Lucknow,Kanpur,Noida,Pune,Mumbai,Indore,Kolkata,Bengaluru,Nashik,Thane,Gwalior,Ajmer",
                "State": "enum:Uttar Pradesh,Maharashtra,Madhya Pradesh,West Bengal,Rajasthan,Karnataka",
                "Country": "enum:India",
                "Store_Type": "enum:Flagship,Franchise,Outlet,Online,Warehouse,Department Store,Showroom,Supermarket,Hypermarket"
            },
            "rows": 1000
        },
        "marketing_channels": {
            "columns": {
                "Marketing_Channel_ID": "uuid",
                "Channel_Name": "enum:Email Marketing,Social Media,Television,Radio,Billboard,Digital Display,Search Engine Marketing",
                "Cost_Per_Engagement": "float",
                "ROI_Percentage": "float"
            },
            "rows": 7  # Adjusted to match actual channels
        },
        "campaigns": {
            "columns": {
                "Campaign_ID": "uuid",
                "Campaign_Name": "string",
                "Marketing_Channel_ID": "uuid",
                "Promotion_Type_ID": "uuid",
                "Start_Date": "date",
                "End_Date": "date",
                "Budget": "float",
                "Target_Audience": "enum:New Customers,Returning Customers,All Customers,High-Value Customers",
                "Success_Metrics": "enum:Sales Increase,Brand Awareness,Lead Generation,Customer Retention,Market Share,Customer Satisfaction"
            },
            "rows": 20  # Realistic number of concurrent campaigns
        },
        "channel_types": {
            "columns": {
                "Channel_Type_ID": "uuid",
                "Channel_Type_Name": "enum:Online,Offline,Hybrid"
            },
            "rows": 3  # Actual number of channel types
        },
        "loyalty_programs": {
            "columns": {
                "Loyalty_Program_ID": "uuid",
                "Program_Name": "string",
                "Points_Earning_Rate": "float",
                "Points_Redeeming_Rate": "float",
                "Expiry_Period": "enum:30 days,90 days,180 days,1 year,2 years"
            },
            "rows": 10  # Realistic number of loyalty program tiers
        },
        "device_types": {
            "columns": {
                "Device_Type_ID": "uuid",
                "Device_Name": "enum:Mobile,Desktop,Tablet,Laptop",
                "OS_Type": "enum:Android,iOS,Windows,Linux"
            },
            "rows": 4
        },
        "categories": {
            "columns": {
                "Category_ID": "uuid",
                "Category_Name": "string",
                "Parent_Category": "enum:Electronics,Furniture,Clothing,Books,Home Appliances,Kitchenware,Toys"
            },
            "rows": 50  # Reduced to realistic number of categories
        },
        "suppliers": {
            "columns": {
                "Supplier_ID": "uuid",
                "Supplier_Name": "string",
                "Contact_Name": "string",
                "Contact_Info": "string"
            },
            "rows": 1000
        },
        "payment_methods": {
            "columns": {
                "Payment_Method_ID": "uuid",
                "Payment_Type": "enum:Credit Card,Debit Card,UPI,Net Banking,Cash,Digital Wallet,EMI",
                "Provider_Name": "string"
            },
            "rows": 7  # Actual number of payment methods
        },
        "promotion_types": {
            "columns": {
                "Promotion_Type_ID": "uuid",
                "Promotion_Type_Name": "enum:Discount,Buy One Get One,Bundle,Coupon",
                "Description": "string",
                "Minimum_Purchase_Required": "float",
                "Maximum_Discount_Value": "float"
            },
            "rows": 4  # Actual number of promotion types
        },
        "time_dimension": {
            "columns": {
                "Date_ID": "uuid",
                "Date": "date",
                "Day_Name": "string",
                "Day_of_Week": "int",
                "Month": "string",
                "Quarter": "int",
                "Year": "int",
                "Is_Holiday": "enum:Yes,No",
                "Season": "enum:Winter,Spring,Summer,Fall"
            },
            "rows": 1000
        }
    }
}


In [49]:
# Build the generator from the schema defined above, materialise every table,
# then write one CSV file per table into the "manas6" directory.
generator = MockDataGenerator(schema=data)
generated_data = generator.generate_data()
generator.save_to_csv("manas6")

Generating dimension table: categories
Generating dimension table: customers
Generating dimension table: products
Generating dimension table: stores
Generating dimension table: marketing_channels
Generating dimension table: campaigns
Generating dimension table: channel_types
Generating dimension table: loyalty_programs
Generating dimension table: device_types
Generating dimension table: suppliers
Generating dimension table: payment_methods
Generating dimension table: promotion_types
Generating dimension table: time_dimension
Generating fact table: SALES_TRANSACTIONS
Generating fact table: CUSTOMER_ENGAGEMENT
Generating fact table: LOYALTY_TRANSACTIONS
Generating fact table: INVENTORY_TRACKING
Generating fact table: WEBSITE_TRAFFIC
Generating fact table: PROMOTION_PERFORMANCE
Generating fact table: CUSTOMER_FEEDBACK
Generating fact table: HISTORICAL_FORECASTS
Generating fact table: CUSTOMER_ACTIVITY_TRACKING
Generating fact table: PRODUCT_ASSOCIATIONS
Generating fact table: ATTRIBUTION_