In [None]:
import os  # For saving files

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Apply Matplotlib's 'seaborn-v0_8-darkgrid' style sheet to all figures.
plt.style.use('seaborn-v0_8-darkgrid')
# Apply Seaborn's theme with the "whitegrid" style.
# NOTE(review): this runs after plt.style.use above, so it presumably
# overrides overlapping settings from the darkgrid sheet — confirm which of
# the two looks is actually intended.
sns.set_theme(style="whitegrid")

# Helper function to generate a synthetic sales dataset (required for a runnable example)
def generate_synthetic_data(file_path="synthetic_sales_data.csv", num_records=500, seed=None):
    """Generate a synthetic sales dataset and save it to a CSV file.

    If ``file_path`` already exists it is reused untouched (nothing is
    regenerated, and ``num_records``/``seed`` are ignored).

    Args:
        file_path: Destination CSV path.
        num_records: Number of daily rows to generate, starting at
            2024-01-01. Must be at least 1.
        seed: Optional seed for a private random generator, making the
            output reproducible. When None, NumPy's global random state is
            used, exactly as before this parameter existed.

    Returns:
        ``file_path``, whether the file was reused or newly written.

    Raises:
        ValueError: If ``num_records`` is smaller than 1.
    """
    if os.path.exists(file_path):
        print(f"Using existing synthetic data file: {file_path}")
        return file_path

    if num_records < 1:
        raise ValueError("num_records must be at least 1")

    print("Generating new synthetic sales data...")

    # The np.random module and a RandomState instance expose the same
    # randint/randn/choice/uniform functions, so one code path serves both.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # 1. Date: one row per consecutive day
    dates = pd.date_range(start="2024-01-01", periods=num_records, freq='D')

    # 2. Product: weighted so laptops and smartphones dominate
    products = ['Laptop', 'Smartphone', 'Tablet', 'Accessory']
    product_data = rng.choice(products, size=num_records, p=[0.35, 0.30, 0.20, 0.15])

    # 3. Region: uniform over the four regions
    regions = ['East', 'West', 'North', 'South']
    region_data = rng.choice(regions, size=num_records)

    # 4. Sales (Revenue): integer base plus Gaussian noise, floored at 100
    sales = rng.randint(100, 5000, size=num_records) + rng.randn(num_records) * 50
    sales[sales < 100] = 100  # Minimum sales value

    # 5. Profit: a random 10-40% share of sales. Computed before any NaN is
    #    injected into sales, so the two columns get independent gaps.
    profit_ratios = rng.uniform(0.1, 0.4, size=num_records)
    profit = sales * profit_ratios

    # Inject missing values for the cleaning demonstration. The counts are
    # clamped because choice(..., replace=False) cannot draw more distinct
    # indices than there are rows.
    sales[rng.choice(num_records, size=min(20, num_records), replace=False)] = np.nan
    profit[rng.choice(num_records, size=min(15, num_records), replace=False)] = np.nan

    data = pd.DataFrame({
        'Date': dates,
        'Product': product_data,
        'Region': region_data,
        'Sales': sales.astype(float),
        'Profit': profit.astype(float),
    })

    data.to_csv(file_path, index=False)
    print(f"Synthetic data saved to {file_path}")
    return file_path

# ====================================================================
# SalesDataAnalyzer Class (OOP Core)
# ====================================================================

class SalesDataAnalyzer:
    """
    Analyze and visualize a sales dataset with Pandas, Matplotlib and Seaborn.

    The working dataset lives in ``self.data`` (an empty DataFrame until a
    file is loaded). The analysis methods read the columns 'Date',
    'Product', 'Region', 'Sales' and 'Profit'; ``mathematical_operations``
    adds a derived 'Tax_Amount' column. Methods report their results with
    ``print`` and return None unless stated otherwise.
    """

    def __init__(self, file_path=None):
        """Create the analyzer and, if ``file_path`` is given, load it."""
        self.data = pd.DataFrame()
        print("SalesDataAnalyzer initialized.")
        if file_path:
            self.load_data(file_path)

    def __del__(self):
        """Destructor: print a message when the object is destroyed."""
        # No resources are held; the message only makes destruction visible.
        print("SalesDataAnalyzer object destroyed.")

    # --- Internal helpers ---
    def _has_data(self):
        """Return True if a dataset is loaded; otherwise print a hint."""
        if self.data.empty:
            print(" Load data first.")
            return False
        return True

    def _monthly_sales(self):
        """Return total 'Sales' per calendar month, indexed by month end."""
        # An offset object is passed instead of the 'M' string alias so the
        # call does not depend on which alias spelling pandas accepts.
        return self.data.set_index('Date').resample(pd.offsets.MonthEnd())['Sales'].sum()

    def _monthly_sales_by_region(self):
        """Return monthly 'Sales' totals as a wide frame, one column per region."""
        grouped = self.data.set_index('Date').groupby('Region')['Sales']
        monthly = grouped.resample(pd.offsets.MonthEnd()).sum()
        # Move the Region level into columns; months with no rows for a
        # region become 0 instead of NaN so they can be stacked.
        return monthly.unstack(level=0, fill_value=0)

    def _correlation_columns(self):
        """Return the numeric columns available for the correlation heatmap."""
        # 'Tax_Amount' only exists after mathematical_operations() has run.
        candidates = ('Sales', 'Profit', 'Tax_Amount')
        return [col for col in candidates if col in self.data.columns]

    # --- Data Acquisition & Loading ---
    def load_data(self, file_path):
        """Load data from a CSV file into ``self.data``.

        The 'Date' column is converted to datetime. On any failure the
        error is printed and ``self.data`` is reset to an empty DataFrame.

        Returns:
            True if the file was loaded, False otherwise.
        """
        try:
            self.data = pd.read_csv(file_path)
            # Ensure Date column is in datetime format
            self.data['Date'] = pd.to_datetime(self.data['Date'])
            print(f"\n Dataset loaded successfully from: **{file_path}**")
            return True
        except FileNotFoundError:
            print(f"\n  Error: File not found at {file_path}")
            self.data = pd.DataFrame()  # Clear data on failure
            return False
        except Exception as e:
            print(f"\n  An error occurred during file loading: {e}")
            self.data = pd.DataFrame()
            return False

    # --- Data Exploration & Cleaning ---
    def explore_data(self):
        """Print head, info, descriptive statistics and missing-value counts."""
        if not self._has_data():
            return

        print("\n--- Basic Data Exploration ---")
        print("--- Head (First 5 Rows) ---")
        print(self.data.head())
        print("\n--- DataFrame Info (Dtypes, Non-Null Counts) ---")
        self.data.info(verbose=False, buf=None, max_cols=200, memory_usage=True)
        print("\n--- Descriptive Statistics (Numerical Columns) ---")
        print(self.data.describe())
        print("\n--- Check for Missing Values (NaN) ---")
        print(self.data.isnull().sum())

    def clean_data(self, method='median'):
        """Fill or drop missing values in the dataset.

        Args:
            method: How to treat missing 'Sales'/'Profit' values: 'median'
                or 'mean' fills with that statistic; any other value drops
                the affected rows. Missing 'Product'/'Region' values are
                always filled with the column's most frequent value.
        """
        if not self._has_data():
            return

        initial_missing = self.data.isnull().sum().sum()
        if initial_missing == 0:
            print(" Data is already clean (no missing values found).")
            return

        print(f"\n--- Data Cleaning: Handling {initial_missing} missing values ---")

        # 1. Handle Missing Numerical Values ('Sales', 'Profit')
        for col in ['Sales', 'Profit']:
            if col not in self.data.columns or not self.data[col].isnull().any():
                continue
            if method in ('median', 'mean'):
                fill_value = getattr(self.data[col], method)()
                # Assign the filled column back: an in-place fillna on
                # self.data[col] acts on a temporary object and may leave
                # the frame unchanged.
                self.data[col] = self.data[col].fillna(fill_value)
                print(f"   Filled missing values in '{col}' with the **{method}** ({fill_value:.2f}).")
            else:  # Unknown method: drop the affected rows
                self.data.dropna(subset=[col], inplace=True)
                print(f"   Dropped rows with missing values in '{col}'.")

        # 2. Handle Missing Categorical Values ('Product', 'Region')
        for col in ['Product', 'Region']:
            if col not in self.data.columns or not self.data[col].isnull().any():
                continue
            modes = self.data[col].mode()
            if modes.empty:
                # Column is entirely missing: there is no mode to fill with.
                continue
            fill_value = modes.iloc[0]
            self.data[col] = self.data[col].fillna(fill_value)
            print(f"   Filled missing values in '{col}' with the **mode** ({fill_value}).")

        final_missing = self.data.isnull().sum().sum()
        print(f" Cleaning complete. Total missing values remaining: {final_missing}")

    # --- Data Manipulation (Pandas/Numpy) ---

    def mathematical_operations(self):
        """Add a 'Tax_Amount' column and demonstrate basic NumPy array usage."""
        if not self._has_data():
            return

        print("\n--- Mathematical Operations & Numpy Demonstration ---")

        # New Column Creation (Element-wise operation)
        self.data['Tax_Amount'] = self.data['Sales'] * 0.05  # 5% tax
        print(" New column 'Tax_Amount' (Sales * 0.05) created.")

        # Numpy Array Creation and Operations (Requirement A & B)
        sales_np = self.data['Sales'].to_numpy()

        # Indexing and Slicing (Requirement A)
        print(f"\nNumpy Array Indexing: sales_np[0] = {sales_np[0]:.2f}")
        print(f"Numpy Array Slicing: sales_np[1:4] = {sales_np[1:4].round(2)}")

        # Element-wise Mathematical Operation (Requirement B)
        discounted_sales_np = sales_np * 0.95
        print(f"Element-wise math: First 3 discounted sales (0.95 * Sales) = {discounted_sales_np[:3].round(2)}")

    def combine_data(self, other_dataframe):
        """Append the rows of ``other_dataframe`` to ``self.data`` via concat.

        Nothing is combined if either frame is empty or ``other_dataframe``
        is None.
        """
        if self.data.empty or other_dataframe is None or other_dataframe.empty:
            print(" Cannot combine: one or both DataFrames are empty.")
            return

        initial_rows = len(self.data)
        # Using concat to combine data (assuming similar columns)
        self.data = pd.concat([self.data, other_dataframe], ignore_index=True)
        print(f" DataFrames combined using **pd.concat()**. Rows added: {len(self.data) - initial_rows}")

    def split_data(self):
        """Split the dataset into one DataFrame per 'Region'.

        Returns:
            A dict mapping each region value to a copy of its rows, or None
            when no data is loaded.
        """
        if not self._has_data():
            return

        print("\n--- Data Splitting by 'Region' ---")
        split_dfs = {}
        # Missing regions are skipped: NaN never compares equal to itself,
        # so it would only produce an empty frame.
        for region in self.data['Region'].dropna().unique():
            # Split using boolean filtering
            split_dfs[region] = self.data[self.data['Region'] == region].copy()
            print(f"   Created DataFrame for **{region}**: {len(split_dfs[region])} rows.")

        print(" DataFrame successfully split into multiple regional DataFrames.")
        return split_dfs

    # --- Data Analysis (Search, Sort, Filter, Aggregate, Statistical) ---

    def search_sort_filter(self):
        """Demonstrate searching, filtering and sorting on the dataset."""
        if not self._has_data():
            return

        print("\n--- Search, Sort, and Filter Operations ---")

        # 1. Search (Requirement C): records with Sales > $4000
        high_sales_products = self.data[self.data['Sales'] > 4000]
        print(f"\n Search: Found **{len(high_sales_products)}** records with Sales > $4000.")
        if not high_sales_products.empty:
            first = high_sales_products.iloc[0]
            print(f"   Example: {first['Product']} in {first['Region']} on {first['Date'].strftime('%Y-%m-%d')}")

        # 2. Filter (Requirement C): 'West' region and 'Laptop' product
        filtered_data = self.data[(self.data['Region'] == 'West') & (self.data['Product'] == 'Laptop')]
        print(f"\n🧹 Filter: Found **{len(filtered_data)}** records for 'Laptop' in 'West' region.")

        # 3. Sort (Requirement C): highest profit first
        sorted_data = self.data.sort_values(by='Profit', ascending=False)
        print("\n Sort: Data sorted by 'Profit' (Descending). Top 3 by Profit:")
        print(sorted_data[['Date', 'Product', 'Profit']].head(3))

    def aggregate_functions(self):
        """Print total sales, average profit and record count per region."""
        if not self._has_data():
            return

        print("\n--- Aggregating Functions (sum, mean, count) ---")

        # Group by 'Region' and aggregate Sales/Profit
        regional_summary = self.data.groupby('Region').agg(
            Total_Sales=('Sales', 'sum'),
            Average_Profit=('Profit', 'mean'),
            Count_Records=('Product', 'count')
        ).round(2)

        print("\nTotal Sales, Average Profit, and Count by Region:")
        print(regional_summary)
        print(" Aggregation complete.")

    def statistical_analysis(self):
        """Print describe() output plus std, variance and a 90th percentile."""
        if not self._has_data():
            return

        print("\n--- Statistical Analysis (Std Dev, Variance, Quantile) ---")

        # Use describe() (Requirement D)
        print("\nFull Descriptive Statistics for Sales and Profit:")
        print(self.data[['Sales', 'Profit']].describe().T)  # Transpose for better view

        # Calculate specific statistics (Requirement D)
        sales_std = self.data['Sales'].std()
        sales_var = self.data['Sales'].var()
        profit_p90 = self.data['Profit'].quantile(0.90)  # 90th percentile

        print(f"\n Sales Standard Deviation (std()): **${sales_std:,.2f}**")
        print(f" Sales Variance (var()): **${sales_var:,.2f}**")
        print(f" 90th Percentile of Profit (quantile(0.9)): **${profit_p90:,.2f}**")

    def create_pivot_table(self):
        """Print a pivot table of total sales by region (rows) and product (columns)."""
        if not self._has_data():
            return

        print("\n--- Pivot Table Creation ---")

        # Pivot table: Total Sales by Region and Product
        pivot_sales = self.data.pivot_table(
            index='Region',
            columns='Product',
            values='Sales',
            aggfunc='sum',
            fill_value=0  # Replace NaN with 0
        ).round(2)

        print("\nTotal Sales (Sum) by Region and Product:")
        print(pivot_sales)
        print(" Pivot table generated.")

    # --- Data Visualization (Matplotlib & Seaborn) ---
    def visualize_data(self):
        """Show a 3x2 grid of Matplotlib plots and a pair of Seaborn plots.

        The correlation heatmap uses 'Sales' and 'Profit', plus 'Tax_Amount'
        when that column has already been created.
        """
        if not self._has_data():
            return

        print("\n--- Data Visualization ---")

        monthly_sales = self._monthly_sales()

        # =========================================================
        # Matplotlib Plots (Requirement E)
        # =========================================================
        fig, axes = plt.subplots(3, 2, figsize=(18, 15))  # Create subplots
        fig.suptitle('Matplotlib Sales Analysis Visualizations', fontsize=20, y=1.02)

        # 1. Line Plot (E) - Monthly Sales Trend
        monthly_sales.plot(kind='line', ax=axes[0, 0], title='Monthly Sales Trend (Line Plot)', color='b')
        axes[0, 0].set_ylabel('Total Sales ($)')
        axes[0, 0].legend(['Sales'])

        # 2. Bar Plot (E) - Total Sales by Region
        region_sales = self.data.groupby('Region')['Sales'].sum()
        region_sales.plot(kind='bar', ax=axes[0, 1], title='Total Sales by Region (Bar Plot)', color='skyblue')
        axes[0, 1].set_ylabel('Total Sales ($)')
        axes[0, 1].tick_params(axis='x', rotation=0)  # Remove tilt

        # 3. Scatter Plot (E) - Sales vs. Profit
        axes[1, 0].scatter(self.data['Sales'], self.data['Profit'], alpha=0.6, color='g')
        axes[1, 0].set_title('Sales vs. Profit (Scatter Plot)')
        axes[1, 0].set_xlabel('Sales ($)')
        axes[1, 0].set_ylabel('Profit ($)')

        # 4. Histogram (E) - Distribution of Profit
        axes[1, 1].hist(self.data['Profit'], bins=20, color='coral', edgecolor='black')
        axes[1, 1].set_title('Profit Distribution (Histogram)')
        axes[1, 1].set_xlabel('Profit ($)')
        axes[1, 1].set_ylabel('Frequency')

        # 5. Pie Plot (E) - Product Distribution
        product_counts = self.data['Product'].value_counts()
        product_counts.plot(kind='pie', ax=axes[2, 0], autopct='%1.1f%%', startangle=90, title='Product Count Distribution (Pie Plot)', ylabel='')
        axes[2, 0].legend(title="Products", loc="center left", bbox_to_anchor=(1, 0, 0.5, 1))

        # 6. Stack Plot (E) - Monthly sales stacked by Region (needs wide data)
        data_pivot = self._monthly_sales_by_region()
        num_regions = len(data_pivot.columns)
        # Call the colormap with evenly spaced values to get one color per region
        colors = plt.cm.coolwarm(np.linspace(0, 1, num_regions))

        axes[2, 1].stackplot(data_pivot.index, data_pivot.values.T, labels=data_pivot.columns, colors=colors)
        axes[2, 1].set_title('Monthly Sales by Region (Stack Plot)')
        axes[2, 1].set_xlabel('Date')
        axes[2, 1].set_ylabel('Total Sales ($)')
        axes[2, 1].legend(loc='upper left')

        plt.tight_layout()
        plt.show()  # Display Matplotlib plots

        # =========================================================
        # Seaborn Plots (Requirement F)
        # =========================================================

        # Create a new figure for Seaborn plots
        plt.figure(figsize=(15, 6))

        # 7. Box Plot (F) - Sales Distribution by Product
        plt.subplot(1, 2, 1)
        sns.boxplot(x='Product', y='Sales', data=self.data, palette='viridis')
        plt.title('Sales Distribution by Product (Seaborn Box Plot)')

        # 8. Heatmap (F) - Correlation Matrix
        plt.subplot(1, 2, 2)
        # Only correlate columns that exist, so this plot also works before
        # mathematical_operations() has added 'Tax_Amount'.
        correlation_matrix = self.data[self._correlation_columns()].corr()
        sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=.5, linecolor='black')
        plt.title('Correlation Heatmap (Seaborn)')

        plt.tight_layout()
        plt.show()  # Display Seaborn plots
        print(" Visualizations generated using Matplotlib and Seaborn.")

    def save_visualization(self, plot_name, file_format='png'):
        """Save a bar plot of total sales per region as an image file.

        Args:
            plot_name: Output file name without extension.
            file_format: File extension; the file is written as
                ``{plot_name}.{file_format}``.

        Errors while plotting or saving are printed, not raised.
        """
        if not self._has_data():
            return

        fig = None
        try:
            fig, ax = plt.subplots(figsize=(10, 6))
            region_sales = self.data.groupby('Region')['Sales'].sum()
            region_sales.plot(kind='bar', color='darkblue', ax=ax)
            ax.set_title('Total Sales by Region')
            ax.set_ylabel('Total Sales ($)')
            ax.set_xlabel('Region')
            ax.tick_params(axis='x', rotation=0)

            filename = f"{plot_name}.{file_format}"
            fig.savefig(filename)
            print(f" Visualization saved successfully as: **{filename}**")
        except Exception as e:
            print(f"  Error saving visualization: {e}")
        finally:
            # Close the figure even when plotting or saving failed, so a
            # failed attempt does not leave an open figure behind.
            if fig is not None:
                plt.close(fig)

# ====================================================================
# Main Program & Menu-Driven Interface (UI Core)
# ====================================================================

def display_menu():
    """Print the main menu: a framed title followed by the numbered options."""
    rule = "=" * 50
    options = (
        "1. Load Dataset",
        "2. Explore Data (Head, Info, Describe)",
        "3. Clean Data (Handle Missing Values)",
        "4. Data Manipulation (Math Ops, Numpy, Search, Sort, Filter)",
        "5. Aggregate and Statistical Analysis",
        "6. Generate Pivot Table",
        "7. Data Visualization (Matplotlib & Seaborn)",
        "8. Save a Visualization to File",
        "9. Exit",
    )
    menu_lines = [
        "\n" + rule,  # the leading newline separates the menu from earlier output
        "       Sales Data Analysis & Visualization Program",
        rule,
        "Please select an option:",
        *options,
        rule,
    ]
    print("\n".join(menu_lines))

def main():
    """Run the interactive, menu-driven interface until the user exits.

    A SalesDataAnalyzer is created empty and the synthetic dataset file is
    generated (or reused) so there is always a file to load. Each loop
    iteration prints the menu, reads a choice and runs the matching
    analyzer method(s). Errors raised by an action are printed and the menu
    is shown again; a closed input stream ends the program.
    """
    # 1. Initialization and Data Generation
    sales_analyzer = SalesDataAnalyzer()

    # Generate synthetic data path (for a runnable example)
    data_file_path = generate_synthetic_data()

    while True:
        display_menu()

        try:
            choice = input("Enter your choice: ").strip()

            if choice == '1':
                # Load Dataset. Pressing Enter without typing a path falls
                # back to the generated file suggested in the prompt.
                entered = input(f"Enter the path of the dataset (CSV file), e.g., **{data_file_path}**: ")
                file_path = entered.strip() or data_file_path
                sales_analyzer.load_data(file_path)

            elif choice == '2':
                # Explore Data
                sales_analyzer.explore_data()

            elif choice == '3':
                # Clean Data
                sales_analyzer.clean_data()

            elif choice == '4':
                # Data Manipulation (A, B, C part 1)
                sales_analyzer.mathematical_operations()
                # Demonstration of Search, Sort, Filter
                sales_analyzer.search_sort_filter()

            elif choice == '5':
                # Aggregate and Statistical Analysis (C part 2, D)
                sales_analyzer.aggregate_functions()
                sales_analyzer.statistical_analysis()

            elif choice == '6':
                # Generate Pivot Table
                sales_analyzer.create_pivot_table()

            elif choice == '7':
                # Data Visualization (E, F)
                sales_analyzer.visualize_data()

            elif choice == '8':
                # Save Visualization
                plot_name = input("Enter the desired file name (e.g., regional_sales_report): ")
                sales_analyzer.save_visualization(plot_name)

            elif choice == '9':
                # Exit Program
                print("\nExiting Program. Thank you for using the Sales Data Analyzer! 👋")
                # Explicitly deleting the object to show the __del__ destructor
                del sales_analyzer
                break

            else:
                print("  Invalid choice. Please select an option from 1 to 9.")

        except EOFError:
            # input() raises EOFError once stdin is closed. It is a subclass
            # of Exception, so without this branch the generic handler below
            # would swallow it and the loop would spin forever.
            print("\nInput stream closed. Exiting Program.")
            break
        except Exception as e:
            print(f"\n An unexpected error occurred: {e}")

# Start the interactive menu only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

SalesDataAnalyzer initialized.
Using existing synthetic data file: synthetic_sales_data.csv

       Sales Data Analysis & Visualization Program
Please select an option:
1. Load Dataset
2. Explore Data (Head, Info, Describe)
3. Clean Data (Handle Missing Values)
4. Data Manipulation (Math Ops, Numpy, Search, Sort, Filter)
5. Aggregate and Statistical Analysis
6. Generate Pivot Table
7. Data Visualization (Matplotlib & Seaborn)
8. Save a Visualization to File
9. Exit
