In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

class SalesDataAnalyzer:

    def _init_(self, file_path=None):
        self.data = None
        if file_path:
            self.load_data(file_path)

    def _del_(self):
        print("SalesDataAnalyzer object destroyed.")

    # ---------------- Load Data ----------------
    def load_data(self, file_path):
        try:
            self.data = pd.read_csv(file_path)
            print("Dataset loaded successfully!")
        except Exception as e:
            print("Error loading dataset:", e)

    # ---------------- Explore Data ----------------
    def explore_data(self):
        print("\n1. First 5 rows:\n", self.data.head())
        print("\n2. Last 5 rows:\n", self.data.tail())
        print("\n3. Column Names:\n", self.data.columns)
        print("\n4. Data Types:\n", self.data.dtypes)
        print("\n5. Dataset Info:")
        print(self.data.info())

    # ---------------- Handle Missing Data ----------------
    def clean_data(self):
        if self.data.isnull().sum().sum() == 0:
            print("No missing values found in the dataset!")
        else:
            self.data.fillna(self.data.mean(numeric_only=True), inplace=True)
            print("Missing values filled with mean.")

    # ---------------- Mathematical Operations ----------------
    def mathematical_operations(self):
        self.data["Profit_Margin"] = self.data["Profit"] / self.data["Sales"]
        print("\nProfit Margin calculated:\n", self.data[["Sales", "Profit", "Profit_Margin"]].head())

    # ---------------- Search / Sort / Filter ----------------
    def search_sort_filter(self):
        print("\nSorted by Sales:\n", self.data.sort_values("Sales", ascending=False).head())
        print("\nFilter Sales > 600:\n", self.data[self.data["Sales"] > 600])

    # ---------------- Aggregation ----------------
    def aggregate_functions(self):
        print("\nTotal Sales by Region:\n", self.data.groupby("Region")["Sales"].sum())
        print("\nAverage Profit:\n", self.data["Profit"].mean())

    # ---------------- Statistical Analysis ----------------
    def statistical_analysis(self):
        print("\nStatistical Summary:\n", self.data.describe())
        print("\nStandard Deviation:\n", self.data.std(numeric_only=True))
        print("\nVariance:\n", self.data.var(numeric_only=True))

    # ---------------- Pivot Table ----------------
    def create_pivot_table(self):
        pivot = pd.pivot_table(
            self.data,
            values="Sales",
            index="Region",
            columns="Product",
            aggfunc="sum"
        )
        print("\nPivot Table:\n", pivot)
        return pivot

    # ---------------- Matplotlib Visualization ----------------
    def visualize_data(self):
        plt.figure()
        self.data.groupby("Region")["Sales"].sum().plot(kind="bar")
        plt.title("Total Sales by Region")
        plt.xlabel("Region")
        plt.ylabel("Sales")
        plt.show()

        plt.figure()
        plt.plot(self.data["Date"], self.data["Sales"])
        plt.title("Sales Trend Over Time")
        plt.xticks(rotation=45)
        plt.show()

        plt.figure()
        plt.scatter(self.data["Sales"], self.data["Profit"])
        plt.title("Sales vs Profit")
        plt.xlabel("Sales")
        plt.ylabel("Profit")
        plt.show()

        plt.figure()
        self.data["Sales"].plot(kind="hist", bins=10)
        plt.title("Sales Histogram")
        plt.show()

    # ---------------- Stack Plot ----------------
    def stack_plot(self):
        pivot = self.data.pivot_table(
            index="Date",
            columns="Region",
            values="Sales",
            aggfunc="sum"
        ).fillna(0)

        plt.figure()
        plt.stackplot(pivot.index, pivot.T, labels=pivot.columns)
        plt.legend()
        plt.title("Stack Plot of Sales by Region")
        plt.xticks(rotation=45)
        plt.show()

    # ---------------- Seaborn Visualization ----------------
    def seaborn_visuals(self):
        sns.boxplot(x="Region", y="Sales", data=self.data)
        plt.title("Sales Distribution by Region")
        plt.show()

        sns.heatmap(self.data.corr(numeric_only=True), annot=True)
        plt.title("Correlation Heatmap")
        plt.show()


# ---------------- Menu Driven Program ----------------
def main():
    analyzer = SalesDataAnalyzer()

    while True:
        print("\n========== Data Analysis & Visualization Program ==========")
        print("1. Load Dataset")
        print("2. Explore Data")
        print("3. Perform DataFrame Operations")
        print("4. Handle Missing Data")
        print("5. Generate Descriptive Statistics")
        print("6. Data Visualization")
        print("7. Save Visualization")
        print("8. Exit")

        choice = input("Enter your choice: ")

        if choice == "1":
            path = input("Enter CSV file path: ")
            analyzer.load_data(path)
        elif choice == "2":
            analyzer.explore_data()
        elif choice == "3":
            analyzer.mathematical_operations()
            analyzer.search_sort_filter()
        elif choice == "4":
            analyzer.clean_data()
        elif choice == "5":
            analyzer.aggregate_functions()
            analyzer.statistical_analysis()
        elif choice == "6":
            analyzer.visualize_data()
            analyzer.stack_plot()
            analyzer.seaborn_visuals()
        elif choice == "8":
            print("Exiting the program. Goodbye!")
            break
        else:
            print("Invalid choice!")

if __name__ == "__main__":
    main()


1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  1
Enter CSV file path:  C:\Users\Admin\Downloads\customer_sales_data.csv


Dataset loaded successfully!

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  2



1. First 5 rows:
   Customer_ID  Gender  Age Region Product_Category  Units_Purchased  \
0       C1000    Male   52   West         Clothing                9   
1       C1001  Female   52  North        Groceries                7   
2       C1002    Male   50   West         Clothing                4   
3       C1003    Male   22   West         Clothing                5   
4       C1004    Male   59  North         Clothing                2   

   Price_Per_Unit Purchase_Date  Total_Amount  
0           75.05    2025-10-25        675.45  
1          407.64    2026-01-01       2853.48  
2          820.03    2026-01-05       3280.12  
3          987.91    2026-01-04       4939.55  
4          192.90    2025-11-28        385.80  

2. Last 5 rows:
     Customer_ID  Gender  Age Region Product_Category  Units_Purchased  \
145       C1145    Male   59   West         Clothing                9   
146       C1146    Male   52  South         Clothing                2   
147       C1147    Male   24 

Enter your choice:  3


KeyError: 'Profit'