In [1]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

class SalesDataAnalyzer:
    """Menu-driven helper for loading, inspecting, cleaning and plotting a CSV.

    Every public method talks to the user through ``print``/``input`` and
    operates on ``self.data``, the DataFrame set by :meth:`load_data`.
    Methods that need a dataset print "Load a dataset first." and return
    when none has been loaded.
    """

    def __init__(self):
        # DataFrame produced by load_data(); None until a file is loaded.
        self.data = None
        # Figure drawn by the most recent successful data_visualization()
        # call. Kept so save_visualization() can write it after plt.show()
        # has already released pyplot's "current figure".
        self._last_figure = None

    def load_data(self, file_path):
        """Read the CSV at *file_path* into ``self.data``.

        Any failure is reported on stdout instead of being raised; in that
        case ``self.data`` keeps whatever value it had before the call.
        """
        try:
            self.data = pd.read_csv(file_path)
            print("Dataset loaded successfully!")
        except Exception as e:
            # Interactive boundary: report and let the menu keep running.
            print("Error loading file:", e)

    def explore_data(self):
        """Show the exploration menu and print the view the user selects."""
        if self.data is None:
            print("Load a dataset first.")
            return

        print("\n== Explore Data ==")
        print("1. Display the first 5 rows")
        print("2. Display the last 5 rows")
        print("3. Display column names")
        print("4. Display data types")
        print("5. Display basic info")

        choice = input("Enter your choice: ").strip()

        if choice == '1':
            print(self.data.head())
        elif choice == '2':
            print(self.data.tail())
        elif choice == '3':
            print(self.data.columns)
        elif choice == '4':
            print(self.data.dtypes)
        elif choice == '5':
            # info() writes its report itself and returns None, so wrapping
            # it in print() would add a stray "None" line.
            self.data.info()
        else:
            print("Invalid choice.")

    def handle_missing_data(self):
        """Show the missing-data menu and apply the selected action.

        Actions 2-4 modify ``self.data`` in place.
        """
        if self.data is None:
            print("Load a dataset first.")
            return

        print("\n== Handle Missing Data ==")
        print("1. Display rows with missing values")
        print("2. Fill missing values with mean")
        print("3. Drop rows with missing values")
        print("4. Replace missing values with a specific value")

        choice = input("Enter your choice: ").strip()

        if choice == '1':
            missing = self.data[self.data.isnull().any(axis=1)]
            if missing.empty:
                print("No missing values found in the dataset!")
            else:
                print(missing)

        elif choice == '2':
            # Only numeric columns have a mean, so gaps in other columns
            # are left as they are.
            self.data.fillna(self.data.mean(numeric_only=True), inplace=True)
            print("Missing values filled with mean.")

        elif choice == '3':
            self.data.dropna(inplace=True)
            print("Rows with missing values dropped.")

        elif choice == '4':
            val = input("Enter value to replace missing values with: ")
            try:
                self.data.fillna(self._fill_values_for(val), inplace=True)
            except (TypeError, ValueError) as exc:
                print("Error replacing missing values:", exc)
            else:
                print("Missing values replaced.")

        else:
            print("Invalid choice.")

    def descriptive_statistics(self):
        """Print ``describe()`` for the loaded dataset."""
        if self.data is None:
            print("Load a dataset first.")
            return

        print("\n== Descriptive Statistics ==")
        print(self.data.describe())

    def data_visualization(self):
        """Show the plot menu, prompt for columns and display the chosen plot.

        A successfully drawn figure is remembered so that
        :meth:`save_visualization` can write it to disk afterwards.
        """
        if self.data is None:
            print("Load a dataset first.")
            return

        print("\n== Data Visualization ==")
        print("1. Bar Plot")
        print("2. Line Plot")
        print("3. Scatter Plot")
        print("4. Pie Chart")
        print("5. Histogram")
        print("6. Stack Plot")

        choice = input("Enter your choice: ").strip()

        if choice == '1':
            column = input("Enter column name for bar counts: ")
            if not self._has_columns(column):
                print("Invalid column name.")
                return
            self._render(lambda ax: self._draw_bar(ax, column))

        elif choice == '2':
            x = input("Enter x-axis column: ")
            y = input("Enter y-axis column: ")
            if not self._has_columns(x, y):
                print("Invalid columns.")
                return
            self._render(lambda ax: self._draw_line(ax, x, y))

        elif choice == '3':
            x = input("Enter x-axis column name: ")
            y = input("Enter y-axis column name: ")
            if not self._has_columns(x, y):
                print("Error: invalid column names.")
                return
            if self._render(lambda ax: self._draw_scatter(ax, x, y)):
                print("Scatter plot displayed successfully!")

        elif choice == '4':
            column = input("Enter column for pie chart: ")
            if not self._has_columns(column):
                print("Invalid column name.")
                return
            self._render(lambda ax: self._draw_pie(ax, column))

        elif choice == '5':
            column = input("Enter numeric column for histogram: ")
            if not self._has_columns(column):
                print("Invalid column name.")
                return
            self._render(lambda ax: self._draw_hist(ax, column))

        elif choice == '6':
            # The stack plot uses the first three numeric columns.
            numeric = self.data.select_dtypes(include="number").iloc[:, :3]
            if numeric.shape[1] < 3:
                print("Not enough numeric columns.")
                return
            self._render(lambda ax: self._draw_stack(ax, numeric))

        else:
            print("Invalid choice.")

    def save_visualization(self):
        """Prompt for a file name and save the most recently drawn plot.

        Saves the figure remembered by :meth:`data_visualization` rather
        than pyplot's current figure, which is empty once ``plt.show()``
        has returned.
        """
        # getattr keeps this working for instances created without __init__.
        fig = getattr(self, "_last_figure", None)
        if fig is None:
            print("No visualization to save. Generate a plot first.")
            return

        filename = input("Enter file name to save the plot: ").strip()
        if not filename:
            print("Error saving file: no file name given.")
            return

        try:
            fig.savefig(filename)
        except (OSError, ValueError) as exc:
            # OSError: unwritable path; ValueError: unsupported file format.
            print("Error saving file:", exc)
        else:
            print("Visualization saved successfully!")

    # ---- private helpers -------------------------------------------------

    @staticmethod
    def _parse_number(text):
        """Return *text* as an int or float, or None if it is not numeric."""
        for cast in (int, float):
            try:
                return cast(text)
            except ValueError:
                continue
        return None

    def _fill_values_for(self, text):
        """Map each column name to the value used to fill its gaps.

        Numeric columns receive *text* parsed as a number when it parses,
        so they stay numeric; every other column receives the raw string.
        """
        number = self._parse_number(text)
        fill_values = {}
        for column, dtype in self.data.dtypes.items():
            if number is not None and pd.api.types.is_numeric_dtype(dtype):
                fill_values[column] = number
            else:
                fill_values[column] = text
        return fill_values

    def _has_columns(self, *names):
        """Return True when every name in *names* is a column of the dataset."""
        return all(name in self.data.columns for name in names)

    def _render(self, draw):
        """Draw on a fresh figure via ``draw(ax)``, show it, and remember it.

        Returns True when the plot was drawn and shown. On failure the
        error is printed, the empty figure is closed and False is returned.
        """
        fig, ax = plt.subplots()
        try:
            draw(ax)
        except Exception as exc:
            # Interactive boundary: report the real cause, keep the menu alive.
            plt.close(fig)
            print("Error creating plot:", exc)
            return False
        self._last_figure = fig
        plt.show()
        return True

    def _draw_bar(self, ax, column):
        """Bar chart of the value counts of *column*."""
        self.data[column].value_counts().plot(kind='bar', ax=ax)
        ax.set_title(f"Bar Plot of {column}")

    def _draw_line(self, ax, x, y):
        """Line plot of column *y* against column *x*."""
        ax.plot(self.data[x], self.data[y])
        ax.set_xlabel(x)
        ax.set_ylabel(y)
        ax.set_title("Line Plot")

    def _draw_scatter(self, ax, x, y):
        """Scatter plot of column *y* against column *x*."""
        ax.scatter(self.data[x], self.data[y])
        ax.set_xlabel(x)
        ax.set_ylabel(y)
        ax.set_title(f"Scatter Plot ({x} vs {y})")

    def _draw_pie(self, ax, column):
        """Pie chart of the value counts of *column*."""
        self.data[column].value_counts().plot(
            kind='pie', autopct='%1.1f%%', ax=ax
        )
        ax.set_title(f"Pie Chart of {column}")

    def _draw_hist(self, ax, column):
        """Histogram of *column*, ignoring missing values."""
        # NaNs are dropped first so a column with gaps can still be binned.
        ax.hist(self.data[column].dropna())
        ax.set_title(f"Histogram of {column}")

    def _draw_stack(self, ax, numeric):
        """Stack plot of the columns of *numeric* against the row position."""
        ax.stackplot(range(len(numeric)), *numeric.values.T)
        ax.set_title("Stack Plot")


# Single analyzer instance shared by every menu action below.
analyzer = SalesDataAnalyzer()

# Main interactive loop: print the menu, read one selection, run the
# matching action, and repeat until the user picks "8".
while True:
    print("\n========= Data Analysis & Visualization Program =========")
    for entry in (
        "1. Load Dataset",
        "2. Explore Data",
        "3. Perform DataFrame Operations",
        "4. Handle Missing Data",
        "5. Generate Descriptive Statistics",
        "6. Data Visualization",
        "7. Save Visualization",
        "8. Exit",
    ):
        print(entry)
    choice = input("Enter your choice: ")

    if choice == '8':
        print("Exiting the program. Goodbye!")
        break

    if choice == '1':
        # Loading is the only action that needs an extra prompt here.
        filepath = input("Enter the path of the dataset (CSV file): ")
        analyzer.load_data(filepath)
        continue

    if choice == '3':
        # Placeholder: no DataFrame operations are implemented yet.
        print("Basic DataFrame operations can be added here.")
        continue

    # The remaining selections map directly onto an analyzer method.
    handler = {
        '2': analyzer.explore_data,
        '4': analyzer.handle_missing_data,
        '5': analyzer.descriptive_statistics,
        '6': analyzer.data_visualization,
        '7': analyzer.save_visualization,
    }.get(choice)

    if handler is None:
        print("Invalid choice.")
    else:
        handler()


1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  1
Enter the path of the dataset (CSV file):  C:\Users\ompat\Downloads\titanic_full_dataset.csv


Dataset loaded successfully!

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  2



== Explore Data ==
1. Display the first 5 rows
2. Display the last 5 rows
3. Display column names
4. Display data types
5. Display basic info


Enter your choice:  2


     PassengerId  Survived  Pclass                    Name     Sex  Age  \
886          887         1       1         Smith, Mr. John    male   45   
887          888         0       2          Doe, Mrs. Anna  female   32   
888          889         0       3  Allen, Miss. Elisabeth    male   22   
889          890         1       1         Smith, Mr. John  female   38   
890          891         1       2          Doe, Mrs. Anna    male   26   

     SibSp  Parch            Ticket     Fare Cabin Embarked  
886      0      0  STON/O2. 3101282   7.9250  C123        C  
887      1      1            113803  53.1000   E46        Q  
888      1      0         A/5 21171   7.2500   B28        S  
889      0      0          PC 17599  71.2833   NaN        C  
890      0      1  STON/O2. 3101282   7.9250   C85        Q  

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  3


Basic DataFrame operations can be added here.

1. Load Dataset
2. Explore Data
3. Perform DataFrame Operations
4. Handle Missing Data
5. Generate Descriptive Statistics
6. Data Visualization
7. Save Visualization
8. Exit


Enter your choice:  4



== Handle Missing Data ==
1. Display rows with missing values
2. Fill missing values with mean
3. Drop rows with missing values
4. Replace missing values with a specific value


Enter your choice:  1


     PassengerId  Survived  Pclass                    Name     Sex  Age  \
4              5         0       1         Smith, Mr. John    male   28   
9             10         1       3  Allen, Miss. Elisabeth  female   38   
14            15         1       2          Doe, Mrs. Anna    male   45   
19            20         0       1         Smith, Mr. John  female   35   
24            25         0       3  Allen, Miss. Elisabeth    male   22   
..           ...       ...     ...                     ...     ...  ...   
869          870         1       2          Doe, Mrs. Anna  female   19   
874          875         1       1         Smith, Mr. John    male   26   
879          880         0       3  Allen, Miss. Elisabeth  female   32   
884          885         0       2          Doe, Mrs. Anna    male   28   
889          890         1       1         Smith, Mr. John  female   38   

     SibSp  Parch            Ticket     Fare Cabin Embarked  
4        1      0         A/5 21171  

Enter your choice:  8


Exiting the program. Goodbye!
