# In [None]:
import pandas as pd
import numpy as np
import sqlite3
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

class AutomatedEDATool:
    """Menu-driven helper that loads a table, pre-processes it and plots it.

    Typical use is ``AutomatedEDATool().run()``, but the individual steps
    (``load_data``, ``preprocess_data``, ``generate_visualizations``) can
    also be called programmatically.
    """

    def __init__(self):
        # DataFrame currently held by the tool; None until load_data() succeeds.
        self.data = None
        # Column labels of the pre-processed frame; empty until
        # preprocess_data() has run on the current data.
        self.columns = []

    def load_data(self, file_path: str, file_type: str,
                  table_name: "str | None" = None) -> None:
        """Load a dataset into ``self.data``.

        Args:
            file_path: Path to the CSV file, Excel workbook or SQLite
                database file.
            file_type: One of ``"csv"``, ``"excel"`` or ``"sql"``
                (case and surrounding whitespace are ignored).
            table_name: Only used for ``"sql"``. Name of the table to read;
                when omitted, the alphabetically first user table in the
                database is read.

        Raises:
            ValueError: If ``file_type`` is not supported, or the SQLite
                database contains no user tables.

        Errors raised by pandas / sqlite3 while reading propagate unchanged.
        ``self.data`` is only replaced when loading succeeds.
        """
        kind = file_type.strip().lower()
        if kind == "csv":
            frame = pd.read_csv(file_path)
        elif kind == "excel":
            frame = pd.read_excel(file_path)
        elif kind == "sql":
            frame = self._read_sqlite(file_path, table_name)
        else:
            raise ValueError(
                f"Unsupported file type {file_type!r}; expected csv, excel or sql."
            )

        self.data = frame
        # Labels recorded for a previously loaded dataset no longer apply.
        self.columns = []

    @staticmethod
    def _read_sqlite(db_path, table_name=None):
        """Return one table of the SQLite database at ``db_path`` as a DataFrame."""
        con = sqlite3.connect(db_path)
        try:
            if table_name is None:
                # The database path is not a table name: look the table up
                # in the schema, skipping SQLite's internal tables.
                row = con.execute(
                    "SELECT name FROM sqlite_master "
                    "WHERE type = 'table' AND name NOT LIKE 'sqlite_%' "
                    "ORDER BY name LIMIT 1"
                ).fetchone()
                if row is None:
                    raise ValueError(f"No tables found in database {db_path!r}.")
                table_name = row[0]
            # Identifiers cannot be bound as query parameters, so quote the
            # name and escape embedded double quotes instead.
            quoted = '"' + table_name.replace('"', '""') + '"'
            return pd.read_sql_query(f"SELECT * FROM {quoted}", con)
        finally:
            # Always release the connection, even when the query fails.
            con.close()

    def preprocess_data(self) -> None:
        """Fill missing values, one-hot encode and standardise ``self.data``.

        * ``object`` columns: missing values become ``"Unknown"``, then the
          columns are one-hot encoded with the first level dropped.
        * ``int64`` / ``float64`` columns: missing values become ``0``, then
          each column is scaled to zero mean and unit (sample) standard
          deviation. Columns without a usable spread (constant values or a
          single row) are only centred, so they become ``0`` instead of NaN.

        ``self.columns`` is set to the column labels of the processed frame.
        Prints a message and returns without changes when no data is loaded.
        """
        if self.data is None:
            print("No data loaded. Please load data first.")
            return

        # Identify column data types
        categorical_cols = self.data.select_dtypes(include=["object"]).columns
        numerical_cols = self.data.select_dtypes(include=["int64", "float64"]).columns

        if len(categorical_cols) > 0:
            # Handle missing values, then encode categorical features
            self.data[categorical_cols] = self.data[categorical_cols].fillna("Unknown")
            self.data = pd.get_dummies(
                self.data, columns=categorical_cols, drop_first=True
            )

        if len(numerical_cols) > 0:
            numeric = self.data[numerical_cols].fillna(0)
            spread = numeric.std()
            # A zero or NaN standard deviation would turn the whole column
            # into NaN/inf; divide by 1 instead so it is merely centred.
            spread = spread.where(spread > 0, 1.0)
            self.data[numerical_cols] = (numeric - numeric.mean()) / spread

        # Record the labels of the processed frame (after encoding).
        self.columns = self.data.columns

    def generate_visualizations(self) -> None:
        """Plot every column of ``self.data`` with matplotlib/seaborn.

        ``int64`` / ``float64`` columns get a histogram (with KDE) and a box
        plot; ``object`` columns get a count plot. Other dtypes are skipped.
        Prints a message and returns when no data is loaded.
        """
        if self.data is None:
            print("No data loaded. Please load data first.")
            return

        for column in self.data.columns:
            series = self.data[column]
            if series.dtype == "float64" or series.dtype == "int64":
                plt.figure(figsize=(8, 6))
                sns.histplot(series, kde=True)
                plt.title(f"Histogram for {column}")
                plt.show()

                plt.figure(figsize=(8, 6))
                sns.boxplot(y=series)
                plt.title(f"Box Plot for {column}")
                plt.show()

            elif series.dtype == "object":
                plt.figure(figsize=(8, 6))
                sns.countplot(y=series)
                plt.title(f"Count Plot for {column}")
                plt.show()

        # You can add more visualization types and use Plotly for interactive plots

    def run(self) -> None:
        """Run the interactive text menu until the user chooses to exit."""
        while True:
            print("1. Load Data")
            print("2. Pre-process Data")
            print("3. Generate Visualizations")
            print("4. Exit")

            choice = input("Enter your choice: ").strip()

            if choice == "1":
                file_path = input("Enter file path: ").strip()
                file_type = input("Enter file type (csv/excel/sql): ")
                table_name = None
                if file_type.strip().lower() == "sql":
                    table_name = input(
                        "Enter table name (leave blank for the first table): "
                    ).strip() or None
                try:
                    self.load_data(file_path, file_type, table_name)
                except Exception as exc:
                    # Interactive boundary: report any read failure and keep
                    # the menu alive instead of crashing the session.
                    print(f"Failed to load data: {exc}")
                else:
                    print("Data loaded successfully.")

            elif choice == "2":
                self.preprocess_data()
                # preprocess_data() prints its own message when nothing is
                # loaded; only report success when there was data to process.
                if self.data is not None:
                    print(f"Data pre-processed successfully and there are their columns.{self.columns}")

            elif choice == "3":
                self.generate_visualizations()

            elif choice == "4":
                print("Exiting.")
                break
            else:
                print("Invalid choice. Please try again.")

# Script entry point: start the interactive menu only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    eda_tool = AutomatedEDATool()
    eda_tool.run()
# NOTE: very large inputs can raise MemoryError, because the whole dataset is loaded into memory at once.