In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
def load_data(file_path, file_format):
    """Load a tabular dataset from disk into a DataFrame.

    Parameters
    ----------
    file_path : str or path-like
        Location of the file; passed unchanged to the pandas reader.
    file_format : str
        ``'csv'`` or ``'excel'``. Case and surrounding whitespace are
        ignored, since the value typically comes from interactive input.

    Returns
    -------
    pandas.DataFrame
        The loaded data.

    Raises
    ------
    NotImplementedError
        If ``file_format`` is ``'sql'``; no SQL reader is implemented.
    ValueError
        If ``file_format`` is any other unrecognised value.
    """
    normalized_format = str(file_format).strip().lower()

    if normalized_format == 'csv':
        return pd.read_csv(file_path)
    if normalized_format == 'excel':
        return pd.read_excel(file_path)
    if normalized_format == 'sql':
        # There is no SQL reader wired up; fail loudly here instead of
        # falling through to a return with no data bound.
        raise NotImplementedError("Loading from 'sql' is not implemented yet")
    raise ValueError("Unsupported file format")

In [3]:
def handle_missing_values(data):
    """Fill missing values in numeric columns with that column's mean.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame in which NaNs in numeric columns are replaced by the
        column mean. Non-numeric columns are returned as they were, so any
        missing values they contain remain missing.
    """
    # numeric_only=True keeps text columns out of the mean computation;
    # without it, recent pandas raises TypeError on mixed-type frames.
    column_means = data.mean(numeric_only=True)
    # fillna with a Series fills each column whose label appears in the
    # Series index and leaves the other columns alone.
    return data.fillna(column_means)

In [4]:
def encode_categorical(data):
    """Replace every object-dtype column with integer category codes.

    Each selected column is converted with ``pd.Categorical`` and replaced
    by its ``codes``. pandas assigns the codes from the column's unique
    values (sorted when they can be ordered) and uses ``-1`` for missing
    entries.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with object-dtype columns replaced by integer
        codes; all other columns are unchanged.
    """
    # Work on a copy so the caller's frame keeps its original labels.
    encoded = data.copy()
    cat_columns = encoded.select_dtypes(include=['object']).columns
    for column in cat_columns:
        encoded[column] = pd.Categorical(encoded[column]).codes
    return encoded

In [5]:
def scale_numerical(data):
    """Standardise numeric columns to zero mean and unit sample std.

    Each numeric column becomes ``(x - mean) / std`` using pandas' default
    sample standard deviation. Columns whose standard deviation is zero
    (constant column) or undefined (e.g. a single row) are only centred,
    so they come out as zeros instead of NaN.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of ``data`` with numeric columns standardised; non-numeric
        columns are unchanged.
    """
    scaled = data.copy()
    num_columns = scaled.select_dtypes(include=[np.number]).columns
    if len(num_columns) == 0:
        return scaled

    numeric_part = scaled[num_columns]
    column_means = numeric_part.mean()
    column_stds = numeric_part.std()
    # `std > 0` is False for both 0 and NaN, so either case falls back to a
    # divisor of 1 and the column is centred without producing NaN.
    safe_stds = column_stds.where(column_stds > 0, 1.0)

    scaled[num_columns] = (numeric_part - column_means) / safe_stds
    return scaled

In [6]:
def generate_visualizations(data):
    """Show a histogram per numeric column and a scatter plot per column pair.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to plot. Only columns with a numeric dtype are used.

    Returns
    -------
    None
        Figures are displayed with ``plt.show()`` and then closed.

    Notes
    -----
    Each pair of numeric columns is drawn once, with the column that comes
    first in the frame on the x-axis; the mirrored plot is not drawn.
    """
    num_columns = data.select_dtypes(include=[np.number]).columns

    # histograms
    for column in num_columns:
        fig, ax = plt.subplots(figsize=(10, 7))
        # Drop missing values so NaN entries cannot break the bin-range
        # computation.
        ax.hist(data[column].dropna(), bins=20, edgecolor='w')
        ax.set_title(f'Histogram of {column}')
        ax.set_xlabel(column)
        ax.set_ylabel('Frequency')
        ax.grid(True)
        plt.show()
        # Release the figure so long column lists do not accumulate
        # open figures in memory.
        plt.close(fig)

    # scatter
    for position, column1 in enumerate(num_columns):
        # Only pair with later columns: every unordered pair appears once.
        for column2 in num_columns[position + 1:]:
            fig, ax = plt.subplots(figsize=(10, 7))
            ax.scatter(data[column1], data[column2])
            ax.set_title(f'Scatter Plot between {column1} and {column2}')
            ax.set_xlabel(column1)
            ax.set_ylabel(column2)
            ax.grid(True)
            plt.show()
            plt.close(fig)


In [None]:
def main():
    """Prompt for a data file, run the preprocessing steps, and plot the result.

    Reads the file path and format from standard input, loads the file,
    then applies missing-value handling, categorical encoding and numeric
    scaling in that order before generating the visualizations.
    """
    source_path = input("pls enter the path : ")
    source_format = input("pls enter the format_file (csv, excel, sql): ")

    frame = load_data(source_path, source_format)
    # Each step takes the frame and returns the frame to pass to the next.
    for step in (handle_missing_values, encode_categorical, scale_numerical):
        frame = step(frame)
    generate_visualizations(frame)


# Run the pipeline when this code is executed as the main program.
if __name__ == "__main__":
    main()