In [None]:
# Import the libraries needed
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

# File formats the loader accepts; user input is validated against this list.
file_types = [
    'csv',
    'excel',
    'json',
]

# A function to determine the file type and its readings
def read_data():
    """Interactively load a data file into a DataFrame.

    Prompts for a file type until one listed in ``file_types`` is entered,
    then for a file name *without* its extension. The name prompt repeats
    for as long as no matching file can be found.

    Returns:
        pandas.DataFrame: The contents of the selected file.
    """
    type_prompt = 'Choose file type: csv, excel, json\n'
    file_type = input(type_prompt).strip().lower()
    while file_type not in file_types:
        print('Choose correct type.')
        file_type = input(type_prompt).strip().lower()

    if file_type == 'excel':
        # Excel workbooks are saved as .xlsx or .xls. The '.excel' suffix the
        # loader used to require is still tried last so that files named that
        # way keep loading.
        extensions = ('xlsx', 'xls', 'excel')
    else:
        extensions = (file_type,)

    name_file = input('Enter file name: ')

    while True:
        for extension in extensions:
            path = f'{name_file}.{extension}'
            try:
                if file_type == 'csv':
                    df = pd.read_csv(path)
                elif file_type == 'excel':
                    df = pd.read_excel(path)
                elif file_type == 'json':
                    # lines=True: the file is read as one JSON object per line.
                    df = pd.read_json(path, lines=True)
            except FileNotFoundError:
                # Try the next candidate extension, if any.
                continue
            break
        else:
            # No candidate path existed: ask for another name and retry.
            name_file = input('File not found. Enter file name  again: ')
            continue
        break

    print('*' * 60)
    return df

# Function to handle missing values and perform other cleaning steps
def missing_values(df):
    """Interactively clean missing values in ``df``.

    Each round prints the per-column count of missing values, shows a menu
    and applies the chosen action: fill one column with its mean, median or
    mode, drop every row that has a missing value, or drop a column. The
    loop ends when the user answers anything other than ``yes`` to the
    "more cleaning" prompt.

    Invalid input never aborts the session: a non-numeric or out-of-range
    menu choice shows the menu again, and an unknown column, a column whose
    mean/median cannot be computed, or a column without any usable fill
    value is reported and left unchanged.

    Args:
        df: The DataFrame to clean. Fill operations assign into this frame.

    Returns:
        pandas.DataFrame: The cleaned frame.
    """
    handling_options = [
        'Fill with mean',
        'Fill with median',
        'Fill with mode',
        'Remove missing values',
        'Remove columns',
    ]
    option_count = len(handling_options)

    while True:
        print('Total missing values in data:\n', df.isnull().sum())

        print('Choose an option for handling missing values:')
        for number, option in enumerate(handling_options, start=1):
            print(f'{number}. {option}')

        try:
            choice = int(input('Enter the number of your choice: '))
        except ValueError:
            choice = 0  # Deliberately out of range: handled just below.
        if not 1 <= choice <= option_count:
            print(f'Invalid choice. Enter a number between 1 and {option_count}.')
            continue

        if choice == 4:  # Remove missing values
            df = df.dropna()
            print('Missing values removed.')
        else:
            if choice == 5:
                col = input('Enter the column name to delete: ')
            else:
                col = input('Enter the column name to fill missing values: ')

            if col not in df.columns:
                print(f'Column "{col}" not found.')
            elif choice == 5:  # Remove columns
                df = df.drop(columns=[col])
                print(f'Column "{col}" has been deleted.')
            else:
                method = {1: 'mean', 2: 'median', 3: 'mode'}[choice]
                try:
                    if choice == 1:
                        fill_value = df[col].mean()
                    elif choice == 2:
                        fill_value = df[col].median()
                    else:
                        # mode() is empty when every value is missing.
                        modes = df[col].mode()
                        fill_value = modes.iloc[0] if len(modes) else None
                except (TypeError, ValueError):
                    # Raised by pandas when the column is not numeric.
                    print(f'Cannot compute the {method} of column "{col}".')
                else:
                    if pd.isna(fill_value):
                        print(f'Column "{col}" has no values to compute the {method} from.')
                    else:
                        df[col] = df[col].fillna(fill_value)
                        print(f'Missing values in "{col}" filled with {method}.')

        more_cleaning = input('Would you like to perform more cleaning? (yes/no): ').lower()
        if more_cleaning != 'yes':
            break

    print('*' * 60)
    return df

# Function to encode categorical data
def encoding_data(df):
    """Interactively label-encode columns of ``df``.

    While the user answers ``yes``, asks for a column name and stores the
    ``LabelEncoder`` codes of that column in a new ``<column>_encoded``
    column. An unknown column, or a column the encoder rejects, is reported
    and the prompt is shown again instead of ending the session.

    Args:
        df: The DataFrame to extend. New columns are assigned into it.

    Returns:
        pandas.DataFrame: The same frame, with any encoded columns added.
    """
    while True:
        choose = input('Do you want to encode categorical data? (yes/no): ').lower()
        if choose != 'yes':
            break

        col = input('Enter the column name to encode: ')
        if col not in df.columns:
            print(f'Column "{col}" not found.')
            continue

        try:
            encoded = LabelEncoder().fit_transform(df[col])
        except (TypeError, ValueError):
            # The encoder rejects columns mixing strings with other values,
            # which is what missing entries in a text column look like.
            print(f'Column "{col}" could not be encoded; '
                  'it may contain missing or mixed-type values.')
            continue

        df[col + '_encoded'] = encoded
        print(f'Column "{col}" has been encoded.')

    print('*' * 60)
    return df

# Function to extract a statistical summary of data
def summarie_data(df):
    """Print one user-selected summary of ``df``.

    Shows a menu (first ten rows, info, describe, shape) and asks for a
    number until a valid one is entered, then prints the chosen summary
    followed by a separator line.

    Args:
        df: The DataFrame to summarise.

    Returns:
        None
    """
    summaries = ['Data for the first ten rows', 'Data info', 'Description of data', 'Data shape']
    summary_count = len(summaries)

    print('Choose the type of summary you want:')
    for number, summary in enumerate(summaries, start=1):
        print(f'{number}. {summary}')

    while True:
        try:
            choice = int(input('Enter the number of your choice: '))
        except ValueError:
            choice = 0  # Deliberately out of range: rejected just below.
        if 1 <= choice <= summary_count:
            break
        print(f'Invalid choice. Enter a number between 1 and {summary_count}.')

    if choice == 1:
        print(df.head(10))
    elif choice == 2:
        # info() writes its report itself and returns None, so wrapping it
        # in print() would add a stray "None" line.
        df.info()
    elif choice == 3:
        print(df.describe())
    elif choice == 4:
        print(df.shape)

    print('*' * 60)

# Main function to run the process
def main():
    """Run load -> clean -> encode -> summarise until the user declines to restart."""
    restart = 'yes'
    while restart == 'yes':
        # Each stage hands its DataFrame on to the next one.
        df = read_data()
        df = missing_values(df)
        df = encoding_data(df)
        summarie_data(df)

        restart = input('Would you like to restart? (yes/no): ').lower()

    # Any answer other than 'yes' ends the session.
    print('Goodbye!')

# Run the main function only when executed as a script or notebook cell, so
# importing this module does not start the interactive session.
if __name__ == '__main__':
    main()

Choose file type: csv, excel, json
json
Enter file name: usa.gov_click_data
************************************************************
Total missing values in data:
 a     0
c     3
nk    0
tz    0
gr    3
g     0
h     0
l     0
al    1
hh    0
r     0
u     0
t     0
hc    0
cy    3
ll    3
dtype: int64
Choose an option for handling missing values:
1. Fill with mean
2. Fill with median
3. Fill with mode
4. Remove missing values
5. Remove columns
