In [None]:
import pandas as pd
import numpy as np

class DataPrepKit:
    """Load a tabular dataset and apply basic preprocessing steps to it.

    The dataset is held in ``self.data`` as a pandas DataFrame. It is ``None``
    until :meth:`read_data` is called (or a DataFrame is assigned directly),
    and every other method operates on that DataFrame.
    """

    def __init__(self, data_path):
        """Store the dataset location; nothing is read until ``read_data``.

        Args:
            data_path: Path to a CSV, Excel, or JSON file.
        """
        self.data_path = data_path
        # Populated by read_data(); stays None until then.
        self.data = None

    def read_data(self):
        """Read the file at ``self.data_path`` into ``self.data``.

        The pandas reader is picked from the file extension. The comparison
        is case-insensitive, so ``DATA.CSV`` is treated like ``data.csv``.

        Raises:
            ValueError: If the extension is not .csv, .xlsx, .xls or .json.
        """
        # str() also accepts pathlib.Path objects; lower() makes the suffix
        # check independent of how the extension is capitalised.
        lowered_path = str(self.data_path).lower()
        if lowered_path.endswith('.csv'):
            self.data = pd.read_csv(self.data_path)
        elif lowered_path.endswith(('.xlsx', '.xls')):
            self.data = pd.read_excel(self.data_path)
        elif lowered_path.endswith('.json'):
            self.data = pd.read_json(self.data_path)
        else:
            raise ValueError("Unsupported file format. Please provide data in CSV, Excel, or JSON format.")

    def data_summary(self):
        """Return the descriptive statistics produced by ``DataFrame.describe``."""
        return self.data.describe()

    def handle_missing_values(self, strategy='remove'):
        """Deal with missing values in ``self.data``, modifying it in place.

        Args:
            strategy: ``'remove'`` drops every row that contains a missing
                value. ``'impute'`` fills missing values column by column:
                the column mean for numeric columns, the most frequent
                value for all other columns.

        Raises:
            ValueError: If ``strategy`` is neither ``'remove'`` nor ``'impute'``.
        """
        if strategy == 'remove':
            self.data.dropna(inplace=True)
        elif strategy == 'impute':
            self._impute_missing()
        else:
            raise ValueError("Invalid strategy. Please choose either 'remove' or 'impute'.")

    def _impute_missing(self):
        """Fill missing values per column (mean for numeric, mode otherwise)."""
        has_missing = self.data.isna().any()
        for column in has_missing[has_missing].index:
            series = self.data[column]
            is_numeric = (
                pd.api.types.is_numeric_dtype(series)
                and not pd.api.types.is_bool_dtype(series)
            )
            if is_numeric:
                fill_value = series.mean()
            else:
                modes = series.mode(dropna=True)
                if modes.empty:
                    # Column holds nothing but missing values: no value to use.
                    continue
                # mode() returns its values sorted, so ties resolve to the first.
                fill_value = modes.iloc[0]
            if pd.isna(fill_value):
                # e.g. the mean of an all-missing numeric column.
                continue
            # Assign back instead of fillna(inplace=True) on the selected
            # column, which may act on a temporary copy.
            self.data[column] = series.fillna(fill_value)

    def encode_categorical_data(self):
        """Return a dummy-encoded copy of ``self.data``.

        Uses ``pd.get_dummies`` with ``drop_first=True``, so the first level
        of each encoded column is dropped. ``self.data`` is not modified.
        """
        return pd.get_dummies(self.data, drop_first=True)

# Example usage
if __name__ == "__main__":
    # Prompt for the dataset location, then build the kit and load the file.
    data_path = input("Enter the path to your dataset: ")
    data_prep = DataPrepKit(data_path)
    data_prep.read_data()

    # Print the statistical summary before any cleaning is applied.
    summary = data_prep.data_summary()
    print("Data Summary:", summary, sep="\n")

    # Discard every row that contains a missing value.
    data_prep.handle_missing_values(strategy='remove')

    # Encode the categorical columns and preview the first rows.
    encoded_data = data_prep.encode_categorical_data()
    print("Encoded Data:", encoded_data.head(), sep="\n")
