# In [21]:
import pandas as pd
import tensorflow as tf
from datetime import datetime

def get_user_input():
    """Prompt on stdin for one order record and return it as a DataFrame.

    Each field is requested with the prompt ``Enter <column name>: `` in the
    order listed below. Answers are stored exactly as typed (as strings).

    Returns:
        A single-row ``pandas.DataFrame`` whose columns are the field names,
        in prompt order.
    """
    columns = (
        'Order ID',
        'Customer ID',
        'Gender',
        'Age',
        'Status',
        'Channel',
        'SKU',
        'Category',
        'Size',
        'Quantity',
        'Currency',
        'Amount',
        'Ship City',
        'Ship State',
        'Ship Postal Code',
    )

    # Dict comprehensions evaluate in iteration order, so the prompts appear
    # in the same sequence as the resulting columns.
    record = {column: [input(f"Enter {column}: ")] for column in columns}

    return pd.DataFrame(record)

def preprocess_data(df):
    """Return a cleaned copy of the order data.

    Steps, in order:
      1. Coerce 'Age', 'Amount' and 'Quantity' to numbers; values that cannot
         be parsed become NaN.
      2. Drop every row that has a missing value in any column. This runs
         after the coercion so rows with unparseable numbers are removed too.
      3. Keep only rows whose 'Quantity' and 'Amount' are non-negative.
      4. Lower-case 'Gender'.
      5. Drop exact duplicate rows. This runs after the normalisation so rows
         that differ only in gender capitalisation collapse into one.

    Args:
        df: DataFrame containing at least the columns 'Age', 'Amount',
            'Quantity' and 'Gender'. It is not modified.

    Returns:
        A new DataFrame with the cleaned rows; the original index labels of
        the surviving rows are kept.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # Work on a copy so the caller's frame is left untouched.
    cleaned = df.copy()

    for column in ('Age', 'Amount', 'Quantity'):
        cleaned[column] = pd.to_numeric(cleaned[column], errors='coerce')

    # Must follow the coercion above: errors='coerce' creates new NaNs.
    cleaned = cleaned.dropna()

    non_negative = (cleaned['Quantity'] >= 0) & (cleaned['Amount'] >= 0)
    # Explicit copy so the column assignment below acts on an independent
    # frame rather than a slice (avoids SettingWithCopyWarning).
    cleaned = cleaned[non_negative].copy()

    cleaned['Gender'] = cleaned['Gender'].str.lower()

    return cleaned.drop_duplicates()

def save_preprocessed_data(df):
    """Write *df* to a timestamped CSV in the current working directory.

    The file is named ``Processed_Data_<YYYYmmdd_HHMMSS>.csv`` using the
    current local time, is written without the index column, and its name is
    reported on stdout.
    """
    stamp = format(datetime.now(), "%Y%m%d_%H%M%S")
    destination = f'Processed_Data_{stamp}.csv'

    df.to_csv(destination, index=False)

    print("Data preprocessing completed. Processed data saved to", destination)

def main():
    """Run one interactive data-entry and preprocessing cycle.

    Loads the rows kept from the previous run ('last_processed_file.csv', if
    it exists), appends one record typed by the user, cleans the combined
    data, writes a timestamped snapshot, and overwrites
    'last_processed_file.csv' so the next run starts from the new state.
    """
    try:
        # Read every column as text so reloaded rows have the same types as
        # freshly typed ones. Otherwise IDs and postal codes are re-inferred
        # as integers (losing leading zeros) and never match their string
        # counterparts when duplicates are removed. The numeric columns are
        # converted later by preprocess_data.
        previous_rows = pd.read_csv('last_processed_file.csv', dtype=str)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # No earlier run, or a zero-byte state file: start from nothing.
        previous_rows = pd.DataFrame()

    # Get user input
    user_input = get_user_input()

    # Merge user input with the last processed data
    merged_data = pd.concat([previous_rows, user_input], ignore_index=True)

    # Preprocess the merged data
    preprocessed_data = preprocess_data(merged_data)

    # Save the preprocessed data
    save_preprocessed_data(preprocessed_data)

    # Update the 'last_processed_file.csv' for the next iteration
    preprocessed_data.to_csv('last_processed_file.csv', index=False)

# Start the interactive workflow only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


# Output: Data preprocessing completed. Processed data saved to Processed_Data_20231210_214328.csv
