In [None]:
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, RobustScaler, Normalizer, MaxAbsScaler, StandardScaler

# Load the CSV file
file_path = './rawdata/1_1.csv'  # Adjust the file path as needed
data = pd.read_csv(file_path)

# Remove the potential index column 'Unnamed: 0' for clarity.
# errors='ignore' makes this a no-op when the column is not present.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

# Identify the type of every column from its number of distinct values
# (nunique() does not count missing values):
#   - exactly 2 distinct values          -> binary
#   - at most 10 distinct values         -> categorical
#   - more than 10 distinct, numeric     -> continuous
#   - more than 10 distinct, non-numeric -> categorical
# The last rule matters because the continuous columns are later fed to
# numeric scalers; a high-cardinality text column (IDs, names, dates stored
# as strings) must not end up in that list.
continuous_vars = []
categorical_vars = []
binary_vars = []

for column in data.columns:
    unique_values = data[column].nunique()
    if unique_values == 2:
        binary_vars.append(column)
    elif unique_values <= 10 or not pd.api.types.is_numeric_dtype(data[column]):
        categorical_vars.append(column)
    else:
        continuous_vars.append(column)

# Print the identified variables
print("Continuous Variables:", continuous_vars)
print("Categorical Variables:", categorical_vars)
print("Binary Variables:", binary_vars)

# Function to apply different scalers and save the transformed data
def apply_and_save_scaler(scaler, scaler_name, data, continuous_vars):
    """Scale the selected columns of ``data`` and write the result to CSV.

    A copy of ``data`` is made, the columns named in ``continuous_vars`` are
    replaced by ``scaler.fit_transform`` of those columns, and the copy is
    written (without the index) to
    ``./preprocessed_data/2-<scaler_name>_no_encoding_1_1.csv``.
    The caller's ``data`` is left untouched.

    Args:
        scaler: Any object exposing ``fit_transform(X)`` that returns one
            value per input cell (e.g. a scikit-learn scaler instance).
        scaler_name: Short label embedded in the output file name and in the
            progress message.
        data: DataFrame holding all columns to be saved.
        continuous_vars: Column labels to transform. When empty, no scaling
            is performed and the data is saved unchanged.

    Returns:
        None. The result is written to disk and a message is printed.
    """
    data_copy = data.copy()
    output_path = f'./preprocessed_data/2-{scaler_name}_no_encoding_1_1.csv'

    # The output directory is not guaranteed to exist (e.g. fresh checkout);
    # without this, to_csv raises OSError.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    if len(continuous_vars) > 0:
        data_copy[continuous_vars] = scaler.fit_transform(data_copy[continuous_vars])
        data_copy.to_csv(output_path, index=False)
        print(f"Data transformed using {scaler_name} and saved to {output_path}")
    else:
        # Scalers reject a zero-column input, so with nothing to scale the
        # frame is written as-is instead of failing.
        data_copy.to_csv(output_path, index=False)
        print(f"No continuous columns to scale with {scaler_name}; data saved unchanged to {output_path}")

# Make sure the output directory exists before any result is written to it.
os.makedirs('./preprocessed_data', exist_ok=True)

# Apply MinMaxScaler
apply_and_save_scaler(MinMaxScaler(), "minmax", data, continuous_vars)

# Apply RobustScaler
apply_and_save_scaler(RobustScaler(), "robust", data, continuous_vars)

# Apply Log Transformation: log(1 + x) on the continuous columns.
log_data = data.copy()
log_inputs = log_data[continuous_vars]
# log(1 + x) is only applied to non-negative values, so rows holding a
# negative value in any continuous column are removed from this output.
# Only those rows are dropped: a blanket dropna() would also discard rows
# whose missing values were already present in the raw data (in any column),
# making this output inconsistent with the other transformed files.
negative_rows = (log_inputs < 0).any(axis=1)
# Vectorized replacement for the element-wise applymap; negative cells are
# masked to NaN first so log1p never sees them. log1p(x) equals log(x + 1)
# up to floating-point rounding and is more accurate for x close to 0.
log_data[continuous_vars] = np.log1p(log_inputs.where(log_inputs >= 0))
log_data = log_data.loc[~negative_rows]
log_output_path = './preprocessed_data/2-log_no_encoding_1_1.csv'
log_data.to_csv(log_output_path, index=False)
print(f"Data transformed using Log Transformation and saved to {log_output_path}")

# Apply Normalizer
# NOTE(review): scikit-learn's Normalizer rescales each row (sample) to unit
# norm, whereas the other scalers here work per column — confirm that
# row-wise normalization is what is intended for this output.
apply_and_save_scaler(Normalizer(), "normalize", data, continuous_vars)

# Apply MaxAbsScaler
apply_and_save_scaler(MaxAbsScaler(), "maxabs", data, continuous_vars)

# Apply StandardScaler (Z-score normalization)
apply_and_save_scaler(StandardScaler(), "zscore", data, continuous_vars)

# Report completion (no shape or content validation is performed here)
print("Transformation complete for all specified scalers.")
