In [2]:
import pandas as pd
import joblib
import tensorflow as tf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA, TruncatedSVD, FastICA, KernelPCA
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.random_projection import GaussianRandomProjection
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, BayesianRidge, LinearRegression, RANSACRegressor, TheilSenRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
import umap
import numpy as np
import os
import ast

# Feature columns read from each input CSV, grouped by name prefix.
# The order matters: it fixes the column order of the feature matrix.
data_columns = (
    # Columns whose names start with "OF"
    [
        'OF2', 'OF3', 'OF4', 'OF5', 'OF6', 'OF7', 'OF8', 'OF9',
        'OF10', 'OF11', 'OF13', 'OF14', 'OF15', 'OF16', 'OF17', 'OF18',
        'OF19', 'OF20', 'OF21', 'OF22', 'OF23', 'OF24', 'OF25', 'OF26',
        'OF27', 'OF28', 'OF30', 'OF31', 'OF33', 'OF34', 'OF37', 'OF38',
    ]
    # Columns whose names start with "F" (F3 is split into sub-columns a-g)
    + [
        'F1', 'F2', 'F3_a', 'F3_b', 'F3_c', 'F3_d', 'F3_e', 'F3_f', 'F3_g',
        'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F12', 'F13', 'F14',
        'F15', 'F16', 'F17', 'F18', 'F19', 'F20', 'F21', 'F22', 'F23', 'F24',
        'F25', 'F28', 'F29', 'F30', 'F31', 'F32', 'F33', 'F34', 'F35', 'F36',
        'F37', 'F38', 'F39', 'F40', 'F41', 'F43', 'F44', 'F45', 'F46', 'F47',
        'F48', 'F49', 'F50', 'F51', 'F52', 'F53', 'F54', 'F55', 'F56', 'F57',
        'F58', 'F59', 'F62', 'F63', 'F64', 'F65', 'F67', 'F68',
    ]
    # Columns whose names start with "S"
    + ['S1', 'S2', 'S4', 'S5']
)

# Target column(s); only the first entry is used as the regression target.
results_columns = ['WS']

# Dimensionality reduction techniques to compare. Each key is the label under
# which that technique's RMSE is reported; each value is a configured reducer.
dimensionality_reduction_techniques = dict([
    ("PCA", PCA(n_components=10)),
    # t-SNE is configured for 2 components, unlike the other reducers
    ("t-SNE", TSNE(n_components=2)),
    ("UMAP", umap.UMAP(n_components=10)),
    ("Isomap", Isomap(n_components=10)),
    ("LLE", LocallyLinearEmbedding(n_components=10)),
    # NOTE: the label says "Autoencoders", but the reducer is TruncatedSVD,
    # used here as a simple stand-in; no neural autoencoder is trained.
    ("Autoencoders", TruncatedSVD(n_components=10)),
    ("ICA", FastICA(n_components=10)),
    ("Kernel PCA", KernelPCA(n_components=10)),
    # "Random Projection" is the Gaussian variant
    ("Random Projection", GaussianRandomProjection(n_components=10)),
])

# Map each supported model name to its estimator class. Every name is exactly
# the class's own __name__, so the mapping is derived from the classes.
model_mapping = {
    estimator_cls.__name__: estimator_cls
    for estimator_cls in (
        Ridge,
        DecisionTreeRegressor,
        GradientBoostingRegressor,
        RandomForestRegressor,
        AdaBoostRegressor,
        KNeighborsRegressor,
        MLPRegressor,
        ElasticNet,
        SGDRegressor,
        SVR,
        BayesianRidge,
        KernelRidge,
        LinearRegression,
        RANSACRegressor,
        TheilSenRegressor,
    )
}

def _rmse(y_true, y_pred):
    """Return the root-mean-squared error between y_true and y_pred."""
    # Take the square root explicitly instead of passing squared=False:
    # that keyword was deprecated and later removed from scikit-learn.
    return np.sqrt(mean_squared_error(y_true, y_pred))


# Function to load, evaluate and retrain the model with dimensionality reduction
def load_evaluate_and_retrain_model(csv_file, model_name, model_path, hyperparameters, n_components=None):
    """Compare a saved model's RMSE with retrained models on reduced features.

    The CSV is split 90/10 (random_state=42). The persisted model is scored on
    the standardized test split, then for every entry of
    ``dimensionality_reduction_techniques`` a fresh model of the same kind is
    trained on the reduced training data and scored on the reduced test data.

    Args:
        csv_file: Path of the CSV holding ``data_columns`` and the target column.
        model_name: ``'TensorFlow'`` or a key of ``model_mapping``.
        model_path: Path of the persisted model (Keras file or joblib pickle).
        hyperparameters: Dict of hyperparameters. For ``'TensorFlow'`` the keys
            ``units``, ``activation``, ``learning_rate`` and ``epochs`` are
            read. Otherwise entries whose key starts with
            ``"<model_name lowercased>__"`` are passed to the estimator
            constructor with that prefix stripped.
        n_components: Optional override for the number of components of every
            reducer except t-SNE. ``None`` (default) keeps each reducer's
            configured value.

    Returns:
        Dict mapping ``'Original'`` and each technique name to an RMSE. If
        ``model_name`` is unknown, only ``'Original'`` is present.
    """
    # Local import keeps this block self-contained; clone() yields an unfitted
    # copy so the shared module-level reducers are never mutated or refitted.
    from sklearn.base import clone

    data = pd.read_csv(csv_file)
    X = data[data_columns]
    y = data[results_columns[0]]

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

    # Standardize the data (statistics come from the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Load the original model and calculate RMSE
    is_tensorflow = model_name == 'TensorFlow'
    if is_tensorflow:
        model = tf.keras.models.load_model(model_path)
    else:
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; only load model files from a trusted source.
        model = joblib.load(model_path)
    # NOTE(review): if the persisted model is itself a pipeline containing a
    # scaler, this input would be scaled twice - confirm how it was saved.
    reduction_results = {'Original': _rmse(y_test, model.predict(X_test_scaled))}

    # Resolve the estimator class and its hyperparameters once; neither
    # depends on the reduction technique.
    model_class = None
    model_hyperparameters = {}
    if not is_tensorflow:
        model_class = model_mapping.get(model_name)
        if model_class is None:
            print(f"Unknown model name: {model_name}")
            return reduction_results
        # Match on "<name>__" so keys without the separator are skipped
        # instead of raising IndexError when the prefix is stripped.
        prefix = f"{model_name.lower()}__"
        model_hyperparameters = {
            key[len(prefix):]: value
            for key, value in hyperparameters.items()
            if key.startswith(prefix)
        }

    # Apply dimensionality reduction techniques, retrain the model, and calculate RMSE
    for name, base_reducer in dimensionality_reduction_techniques.items():
        print(f"Applying {name} dimensionality reduction")
        reducer = clone(base_reducer)

        if name == "t-SNE":
            # t-SNE has no transform(), so train and test rows are embedded
            # together and split afterwards. NOTE: the test rows therefore
            # influence the embedding the model is trained on.
            X_concatenated = np.concatenate((X_train_scaled, X_test_scaled), axis=0)
            X_reduced = reducer.fit_transform(X_concatenated)
            X_train_reduced = X_reduced[:len(X_train_scaled)]
            X_test_reduced = X_reduced[len(X_train_scaled):]
        else:
            if n_components is not None:
                reducer.set_params(n_components=n_components)
            # The pipeline scales the raw features itself, then reduces them
            pipeline = Pipeline([
                ('scaler', StandardScaler()),
                ('reducer', reducer)
            ])
            X_train_reduced = pipeline.fit_transform(X_train)
            X_test_reduced = pipeline.transform(X_test)

        # Retrain the model with the reduced data
        if is_tensorflow:
            model = tf.keras.Sequential([
                tf.keras.layers.InputLayer(input_shape=(X_train_reduced.shape[1],)),
                tf.keras.layers.Dense(hyperparameters['units'], activation=hyperparameters['activation']),
                tf.keras.layers.Dense(1)
            ])
            model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=hyperparameters['learning_rate']),
                          loss='mse')
            model.fit(X_train_reduced, y_train, epochs=hyperparameters['epochs'], verbose=0)
        else:
            model = model_class(**model_hyperparameters)
            model.fit(X_train_reduced, y_train)

        reduction_results[name] = _rmse(y_test, model.predict(X_test_reduced))

    return reduction_results

# Load the best models information
best_models_df = pd.read_csv("best_modelsWS_info.csv")

# Directory where the models are saved
model_directory = "WS"

# Output file, defined once so the saved path and the final message cannot drift apart
output_csv = "dimensionality_reduction_rmses_WS_1.csv"

# Initialize a list to store the RMSE results for each model with dimensionality reduction
all_results = []

# Evaluate each model with dimensionality reduction
for _row_index, row in best_models_df.iterrows():
    csv_file = os.path.join('./model_all_data', row['csv_file'])
    model_name = row['model_name']

    # Keras models are stored as .h5 files, every other model as a pickle
    if model_name == 'TensorFlow':
        model_filename = f"{row['csv_file']}_TensorFlow_model.h5"
    else:
        model_filename = f"{row['csv_file']}_{model_name}_model.pkl"
    model_path = os.path.join(model_directory, model_filename)

    # The hyperparameters column holds a dict literal as text; literal_eval
    # parses it without executing arbitrary code.
    hyperparameters = ast.literal_eval(row['hyperparameters'])

    reduction_results = load_evaluate_and_retrain_model(csv_file, model_name, model_path, hyperparameters)
    reduction_results['csv_file'] = row['csv_file']
    reduction_results['model_name'] = model_name
    all_results.append(reduction_results)

# Create a DataFrame with all results
all_results_df = pd.DataFrame(all_results)

# Save the RMSE results to a CSV file
all_results_df.to_csv(output_csv, index=False)

print(f"Dimensionality reduction RMSE results saved to {output_csv}")

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying PCA dimensionality reduction
Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction
Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying PCA dimensionality reduction
Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction
Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying PCA dimensionality reduction
Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction
Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction
Applying PCA dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction
Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying PCA dimensionality reduction
Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction
Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying PCA dimensionality reduction
Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction




Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying PCA dimensionality reduction
Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction
Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction
Applying PCA dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction




Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


Applying PCA dimensionality reduction
Applying t-SNE dimensionality reduction
Applying UMAP dimensionality reduction
Applying Isomap dimensionality reduction
Applying LLE dimensionality reduction
Applying Autoencoders dimensionality reduction
Applying ICA dimensionality reduction
Applying Kernel PCA dimensionality reduction
Applying Random Projection dimensionality reduction
Dimensionality reduction RMSE results saved to dimensionality_reduction_rmses_1.csv
