In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA, FastICA, KernelPCA, TruncatedSVD
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.random_projection import GaussianRandomProjection
from sklearn.metrics import mean_squared_error
from umap import UMAP

# Load the data
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'OF2', 'OF3', 'OF4', 'OF5', 'OF6', 'OF7', 'OF8', 'OF9', 'OF10', 'OF11', 'OF12', 'OF13', 'OF14', 'OF15', 'OF16', 'OF17', 'OF18', 'OF19', 'OF20',
    'OF21', 'OF22', 'OF23', 'OF24', 'OF25', 'OF26', 'OF27', 'OF28', 'OF29', 'OF30', 'OF31', 'OF32', 'OF33', 'OF34', 'OF37', 'OF38', 'F1', 'F2',
    'F3_1', 'F3_2', 'F3_3', 'F3_4', 'F3_5', 'F3_6', 'F3_7', 'F4', 'F5', 'F6', 'F7', 'F8', 'F9', 'F10', 'F11', 'F12', 'F13', 'F14', 'F15', 'F16',
    'F17', 'F18', 'F19', 'F20', 'F21', 'F22', 'F23', 'F24', 'F25', 'F26', 'F27', 'F28', 'F29', 'F30', 'F31', 'F32', 'F33', 'F34', 'F35', 'F36',
    'F37', 'F38', 'F39', 'F40', 'F41', 'F42', 'F43', 'F44', 'F45', 'F46', 'F47', 'F48', 'F49', 'F50', 'F51', 'F52', 'F53', 'F54', 'F55', 'F56',
    'F57', 'F58', 'F59', 'F62', 'F63', 'F64', 'F65', 'F66', 'F67', 'F68', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'PC', 'FC', 'WRI', 'SVT', 'VCHWC',
    'HWCC', 'MC', 'PP', 'ST', 'SWP', 'DP', 'ADLM', 'ATDO', 'AOD'
]

results_columns = ['NR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Implement dimensionality reduction techniques
dimensionality_reduction_techniques = {
    "PCA": PCA(n_components=10),
    "t-SNE": TSNE(n_components=2),
    "UMAP": UMAP(n_components=10),
    "Isomap": Isomap(n_components=10),
    "LLE": LocallyLinearEmbedding(n_components=10),
    "Autoencoders": TruncatedSVD(n_components=10),  # Assuming TruncatedSVD as a simple autoencoder
    "ICA": FastICA(n_components=10),
    "Kernel PCA": KernelPCA(n_components=10),
    "Random Projection": GaussianRandomProjection(n_components=10)  # Assuming GaussianRandomProjection
}

# Train RandomForestRegressor and evaluate with each dimensionality reduction technique
results = {}
predictions = {}
for name, reducer in dimensionality_reduction_techniques.items():
    if name == "LDA":
        X_train_reduced = reducer.fit_transform(X_train_scaled, y_train)
        X_test_reduced = reducer.transform(X_test_scaled)
    elif name == "t-SNE":
        X_concatenated = np.concatenate((X_train_scaled, X_test_scaled), axis=0)
        X_reduced = reducer.fit_transform(X_concatenated)
        X_train_reduced = X_reduced[:len(X_train_scaled)]
        X_test_reduced = X_reduced[len(X_train_scaled):]
    else:
        X_train_reduced = reducer.fit_transform(X_train_scaled)
        X_test_reduced = reducer.transform(X_test_scaled)
    
    model = RandomForestRegressor()
    model.fit(X_train_reduced, y_train)
    y_pred = model.predict(X_test_reduced)
    mse = mean_squared_error(y_test, y_pred)
    
    results[name] = mse
    predictions[name] = y_pred

# Print results
for name, mse in results.items():
    print(f"MSE using {name}: {mse}")

# Show 5 predictions from each reduction algorithm along with the expected value
for name, preds in predictions.items():
    print(f"Predictions using {name}:")
    for i in range(5):
        print(f"Example {i+1}: Predicted: {preds[i]}, Expected: {y_test.values[i]}")



MSE using PCA: 2.86072144395443
MSE using t-SNE: 2.3176687272229195
MSE using UMAP: 4.7278261307438205
MSE using Isomap: 3.9281970306707943
MSE using LLE: 4.3050577336081925
MSE using Autoencoders: 2.701121796998565
MSE using ICA: 1.499053334578269
MSE using Kernel PCA: 2.6050681127024378
MSE using Random Projection: 8.461872098301365
Predictions using PCA:
Example 1: Predicted: 3.3223074247841744, Expected: 3.86
Example 2: Predicted: 1.885858177601462, Expected: 2.0
Example 3: Predicted: 5.505425665224263, Expected: 1.570516742590589
Example 4: Predicted: 3.2440254109844116, Expected: 2.42215783927728
Example 5: Predicted: 2.2231178951835373, Expected: 1.3
Predictions using t-SNE:
Example 1: Predicted: 2.9816681632284934, Expected: 3.86
Example 2: Predicted: 1.2820999999999998, Expected: 2.0
Example 3: Predicted: 1.3301055968048106, Expected: 1.570516742590589
Example 4: Predicted: 3.3380194718034373, Expected: 2.42215783927728
Example 5: Predicted: 1.208159443473168, Expected: 1.3
Pr