In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA, FastICA, KernelPCA, TruncatedSVD
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.random_projection import GaussianRandomProjection
from sklearn.metrics import mean_squared_error
from umap import UMAP
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, BayesianRidge, LinearRegression, RANSACRegressor, TheilSenRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge

# Load the data
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'OF22',
    'OF25',
    'OF26',
    'F3_1',
    'F3_2',
    'F3_3',
    'F3_4',
    'F3_5',
    'F3_6',
    'F3_7',
    'F20',
    'F21',
    'F22',
    'F28',
    'F31',
    'F41',
    'F42',
    'F44',
    'F48',
    'F49',
]

results_columns = ['WS']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Implement dimensionality reduction techniques
dimensionality_reduction_techniques = {
    "PCA": PCA(n_components=10),
    "t-SNE": TSNE(n_components=2),
    "UMAP": UMAP(n_components=10),
    "Isomap": Isomap(n_components=10),
    "LLE": LocallyLinearEmbedding(n_components=10),
    "Autoencoders": TruncatedSVD(n_components=10),  # Assuming TruncatedSVD as a simple autoencoder
    "ICA": FastICA(n_components=10),
    "Kernel PCA": KernelPCA(n_components=10),
    "Random Projection": GaussianRandomProjection(n_components=10)  # Assuming GaussianRandomProjection
}

# Define the models
models = [
    Ridge(), DecisionTreeRegressor(), GradientBoostingRegressor(),
    RandomForestRegressor(), AdaBoostRegressor(), KNeighborsRegressor(),
    MLPRegressor(max_iter=200), ElasticNet(max_iter=1000), SGDRegressor(max_iter=1000),
    SVR(cache_size=1000), BayesianRidge(max_iter=1000), KernelRidge(),
    LinearRegression(), RANSACRegressor(), TheilSenRegressor()
]

# Train and evaluate each model with each dimensionality reduction technique
results = {}
predictions = {}
for name, reducer in dimensionality_reduction_techniques.items():
    results[name] = {}
    predictions[name] = {}
    for model in models:
        # Transform the data
        if name == "LDA":
            X_train_reduced = reducer.fit_transform(X_train_scaled, y_train)
            X_test_reduced = reducer.transform(X_test_scaled)
        elif name == "t-SNE":
            X_concatenated = np.concatenate((X_train_scaled, X_test_scaled), axis=0)
            X_reduced = reducer.fit_transform(X_concatenated)
            X_train_reduced = X_reduced[:len(X_train_scaled)]
            X_test_reduced = X_reduced[len(X_train_scaled):]
        else:
            X_train_reduced = reducer.fit_transform(X_train_scaled)
            X_test_reduced = reducer.transform(X_test_scaled)
        
        # Train the model
        model.fit(X_train_reduced, y_train)
        
        # Make predictions
        y_pred = model.predict(X_test_reduced)
        
        # Calculate MSE
        mse = mean_squared_error(y_test, y_pred)
        
        # Store results
        results[name][model.__class__.__name__] = mse
        predictions[name][model.__class__.__name__] = y_pred

# Print results
for name, model_results in results.items():
    print(f"Results using {name}:")
    for model, mse in model_results.items():
        print(f"MSE using {model}: {mse}")

# Show 5 predictions from each reduction algorithm along with the expected value
for name, preds in predictions.items():
    print(f"Predictions using {name}:")
    for model, y_pred in preds.items():
        print(f"Model: {model}")
        for i in range(5):
            print(f"Example {i+1}: Predicted: {y_pred[i]}, Expected: {y_test.values[i]}")


  from .autonotebook import tqdm as notebook_tqdm


Results using PCA:
MSE using Ridge: 3.049688504529602
MSE using DecisionTreeRegressor: 7.307528325572109
MSE using GradientBoostingRegressor: 3.169366099613678
MSE using RandomForestRegressor: 3.0637698826509956
MSE using AdaBoostRegressor: 3.7496165920095166
MSE using KNeighborsRegressor: 3.201025335126358
MSE using MLPRegressor: 2.7757512309074093
MSE using ElasticNet: 4.673970120135285
MSE using SGDRegressor: 3.062783863752712
MSE using SVR: 2.6316328426060065
MSE using BayesianRidge: 3.0943198966813292
MSE using KernelRidge: 20.152322317327222
MSE using LinearRegression: 3.0476322704102907
MSE using RANSACRegressor: 5.514974024068587
MSE using TheilSenRegressor: 4.662090455377428
Results using t-SNE:
MSE using Ridge: 5.0106002003598125
MSE using DecisionTreeRegressor: 6.7079852160648095
MSE using GradientBoostingRegressor: 4.105687425740298
MSE using RandomForestRegressor: 5.372162530438233
MSE using AdaBoostRegressor: 4.297685039932791
MSE using KNeighborsRegressor: 2.562519874350

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA, FastICA, KernelPCA, TruncatedSVD
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.random_projection import GaussianRandomProjection
from sklearn.metrics import mean_squared_error
from umap import UMAP
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, BayesianRidge, LinearRegression, RANSACRegressor, TheilSenRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge

# Load the data
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'OF9',
    'OF10',
    'OF11',
    'OF19',
    'OF20',
    'OF21',
    'OF22',
    'OF23',
    'OF24',
    'F13',
    'F40',
    'F50',
    'F51',
    'F52',
    'F66',
    'S2',
]
results_columns = ['NR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Implement dimensionality reduction techniques
dimensionality_reduction_techniques = {
    "PCA": PCA(n_components=10),
    "t-SNE": TSNE(n_components=2),
    "UMAP": UMAP(n_components=10),
    "Isomap": Isomap(n_components=10),
    "LLE": LocallyLinearEmbedding(n_components=10),
    "Autoencoders": TruncatedSVD(n_components=10),  # Assuming TruncatedSVD as a simple autoencoder
    "ICA": FastICA(n_components=10),
    "Kernel PCA": KernelPCA(n_components=10),
    "Random Projection": GaussianRandomProjection(n_components=10)  # Assuming GaussianRandomProjection
}

# Define the models
models = [
    Ridge(), DecisionTreeRegressor(), GradientBoostingRegressor(),
    RandomForestRegressor(), AdaBoostRegressor(), KNeighborsRegressor(),
    MLPRegressor(max_iter=200), ElasticNet(max_iter=1000), SGDRegressor(max_iter=1000),
    SVR(cache_size=1000), BayesianRidge(max_iter=1000), KernelRidge(),
    LinearRegression(), RANSACRegressor(), TheilSenRegressor()
]

# Train and evaluate each model with each dimensionality reduction technique
results = {}
predictions = {}
for name, reducer in dimensionality_reduction_techniques.items():
    results[name] = {}
    predictions[name] = {}
    for model in models:
        # Transform the data
        if name == "LDA":
            X_train_reduced = reducer.fit_transform(X_train_scaled, y_train)
            X_test_reduced = reducer.transform(X_test_scaled)
        elif name == "t-SNE":
            X_concatenated = np.concatenate((X_train_scaled, X_test_scaled), axis=0)
            X_reduced = reducer.fit_transform(X_concatenated)
            X_train_reduced = X_reduced[:len(X_train_scaled)]
            X_test_reduced = X_reduced[len(X_train_scaled):]
        else:
            X_train_reduced = reducer.fit_transform(X_train_scaled)
            X_test_reduced = reducer.transform(X_test_scaled)
        
        # Train the model
        model.fit(X_train_reduced, y_train)
        
        # Make predictions
        y_pred = model.predict(X_test_reduced)
        
        # Calculate MSE
        mse = mean_squared_error(y_test, y_pred)
        
        # Store results
        results[name][model.__class__.__name__] = mse
        predictions[name][model.__class__.__name__] = y_pred

# Print results
for name, model_results in results.items():
    print(f"Results using {name}:")
    for model, mse in model_results.items():
        print(f"MSE using {model}: {mse}")

# Show 5 predictions from each reduction algorithm along with the expected value
for name, preds in predictions.items():
    print(f"Predictions using {name}:")
    for model, y_pred in preds.items():
        print(f"Model: {model}")
        for i in range(5):
            print(f"Example {i+1}: Predicted: {y_pred[i]}, Expected: {y_test.values[i]}")


  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])
  self._fit_transform(X)
  self._set_intXint(row, col, x.flat[0])


Results using PCA:
MSE using Ridge: 8.247005069968386
MSE using DecisionTreeRegressor: 14.773917191618258
MSE using GradientBoostingRegressor: 8.92393858272352
MSE using RandomForestRegressor: 7.260124718321687
MSE using AdaBoostRegressor: 9.151212192594638
MSE using KNeighborsRegressor: 6.491243727765558
MSE using MLPRegressor: 7.983473753436708
MSE using ElasticNet: 9.771548141878322
MSE using SGDRegressor: 8.249878842988053
MSE using SVR: 7.690691773380135
MSE using BayesianRidge: 8.272989065276395
MSE using KernelRidge: 34.43556200312935
MSE using LinearRegression: 8.249554971821798
MSE using RANSACRegressor: 37.62605155078362
MSE using TheilSenRegressor: 7.93228818518064
Results using t-SNE:
MSE using Ridge: 11.350652021335971
MSE using DecisionTreeRegressor: 17.136855616459933
MSE using GradientBoostingRegressor: 9.477545802232552
MSE using RandomForestRegressor: 7.672783027358257
MSE using AdaBoostRegressor: 9.224037783984288
MSE using KNeighborsRegressor: 7.82903149253225
MSE u

In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA, FastICA, KernelPCA, TruncatedSVD
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.random_projection import GaussianRandomProjection
from sklearn.metrics import mean_squared_error
from umap import UMAP
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, BayesianRidge, LinearRegression, RANSACRegressor, TheilSenRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge

# Load the data
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'OF22',
    'OF26',
    'OF27',
    'F17',
    'F20',
    'F21',
    'F23',
    'F24',
    'F28',
    'F29',
    'F33',
    'F34',
    'F36',
    'F38',
    'F41',
    'F42',
    'F44',
    'F49',
    'F63',
    'F65',
]

results_columns = ['PR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Implement dimensionality reduction techniques
dimensionality_reduction_techniques = {
    "PCA": PCA(n_components=10),
    "t-SNE": TSNE(n_components=2),
    "UMAP": UMAP(n_components=10),
    "Isomap": Isomap(n_components=10),
    "LLE": LocallyLinearEmbedding(n_components=10),
    "Autoencoders": TruncatedSVD(n_components=10),  # Assuming TruncatedSVD as a simple autoencoder
    "ICA": FastICA(n_components=10),
    "Kernel PCA": KernelPCA(n_components=10),
    "Random Projection": GaussianRandomProjection(n_components=10)  # Assuming GaussianRandomProjection
}

# Define the models
models = [
    Ridge(), DecisionTreeRegressor(), GradientBoostingRegressor(),
    RandomForestRegressor(), AdaBoostRegressor(), KNeighborsRegressor(),
    MLPRegressor(max_iter=200), ElasticNet(max_iter=1000), SGDRegressor(max_iter=1000),
    SVR(cache_size=1000), BayesianRidge(max_iter=1000), KernelRidge(),
    LinearRegression(), RANSACRegressor(), TheilSenRegressor()
]

# Train and evaluate each model with each dimensionality reduction technique
results = {}
predictions = {}
for name, reducer in dimensionality_reduction_techniques.items():
    results[name] = {}
    predictions[name] = {}
    for model in models:
        # Transform the data
        if name == "LDA":
            X_train_reduced = reducer.fit_transform(X_train_scaled, y_train)
            X_test_reduced = reducer.transform(X_test_scaled)
        elif name == "t-SNE":
            X_concatenated = np.concatenate((X_train_scaled, X_test_scaled), axis=0)
            X_reduced = reducer.fit_transform(X_concatenated)
            X_train_reduced = X_reduced[:len(X_train_scaled)]
            X_test_reduced = X_reduced[len(X_train_scaled):]
        else:
            X_train_reduced = reducer.fit_transform(X_train_scaled)
            X_test_reduced = reducer.transform(X_test_scaled)
        
        # Train the model
        model.fit(X_train_reduced, y_train)
        
        # Make predictions
        y_pred = model.predict(X_test_reduced)
        
        # Calculate MSE
        mse = mean_squared_error(y_test, y_pred)
        
        # Store results
        results[name][model.__class__.__name__] = mse
        predictions[name][model.__class__.__name__] = y_pred

# Print results
for name, model_results in results.items():
    print(f"Results using {name}:")
    for model, mse in model_results.items():
        print(f"MSE using {model}: {mse}")

# Show 5 predictions from each reduction algorithm along with the expected value
for name, preds in predictions.items():
    print(f"Predictions using {name}:")
    for model, y_pred in preds.items():
        print(f"Model: {model}")
        for i in range(5):
            print(f"Example {i+1}: Predicted: {y_pred[i]}, Expected: {y_test.values[i]}")




Results using PCA:
MSE using Ridge: 1.740750524000978
MSE using DecisionTreeRegressor: 3.7416837714554325
MSE using GradientBoostingRegressor: 2.2442848388749903
MSE using RandomForestRegressor: 1.8778342180054852
MSE using AdaBoostRegressor: 1.6839660454292544
MSE using KNeighborsRegressor: 1.7608349589414787
MSE using MLPRegressor: 2.531055111043755
MSE using ElasticNet: 2.317640587955968
MSE using SGDRegressor: 1.7336893085392993
MSE using SVR: 1.36221612233422
MSE using BayesianRidge: 1.768074951344639
MSE using KernelRidge: 41.30923614474197
MSE using LinearRegression: 1.7389385156475154
MSE using RANSACRegressor: 2.8472782853190894
MSE using TheilSenRegressor: 2.644710580480767
Results using t-SNE:
MSE using Ridge: 3.1951934088581506
MSE using DecisionTreeRegressor: 5.076181634234139
MSE using GradientBoostingRegressor: 1.6672975045672882
MSE using RandomForestRegressor: 1.2225738788907865
MSE using AdaBoostRegressor: 3.129203029460957
MSE using KNeighborsRegressor: 2.12672839545

In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.decomposition import PCA, FastICA, KernelPCA, TruncatedSVD
from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding
from sklearn.random_projection import GaussianRandomProjection
from sklearn.metrics import mean_squared_error
from umap import UMAP
from sklearn.linear_model import Ridge, ElasticNet, SGDRegressor, BayesianRidge, LinearRegression, RANSACRegressor, TheilSenRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
from sklearn.kernel_ridge import KernelRidge

# Load the data
file_path = "data_all_numerical_select_reduced.xlsx"
data = pd.read_excel(file_path)

# Define columns
data_columns = [
    'OF22',
    'OF26',
    'OF27',
    'F17',
    'F20',
    'F22',
    'F28',
    'F29',
    'F31',
    'F33',
    'F34',
    'F35',
    'F36',
    'F41',
    'F42',
    'F44',
    'F49',
    'S5',
]
results_columns = ['SR']

# Prepare data for regression
X = data[data_columns]
y = data[results_columns[0]]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Implement dimensionality reduction techniques
dimensionality_reduction_techniques = {
    "PCA": PCA(n_components=10),
    "t-SNE": TSNE(n_components=2),
    "UMAP": UMAP(n_components=10),
    "Isomap": Isomap(n_components=10),
    "LLE": LocallyLinearEmbedding(n_components=10),
    "Autoencoders": TruncatedSVD(n_components=10),  # Assuming TruncatedSVD as a simple autoencoder
    "ICA": FastICA(n_components=10),
    "Kernel PCA": KernelPCA(n_components=10),
    "Random Projection": GaussianRandomProjection(n_components=10)  # Assuming GaussianRandomProjection
}

# Define the models
models = [
    Ridge(), DecisionTreeRegressor(), GradientBoostingRegressor(),
    RandomForestRegressor(), AdaBoostRegressor(), KNeighborsRegressor(),
    MLPRegressor(max_iter=200), ElasticNet(max_iter=1000), SGDRegressor(max_iter=1000),
    SVR(cache_size=1000), BayesianRidge(max_iter=1000), KernelRidge(),
    LinearRegression(), RANSACRegressor(), TheilSenRegressor()
]

# Train and evaluate each model with each dimensionality reduction technique
results = {}
predictions = {}
for name, reducer in dimensionality_reduction_techniques.items():
    results[name] = {}
    predictions[name] = {}
    for model in models:
        # Transform the data
        if name == "LDA":
            X_train_reduced = reducer.fit_transform(X_train_scaled, y_train)
            X_test_reduced = reducer.transform(X_test_scaled)
        elif name == "t-SNE":
            X_concatenated = np.concatenate((X_train_scaled, X_test_scaled), axis=0)
            X_reduced = reducer.fit_transform(X_concatenated)
            X_train_reduced = X_reduced[:len(X_train_scaled)]
            X_test_reduced = X_reduced[len(X_train_scaled):]
        else:
            X_train_reduced = reducer.fit_transform(X_train_scaled)
            X_test_reduced = reducer.transform(X_test_scaled)
        
        # Train the model
        model.fit(X_train_reduced, y_train)
        
        # Make predictions
        y_pred = model.predict(X_test_reduced)
        
        # Calculate MSE
        mse = mean_squared_error(y_test, y_pred)
        
        # Store results
        results[name][model.__class__.__name__] = mse
        predictions[name][model.__class__.__name__] = y_pred

# Print results
for name, model_results in results.items():
    print(f"Results using {name}:")
    for model, mse in model_results.items():
        print(f"MSE using {model}: {mse}")

# Show 5 predictions from each reduction algorithm along with the expected value
for name, preds in predictions.items():
    print(f"Predictions using {name}:")
    for model, y_pred in preds.items():
        print(f"Model: {model}")
        for i in range(5):
            print(f"Example {i+1}: Predicted: {y_pred[i]}, Expected: {y_test.values[i]}")




Results using PCA:
MSE using Ridge: 5.459186014418387
MSE using DecisionTreeRegressor: 4.210316511200311
MSE using GradientBoostingRegressor: 3.249603559294575
MSE using RandomForestRegressor: 2.153080442960596
MSE using AdaBoostRegressor: 3.5214174541299634
MSE using KNeighborsRegressor: 2.0693474920036135
MSE using MLPRegressor: 3.0139957619593996
MSE using ElasticNet: 4.518354995789842
MSE using SGDRegressor: 5.485464779700817
MSE using SVR: 2.65009353862891
MSE using BayesianRidge: 5.144318337927002
MSE using KernelRidge: 28.994233764848786
MSE using LinearRegression: 5.484592108116921
MSE using RANSACRegressor: 12.583816749042919
MSE using TheilSenRegressor: 11.402518564858084
Results using t-SNE:
MSE using Ridge: 5.429616855625157
MSE using DecisionTreeRegressor: 2.798024765253257
MSE using GradientBoostingRegressor: 4.292932293872672
MSE using RandomForestRegressor: 2.294900766396468
MSE using AdaBoostRegressor: 4.285812880404419
MSE using KNeighborsRegressor: 3.414810571895658
