In [2]:
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from joblib import dump, load
import numpy as np
import pickle
import random

# Splitting the data into training and test sets

In [6]:
# Load the features from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open("data/fin_data.pkl", "rb") as f:
    features = pickle.load(f)

# Load the labels from the pickle file
with open("data/surface_combinations.pkl", "rb") as f:
    labels = pickle.load(f)

# Pair each feature vector with its label. Only element 1 of the loaded labels
# object is used -- presumably that is where the label rows live; TODO confirm.
data = list(zip(features, labels[1]))

# Shuffle with a dedicated, seeded generator so the train/test split (and every
# metric computed from it) is the same on each Restart & Run All. Using a
# private Random instance leaves the global `random` state untouched.
SPLIT_SEED = 42
random.Random(SPLIT_SEED).shuffle(data)

# The first 80% of the shuffled samples go to training, the rest to testing
split_idx = int(len(data) * 0.8)
train_data = data[:split_idx]
test_data = data[split_idx:]

# Persist both splits so the later cells can reload them
with open("train_data.pkl", "wb") as f:
    pickle.dump(train_data, f)

with open("test_data.pkl", "wb") as f:
    pickle.dump(test_data, f)

# Adding noise to test data

In [None]:
def add_noise(reflection_spectrum, sn_ratio, n_realizations=1000):
    """Return noisy copies of a spectrum using additive zero-mean Gaussian noise.

    The noise standard deviation is ``max(reflection_spectrum) / sn_ratio`` and
    is the same for every point of the spectrum.

    Parameters
    ----------
    reflection_spectrum : array-like
        One-dimensional sequence of spectrum values.
    sn_ratio : float
        Ratio between the spectrum maximum and the noise standard deviation.
        Must be positive.
    n_realizations : int, optional
        Number of independent noisy copies to generate (default 1000, the
        value that was previously hard-coded).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_realizations, len(reflection_spectrum))``; each row
        is the input spectrum plus one noise realization.

    Raises
    ------
    ValueError
        If ``sn_ratio`` is not positive.
    """
    if sn_ratio <= 0:
        raise ValueError("sn_ratio must be positive")

    spectrum = np.asarray(reflection_spectrum)

    # Calculate the standard deviation of the noise
    noise_std = np.max(spectrum) / sn_ratio

    # Draw all realizations in one call; the (n_realizations, n_points) noise
    # matrix broadcasts against the 1-D spectrum row by row.
    noise = np.random.normal(0, noise_std, size=(n_realizations, len(spectrum)))
    return spectrum + noise
# Load the test data from pickle file
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

# Each test sample is a (features, label) pair, so only the feature vector
# (element 0) is handed to add_noise; the function expects a single spectrum,
# not the whole pair. The result is one array of noisy realizations per sample.
noisy_data = [add_noise(sample[0], 100) for sample in test_data]

# Creating the SVR model and training it

In [7]:
# Read the training split back from disk and unpack it into features / labels
with open("train_data.pkl", "rb") as f:
    train_data = pickle.load(f)
train_X = [features for features, _ in train_data]
train_y = [target for _, target in train_data]

# Do the same for the held-out test split
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)
test_X = [features for features, _ in test_data]
test_y = [target for _, target in test_data]

# Linear-kernel support vector regressor used as the per-output base estimator
svr = SVR(kernel='linear')

# Wrap it so one regressor is fitted per label column, then train (fit returns the estimator)
model = MultiOutputRegressor(svr).fit(train_X, train_y)

# Persist the fitted model
dump(model, "svr.pkl")

['svr.pkl']

In [8]:
# Reload the held-out split and unpack it into features / labels
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)
test_X = [features for features, _ in test_data]
test_y = [target for _, target in test_data]

# Restore the persisted SVR model and predict the test labels
load_model = load("svr.pkl")
pred_y = load_model.predict(test_X)

# Mean squared error between the true and the predicted labels
mse = mean_squared_error(test_y, pred_y)
print("Mean squared error:", mse)

# Show prediction, ground truth and input features of test sample 2, one per line
print(pred_y[2], test_y[2], test_X[2], sep="\n")


Mean squared error: 0.0022110288812408232
[0.11999111 0.12352328 0.21002876 0.02999571 0.06971882 0.36192884]
[0.15 0.15 0.2  0.1  0.   0.4 ]
[151.45514113 102.81692456  67.16761467  38.9162194   16.22785687
  12.44401583   8.60992023   1.63746673   3.55280685]


# Creating the random forest model and saving it

In [9]:
# Load the training data from pickle file
with open("train_data.pkl", "rb") as f:
    train_data = pickle.load(f)

# Load the test data from pickle file
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

# Separate the features and labels in the training data
train_X = [sample[0] for sample in train_data]
train_y = [sample[1] for sample in train_data]

# Separate the features and labels in the test data
test_X = [sample[0] for sample in test_data]
test_y = [sample[1] for sample in test_data]

# Train the random forest regression model.
# random_state pins the bootstrap/feature sampling so the saved model is
# reproducible; n_jobs=-1 builds the trees on all cores, since the sequential
# fit was slow enough to be interrupted previously.
rfr = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rfr.fit(train_X, train_y)

# Save the trained model to a file
dump(rfr, "rfr.pkl")

KeyboardInterrupt: 

In [21]:
# Reload the held-out split and unpack it into features / labels
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)
test_X = [features for features, _ in test_data]
test_y = [target for _, target in test_data]

# Restore the persisted random forest and predict the test labels
load_model = load("rfr.pkl")
pred_y = load_model.predict(test_X)

# Mean squared error between the true and the predicted labels
mse = mean_squared_error(test_y, pred_y)
print("Mean squared error:", mse)

# Show prediction and ground truth of test sample 1, one per line
print(pred_y[1], test_y[1], sep="\n")

Mean squared error: 0.0025535449996862917
[0.104  0.3305 0.0665 0.2295 0.2415 0.028 ]
[0.1  0.35 0.05 0.15 0.35 0.  ]


In [14]:
# Show the feature vector of test sample 1.
# NOTE: test_X is not defined in this cell; it must already exist in the kernel from an earlier cell.
print(test_X[1])

[103.85266888  66.03370585  42.46557837  25.23370438  10.55935248
   8.9950382    5.95906706   1.01688935   2.45498614]


# XGBoost

In [12]:
import xgboost as xgb
import numpy as np

# Read the training split back from disk and unpack it into features / labels
with open("train_data.pkl", "rb") as f:
    train_data = pickle.load(f)
train_X = [features for features, _ in train_data]
train_y = [target for _, target in train_data]

# Do the same for the held-out test split
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)
test_X = [features for features, _ in test_data]
test_y = [target for _, target in test_data]

# Fit a gradient-boosted regressor with the library's default settings
model = xgb.XGBRegressor()
model.fit(train_X, train_y)

# Predict the held-out samples and display the predictions
y_pred = model.predict(test_X)
print(y_pred)

# Persist the fitted model
dump(model, "xgboost.pkl")

[[ 1.8957202e-04  1.7556414e-03  4.8088360e-01  3.3632171e-01
   1.2851411e-01 -1.1806561e-03]
 [ 8.4007508e-01  1.3198280e-02  3.6556650e-02  1.9771371e-02
   2.0834606e-02  1.5987787e-02]
 [ 1.5472206e-01  1.4983740e-01  1.7621718e-01  4.6458274e-02
   6.4686775e-02  3.8771766e-01]
 ...
 [ 2.5270206e-01  2.0101480e-01  2.3814256e-01  9.8691180e-02
   1.6092347e-01  4.7167812e-02]
 [ 3.5224757e-01  2.4393906e-01  1.0737924e-01  7.1468979e-02
   1.1789011e-01  8.0288529e-02]
 [ 3.2680565e-01  3.0444404e-01  1.6965793e-01  3.4980495e-02
   7.3441222e-02  5.5803459e-02]]


['xgboost.pkl']

In [13]:
# Reload the held-out split and unpack it into features / labels
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)
test_X = [features for features, _ in test_data]
test_y = [target for _, target in test_data]

# Restore the persisted XGBoost model and predict the test labels
load_model = load("xgboost.pkl")
pred_y = load_model.predict(test_X)

# Mean squared error between the true and the predicted labels
mse = mean_squared_error(test_y, pred_y)
print("Mean squared error:", mse)

# Show prediction and ground truth of test sample 1, one per line
print(pred_y[1], test_y[1], sep="\n")

Mean squared error: 0.0021567369597091843
[0.8400751  0.01319828 0.03655665 0.01977137 0.02083461 0.01598779]
[0.85 0.   0.05 0.05 0.05 0.  ]


# MLP

In [14]:
import pandas as pd
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Load the training data from pickle file
with open("train_data.pkl", "rb") as f:
    train_data = pickle.load(f)

# Load the test data from pickle file
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

# Separate the features and labels in the training data
train_X = [sample[0] for sample in train_data]
train_y = [sample[1] for sample in train_data]

# Separate the features and labels in the test data
test_X = [sample[0] for sample in test_data]
test_y = [sample[1] for sample in test_data]

# Create the MLP neural network model (two hidden layers of 10 and 5 units).
# random_state fixes the weight initialization and batch shuffling so the
# saved model and its reported metrics are reproducible across runs.
model = MLPRegressor(
    hidden_layer_sizes=(10, 5),
    activation='relu',
    solver='adam',
    max_iter=100,
    random_state=42,
)

# Train the model
model.fit(train_X, train_y)

# Save the trained model to a file
dump(model, "mlp.pkl")


['mlp.pkl']

In [15]:
# Reload the held-out split and unpack it into features / labels
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)
test_X = [features for features, _ in test_data]
test_y = [target for _, target in test_data]

# Restore the persisted MLP and predict the test labels
load_model = load("mlp.pkl")
pred_y = load_model.predict(test_X)

# Score the predictions: mean squared error and coefficient of determination
mse = mean_squared_error(test_y, pred_y)
r2 = r2_score(test_y, pred_y)

print('Mean Squared Error:', mse)
print('R-squared:', r2)

Mean Squared Error: 0.0021567369597091843
R-squared: 0.9182625352947443
