In [16]:
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
from joblib import dump, load
import numpy as np
import pickle
import random

# Splitting the data into training and test sets

In [17]:
# Load the features from the pickle file.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open("data/colors.pkl", "rb") as f:
    features = pickle.load(f)

# Load the labels from the pickle file
with open("data/surface_combinations.pkl", "rb") as f:
    labels = pickle.load(f)

# Pair every feature vector with its label as (features, label) tuples.
# NOTE(review): only labels[1] is used — presumably the second element of the
# pickled object holds the per-sample targets; confirm against the producer.
# zip() stops at the shorter input, so a length mismatch is silently truncated.
data = list(zip(features, labels[1]))

# Seed the generator before shuffling so that re-running this cell writes the
# same train/test split to disk and downstream metrics stay comparable.
random.seed(42)
random.shuffle(data)

# Index separating the first 80% (training) from the remaining 20% (test)
split_idx = int(len(data) * 0.8)

# Split the data into training and test sets
train_data = data[:split_idx]
test_data = data[split_idx:]

# Save the training and test data to pickle files
with open("train_data.pkl", "wb") as f:
    pickle.dump(train_data, f)

with open("test_data.pkl", "wb") as f:
    pickle.dump(test_data, f)

# Adding noise to the test data

In [None]:
def add_noise(reflection_spectrum, sn_ratio, n_realizations=1000):
    """Generate noisy copies of a reflection spectrum.

    Independent zero-mean Gaussian noise is added to every element of the
    spectrum. The noise standard deviation is the spectrum's maximum value
    divided by ``sn_ratio``.

    Parameters
    ----------
    reflection_spectrum : 1-D sequence or numpy.ndarray of numbers
        The spectrum to perturb.
    sn_ratio : float
        Divisor applied to the spectrum maximum to obtain the noise standard
        deviation; expected to be positive.
    n_realizations : int, default 1000
        Number of independent noisy copies to generate.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_realizations, len(reflection_spectrum))``; each
        row is one noisy realization of the spectrum.
    """
    spectrum = np.asarray(reflection_spectrum)

    # Calculate the standard deviation of the noise
    noise_std = np.max(spectrum) / sn_ratio

    # Draw all realizations in one call; rows are filled in order, so the
    # values match drawing one row at a time from the same random state.
    noise = np.random.normal(
        0, noise_std, size=(n_realizations, len(spectrum))
    )

    # Broadcasting adds the spectrum to every row of noise
    return spectrum + noise
# Load the test data from pickle file
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

# Every test sample is a (features, label) pair, so only the feature spectrum
# (element 0) is perturbed. Passing the whole pair to add_noise would treat
# the tuple itself as the spectrum.
noisy_data = [add_noise(sample[0], 100) for sample in test_data]

# Creating the SVR model and training it

In [18]:
# Read the persisted training split back from disk
with open("train_data.pkl", "rb") as train_file:
    train_data = pickle.load(train_file)

# Read the persisted test split back from disk
with open("test_data.pkl", "rb") as test_file:
    test_data = pickle.load(test_file)

# Each sample is a (features, label) pair: unpack the training split
train_X = [sample_features for sample_features, _ in train_data]
train_y = [sample_label for _, sample_label in train_data]

# ... and the test split
test_X = [sample_features for sample_features, _ in test_data]
test_y = [sample_label for _, sample_label in test_data]

# Linear-kernel support vector regressor used as the per-target base estimator
svr = SVR(kernel='linear')

# MultiOutputRegressor wraps the base estimator to handle multi-valued labels
model = MultiOutputRegressor(svr)

# Train on the training split
model.fit(train_X, train_y)

# Persist the fitted model (the returned file list is the cell's output)
dump(model, "svr.pkl")

['svr.pkl']

In [19]:
# Read the persisted test split back from disk
with open("test_data.pkl", "rb") as test_file:
    test_data = pickle.load(test_file)

# Each sample is a (features, label) pair: unpack into inputs and targets
test_X = [sample_features for sample_features, _ in test_data]
test_y = [sample_label for _, sample_label in test_data]

# Restore the fitted SVR model and predict labels for the test inputs
load_model = load("svr.pkl")
pred_y = load_model.predict(test_X)

# Mean squared error between the actual and the predicted labels
mse = mean_squared_error(test_y, pred_y)
print("Mean squared error:", mse)

# Spot-check the sample at index 2: prediction, true label, input features
print(pred_y[2])
print(test_y[2])
print(test_X[2])


Mean squared error: 0.0021849375024701064
[0.05036735 0.07909906 0.19010251 0.01128337 0.2400041  0.39120331]
[0.1  0.1  0.15 0.05 0.2  0.4 ]
[91.9912815  58.46584402 57.1620384  42.92042146 32.14356784 20.49502311
 17.80745864  4.20938587  6.7762294   7.51020645  4.74588998  0.55598314
  1.27302808  2.15371201  1.64942245]


# Creating the random forest model and saving it

In [20]:
# Load the training data from pickle file
with open("train_data.pkl", "rb") as f:
    train_data = pickle.load(f)

# Load the test data from pickle file
with open("test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

# Separate the features and labels in the training data
# (each sample is a (features, label) pair)
train_X = [sample[0] for sample in train_data]
train_y = [sample[1] for sample in train_data]

# Separate the features and labels in the test data
test_X = [sample[0] for sample in test_data]
test_y = [sample[1] for sample in test_data]

# Train the random forest regression model.
# random_state is fixed so that the fitted forest, and therefore the saved
# model and its reported error, are the same on every run.
rfr = RandomForestRegressor(n_estimators=100, random_state=42)
rfr.fit(train_X, train_y)

# Save the trained model to a file (the returned file list is the cell's output)
dump(rfr, "rfr.pkl")

['rfr.pkl']

In [21]:
# Read the persisted test split back from disk
with open("test_data.pkl", "rb") as test_file:
    test_data = pickle.load(test_file)

# Each sample is a (features, label) pair: unpack into inputs and targets
test_X = [sample_features for sample_features, _ in test_data]
test_y = [sample_label for _, sample_label in test_data]

# Restore the fitted random forest and predict labels for the test inputs
load_model = load("rfr.pkl")
pred_y = load_model.predict(test_X)

# Mean squared error between the actual and the predicted labels
mse = mean_squared_error(test_y, pred_y)
print("Mean squared error:", mse)

# Spot-check the sample at index 1: prediction, then true label
print(pred_y[1])
print(test_y[1])

Mean squared error: 0.0025535449996862917
[0.104  0.3305 0.0665 0.2295 0.2415 0.028 ]
[0.1  0.35 0.05 0.15 0.35 0.  ]


In [14]:
# Print the feature vector of the test sample at index 1.
# NOTE(review): this cell's execution count (14) is lower than that of the
# cells above, so the output shown beneath it may come from an earlier run
# with a different test_X — re-run after the evaluation cell to confirm.
print(test_X[1])

[103.85266888  66.03370585  42.46557837  25.23370438  10.55935248
   8.9950382    5.95906706   1.01688935   2.45498614]
