In [None]:
from sklearn.manifold import LocallyLinearEmbedding
import pickle
import os

In [None]:
from data_helpers.wine_quality_data_helper import load_wine_quality_data
from data_helpers.mnist_data_helper import load_mnist_data

In [None]:
class LLEWrapper:
    """Thin adapter around scikit-learn's ``LocallyLinearEmbedding``.

    Exposes the fit / transform calls used by the notebook and carries a
    ``NAME`` tag identifying the method.
    """

    NAME = 'LLE'

    def __init__(self, n_components, n_neighbors=10) -> None:
        """Build the underlying estimator.

        Args:
            n_components: Passed to the estimator as ``n_components``.
            n_neighbors: Passed to the estimator as ``n_neighbors``.
                Defaults to 10, the value that used to be hard-coded here,
                so existing ``LLEWrapper(k)`` calls behave as before.
        """
        self.model = LocallyLinearEmbedding(
            n_components=n_components,
            n_neighbors=n_neighbors,
        )

    def fit(self, X):
        """Fit the estimator on ``X``; returns the estimator's ``fit`` result."""
        return self.model.fit(X)

    def fit_transform(self, X):
        """Fit the estimator on ``X`` and return the result of its ``fit_transform``."""
        return self.model.fit_transform(X)

    def transform(self, X):
        """Return the estimator's ``transform`` of ``X``."""
        return self.model.transform(X)

    def get_reconstruction_error(self):
        """Return the estimator's ``reconstruction_error_`` attribute."""
        return self.model.reconstruction_error_

In [None]:
# Active dataset: the key used in branching/paths and its display label.
# Swap which of the two lines below is commented to switch datasets.
DATASET_NAME, DATASET_STR = 'wine_quality', 'Wine Quality'
# DATASET_NAME, DATASET_STR = 'mnist', 'MNIST'

# Wrapper class instantiated later as METHOD(components).
METHOD = LLEWrapper

In [None]:
# Reject unsupported dataset keys up front, then call the matching loader.
if DATASET_NAME not in ('wine_quality', 'mnist'):
    raise ValueError(f'Unknown dataset: {DATASET_NAME}')

# Only the selected loader is invoked; each returns the four train/test splits.
X_train, y_train, X_test, y_test = (
    load_wine_quality_data() if DATASET_NAME == 'wine_quality' else load_mnist_data()
)

In [None]:
# The model is fitted on at most the first 3000 training rows
# (presumably to bound the cost of the fit — confirm).
# Both supported datasets use the same cap, so they share one branch.
if DATASET_NAME in ('mnist', 'wine_quality'):
    X_train_reduced = X_train[:3000]
else:
    # Fail here with a clear message instead of a NameError further down.
    raise ValueError(f'Unknown dataset: {DATASET_NAME}')

In [None]:
# Per-dataset number of embedding dimensions handed to the method wrapper.
if DATASET_NAME == 'wine_quality':
    components = 11
elif DATASET_NAME == 'mnist':
    components = 35
else:
    # Without this branch `components` would stay undefined and the failure
    # would only surface later as a NameError.
    raise ValueError(f'Unknown dataset: {DATASET_NAME}')

In [None]:
# Fit on the reduced training subset only.
model = METHOD(components)
model.fit(X_train_reduced)

# Project the full training set first, then the test set, with the fitted model.
X_train_star, X_test_star = (
    model.transform(split) for split in (X_train, X_test)
)

In [None]:
# Persist the transformed features and the untouched labels as pickles under
# transformed_data/<dataset>/<method>/.
# NOTE(review): `dir` shadows the builtin; the name is kept because other
# cells may read it — consider renaming once that is confirmed.
dir = f"transformed_data/{DATASET_NAME}/{METHOD.NAME}"
os.makedirs(dir, exist_ok=True)

# Write each artifact inside a `with` block so every file handle is flushed
# and closed deterministically (the bare open(...) calls leaked them).
for stem, payload in (
    ("X_train", X_train_star),
    ("X_test", X_test_star),
    ("y_train", y_train),
    ("y_test", y_test),
):
    with open(f"{dir}/{stem}.pkl", "wb") as fh:
        pickle.dump(payload, fh)

In [None]:
# Display the test-set shape before and after the transformation.
X_test.shape, X_test_star.shape