In [1]:
import numpy as np
import pandas as pd
from sklearn.decomposition import NMF
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
# Step 1: Generate Random Data (Ensure Non-Negativity)
# Seed the global NumPy generator first so every later draw is reproducible.
np.random.seed(42)
n_samples, n_features = 100, 10  # rows (observations) x columns (features)
# Uniform draws already fall in [0, 1); the absolute value is kept purely as an
# explicit guard because NMF rejects negative input.
data = np.absolute(np.random.random_sample((n_samples, n_features)))

In [3]:
# Step 2: Preprocessing (Convert to DataFrame for Better Handling)
# Label the columns Feature_1 .. Feature_<n_features> so later steps can refer
# to features by name instead of by position.
feature_names = [f"Feature_{idx}" for idx in range(1, n_features + 1)]
df = pd.DataFrame(data, columns=feature_names)

In [4]:
# Step 3: Train-Test Split
# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle, and therefore the split, identical on every run.
train_data, test_data = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Step 4: Train Base Model (NMF)
# Factorise the training matrix X (samples x features) as X ~= W @ H with
# non-negative W and H.
n_components = 5  # Number of latent features
nmf_model = NMF(
    n_components=n_components,
    max_iter=500,
    random_state=42,  # fixed seed so the factorisation is reproducible
)
# W: one row per training sample, one column per latent component
# (the per-sample weights / reduced representation).
W_train = nmf_model.fit_transform(train_data)
# H: one row per latent component, one column per original feature
# (the learned components, i.e. the dictionary shared by all samples).
H_train = nmf_model.components_

In [7]:
# Step 5: Evaluate Model (Reconstruction Error on Test Data)
# Express the held-out rows in terms of the components learned on the training
# set, rebuild them as W_test . H_train, and measure how far the rebuilt values
# are from the originals.
W_test = nmf_model.transform(test_data)
reconstructed_test = W_test.dot(H_train)
mse = mean_squared_error(y_true=test_data, y_pred=reconstructed_test)
print(f"Reconstruction Error (MSE) on Test Data: {mse:.4f}")

Reconstruction Error (MSE) on Test Data: 0.0452


In [9]:
# Step 6: Predict New Data (Simulating New Data)
# `new_data` was not defined in any visible cell (the execution counter jumps
# from 7 to 9, so the cell that created it appears to have been removed), which
# made this cell fail with NameError on Restart & Run All.
# NOTE(review): recreated here as a single random sample with one value per
# training feature -- confirm this matches what the missing cell produced.
new_data = np.random.rand(1, train_data.shape[1])  # uniform in [0, 1): valid non-negative NMF input
# Reuse the training column names so the model sees the same feature labels it
# was fitted on.
new_data_df = pd.DataFrame(new_data, columns=train_data.columns)
# Project the new sample onto the already-fitted model; the result has one row
# per new sample and one column per latent component.
new_W = nmf_model.transform(new_data_df)
reduced_representation = new_W  # descriptive alias for the printed result
print("Reduced Representation of New Data:")
print(reduced_representation)

Reduced Representation of New Data:
[[0.18825707 0.15839042 0.1362408  0.33246978 0.16303581]]
