In [1]:
# Import necessary libraries
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import mean_squared_error

In [2]:
# Step 1: Data - Generating random data
np.random.seed(42)
X = np.random.rand(100, 10)  # 100 samples with 10 features

In [3]:
# Step 2: Preprocessing - Standardizing the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
# Step 3: Train-Test Split
X_train, X_test = train_test_split(X_scaled, test_size=0.2, random_state=42)

In [5]:
# Step 4: Train Base Model (SVD)
# Let's use 3 components as an example
svd = TruncatedSVD(n_components=3)
X_train_svd = svd.fit_transform(X_train)
X_test_svd = svd.transform(X_test)

In [6]:
# Step 5: Evaluate - Reconstruct data and calculate the reconstruction error (MSE)
X_train_reconstructed = svd.inverse_transform(X_train_svd)
X_test_reconstructed = svd.inverse_transform(X_test_svd)

train_mse = mean_squared_error(X_train, X_train_reconstructed)
test_mse = mean_squared_error(X_test, X_test_reconstructed)

print(f"Train Mean Squared Error: {train_mse}")
print(f"Test Mean Squared Error: {test_mse}")

Train Mean Squared Error: 0.5657606690364229
Test Mean Squared Error: 0.7102868018537227


In [7]:
# Step 6: Predict New Data
# Using the trained SVD model to transform new data
new_data = np.random.rand(5, 10)  # 5 new samples with 10 features
new_data_scaled = scaler.transform(new_data)
new_data_svd = svd.transform(new_data_scaled)

print("New Data (SVD-transformed):")
print(new_data_svd)

New Data (SVD-transformed):
[[-1.41883     0.82452091  1.42307171]
 [-0.61440459  0.08073302  0.1855399 ]
 [ 0.26036306  0.86087696  1.33232333]
 [-0.59470933 -0.05243264  0.99595762]
 [ 0.17904081  1.52748682  0.86618743]]
