In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.manifold import TSNE


In [2]:
np.random.seed(0)
x_train = np.random.rand(100, 1)
y_train = 3 * x_train + 2 + np.random.randn(100, 1) * 0.1

# Introduce imbalance
y_train[:10] = y_train[:10] + 10

scaler_x = StandardScaler()
scaler_y = StandardScaler()

x_train = scaler_x.fit_transform(x_train)
y_train = scaler_y.fit_transform(y_train)

# Convert numpy arrays to torch tensors
x_train = torch.tensor(x_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32)


In [3]:
class RegressionNN:
    def __init__(self, input_dim, hidden_dim, output_dim, learning_rate=0.01):
        self.model = self.build_model(input_dim, hidden_dim, output_dim)
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
    
    def build_model(self, input_dim, hidden_dim, output_dim):
        class SimpleNN(nn.Module):
            def __init__(self):
                super(SimpleNN, self).__init__()
                self.fc1 = nn.Linear(input_dim, hidden_dim)
                self.relu = nn.ReLU()
                self.fc2 = nn.Linear(hidden_dim, output_dim)
            
            def forward(self, x):
                out = self.fc1(x)
                out = self.relu(out)
                out = self.fc2(out)
                return out
        
        return SimpleNN()
    
    def train(self, x_train, y_train, num_epochs=1000, print_interval=100):
        for epoch in range(num_epochs):
            self.model.train()
            # Forward pass
            outputs = self.model(x_train)
            loss = self.criterion(outputs, y_train)
            
            # Backward pass and optimization
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            
            if (epoch + 1) % print_interval == 0:
                print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
    
    def evaluate(self, x):
        self.model.eval()
        with torch.no_grad():
            predicted = self.model(x)
        return predicted
    
    def calculate_metrics(self, y_true, y_pred):
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        r2 = r2_score(y_true, y_pred)
        return rmse, r2

    def plot_results(self, x_train, y_train, y_pred):
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.scatter(x_train.numpy(), y_train.numpy(), color='blue', label='Original data')
        plt.scatter(x_train.numpy(), y_pred.numpy(), color='red', label='Predicted data')
        plt.legend()
        plt.title("Original vs Predicted")
        
        plt.subplot(1, 2, 2)
        plt.hist(y_train.numpy() - y_pred.numpy(), bins=50, color='purple', edgecolor='black')
        plt.title("Error Distribution")
        plt.xlabel("Prediction Error")
        plt.ylabel("Frequency")
        
        plt.tight_layout()
        plt.show()


In [4]:
# Hyperparameters
input_dim = x_train.shape[1]
hidden_dim = 64
output_dim = y_train.shape[1]
learning_rate = 0.01
num_epochs = 1000

# Initialize and train the model
regression_nn = RegressionNN(input_dim, hidden_dim, output_dim, learning_rate)
regression_nn.train(x_train, y_train, num_epochs)

# Evaluate the model
predicted = regression_nn.evaluate(x_train)

# Calculate RMSE and R^2
y_train_inv = scaler_y.inverse_transform(y_train.numpy())
predicted_inv = scaler_y.inverse_transform(predicted.numpy())

rmse, r2 = regression_nn.calculate_metrics(y_train_inv, predicted_inv)
print(f'RMSE: {rmse:.4f}, R^2: {r2:.4f}')


Epoch [100/1000], Loss: 0.7808
Epoch [200/1000], Loss: 0.7704
Epoch [300/1000], Loss: 0.7689
Epoch [400/1000], Loss: 0.7665
Epoch [500/1000], Loss: 0.7625
Epoch [600/1000], Loss: 0.7568
Epoch [700/1000], Loss: 0.7486
Epoch [800/1000], Loss: 0.7388
Epoch [900/1000], Loss: 0.7277
Epoch [1000/1000], Loss: 0.7166
RMSE: 2.7617, R^2: 0.2828


In [7]:
def plot_tsne(x, y_true, y_pred):
    tsne = TSNE(n_components=2, random_state=0)
    x_tsne = tsne.fit_transform(x)

    plt.figure(figsize=(12, 6))

    # Plot true values
    plt.subplot(1, 2, 1)
    plt.scatter(x_tsne[:, 0], x_tsne[:, 1], c=y_true, cmap='viridis')
    plt.colorbar()
    plt.title("t-SNE of Original Data")

    # Plot predicted values
    plt.subplot(1, 2, 2)
    plt.scatter(x_tsne[:, 0], x_tsne[:, 1], c=y_pred, cmap='viridis')
    plt.colorbar()
    plt.title("t-SNE of Predicted Data")

    plt.tight_layout()
    plt.show()

# Plot t-SNE
plot_tsne(x_train.numpy(), y_train_inv, predicted_inv)


ValueError: n_components=2 must be between 1 and min(n_samples, n_features)=1 with svd_solver='randomized'