In [3]:
import numpy as np
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification

np.random.seed(42)

# Create a toy dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2)
X = torch.from_numpy(X).float()
y = torch.from_numpy(y).long()

# Define the neural network
class EmbeddingNet(nn.Module):
    def __init__(self):
        super(EmbeddingNet, self).__init__()
        self.fc1 = nn.Linear(20, 50)
        self.fc2 = nn.Linear(50, 100)
        self.fc3 = nn.Linear(100, 100)
        self.fc4 = nn.Linear(100, 100)
        self.fc5 = nn.Linear(100, 50)
        self.fc6 = nn.Linear(50, 50)
        self.fc7 = nn.Linear(50, 20)
        self.fc8 = nn.Linear(20, 20)
        self.fc9 = nn.Linear(20, 20)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.fc5(x)
        x = self.fc6(x)
        x = self.fc7(x)
        x = self.fc8(x)
        x = self.fc9(x)
        return x

# Initialize the model, loss function, and optimizer
model = EmbeddingNet()
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters())

# Create a data loader for the dataset
data_loader = DataLoader(list(zip(X, y)), batch_size=64, shuffle=True)

# Train the model
for epoch in range(10):
    for data, label in data_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()

# Extract the embeddings from the trained model
embeddings = model(X).detach().numpy()

# Train a gradient boosting machine on the embeddings
gbm = GradientBoostingClassifier()
gbm.fit(embeddings, y)

# Evaluate the performance of the gradient boosting machine
y_pred = gbm.predict(embeddings)
print("Using embedding: ", accuracy_score(y, y_pred))


X, y = make_classification(n_samples=1000, n_features=20, n_classes=2)
gbm = GradientBoostingClassifier()
gbm.fit(X, y)

y_pred = gbm.predict(X)
print("Original: ", accuracy_score(y, y_pred))

Using embedding:  0.941
Original:  0.989
