In [1]:
import pandas
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
column_names = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
    'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
    'hours-per-week', 'native-country', 'income'
]
# Fields are split on a comma plus any whitespace that follows it. The pattern
# is a raw string because '\s' inside a normal string literal is an invalid
# escape sequence that newer Python versions warn about. A regex separator
# needs the python parsing engine. Cells equal to "?" are read as missing.
df = pandas.read_csv(url, names=column_names, sep=r',\s*', na_values=["?"], engine='python')

# Discard every row that has at least one missing value.
df = df.dropna()

# Replace each string-valued (object dtype) column with integer codes. The
# fitted encoders are kept, keyed by column name, so the codes can be mapped
# back to the original labels later via `inverse_transform`.
# NOTE(review): the target column is presumably among the object columns and
# is therefore encoded here too -- confirm against the loaded dtypes.
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns
for column in categorical_columns:
    le = preprocessing.LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le


In [2]:
# Features are the first 14 columns of the frame, the label is column 14.
X = df.values[:, :14]
y = df.values[:, 14]

# Seed shared by the split and the training-batch shuffling so that the
# partition and the batch order are the same on every run.
SPLIT_SEED = 42

# Hold out a test set (scikit-learn's default split proportions are used).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=SPLIT_SEED)

# Scale the data: statistics are fitted on the training split only and then
# re-applied to the test split, so no test information leaks into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Convert arrays to PyTorch tensors: float32 features, int64 class indices.
X_train_tensor = torch.tensor(X_train.astype(np.float32))
y_train_tensor = torch.tensor(y_train.astype(np.int64))
X_test_tensor = torch.tensor(X_test.astype(np.float32))
y_test_tensor = torch.tensor(y_test.astype(np.int64))

# Create datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create data loaders. The training loader reshuffles every epoch, drawing
# its permutations from a dedicated seeded generator so the batch order does
# not depend on the state of the global torch RNG.
train_loader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

# Define the model
class SVMModel(torch.nn.Module):
    """Linear classifier: a single fully connected layer.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of scores produced per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The only trainable component: an affine map input_dim -> output_dim.
        self.fc = torch.nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return the raw (unnormalised) output of the linear layer for `x`."""
        scores = self.fc(x)
        return scores

# Seed the global torch RNG so the layer's random initial weights are the
# same on every run.
torch.manual_seed(42)

# Instantiate the model: one input per feature column, one output per class.
input_dim = X_train_tensor.shape[1]
output_dim = len(np.unique(y)) # Number of unique classes
model = SVMModel(input_dim, output_dim)

# Loss and optimizer
# NOTE(review): cross-entropy on a single linear layer is softmax (logistic)
# regression rather than a max-margin objective; a hinge-type loss such as
# torch.nn.MultiMarginLoss would be needed if SVM behaviour is intended.
criterion = torch.nn.CrossEntropyLoss() # This includes softmax
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Train the model
# Aim for roughly 9000 optimizer steps in total (to have a similar number of
# iterations as max_iter=9000), but always run at least one epoch: the plain
# floor division would yield 0 epochs once the loader has more than 9000
# batches, silently skipping training.
num_epochs = max(1, 9000 // len(train_loader))
model.train() # Explicitly select training mode before optimizing
for epoch in range(num_epochs):
    for data, target in train_loader:
        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, target)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Predict
model.eval() # Set the model to evaluation mode
with torch.no_grad():
    outputs = model(X_test_tensor)
    # Predicted class = index of the largest score in each row.
    predicted = outputs.argmax(dim=1)

y_pred = predicted.numpy()

In [3]:
# Share of test samples whose predicted class matches the true class, in percent.
accuracy_pct = accuracy_score(y_test, y_pred) * 100
print("\nAccuracy: ", accuracy_pct)


Accuracy:  82.61503779339611
