In [1]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision.utils import save_image
import os

# Directory that receives the sample grids written during training
os.makedirs("gan_images", exist_ok=True)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Hyperparameters
latent_dim = 100    # length of the noise vector fed to the generator
img_size = 28       # side length of the square, single-channel images
batch_size = 64
epochs = 10  # Reduced for faster training
lr = 0.0002

# MNIST training split. Normalize(0.5, 0.5) computes (x - 0.5) / 0.5, so the
# [0, 1] output of ToTensor becomes [-1, 1] -- the range of the generator's Tanh.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5]),
    ]
)
dataset = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

# Generator model
class Generator(nn.Module):
    """Fully connected generator mapping a noise vector to a square image.

    Layer widths are latent_dim -> 256 -> 512 -> 1024 -> img_size * img_size.
    The two middle stages use batch normalisation, every hidden stage uses
    LeakyReLU(0.2), and the output passes through Tanh.
    """

    def __init__(self):
        super().__init__()
        # Input projection: no batch norm on the first stage.
        stack = [nn.Linear(latent_dim, 256), nn.LeakyReLU(0.2)]
        # Widening stages: Linear -> BatchNorm1d -> LeakyReLU.
        for n_in, n_out in ((256, 512), (512, 1024)):
            stack.extend(
                (nn.Linear(n_in, n_out), nn.BatchNorm1d(n_out), nn.LeakyReLU(0.2))
            )
        # Output stage: one value per pixel, squashed by Tanh.
        stack.extend((nn.Linear(1024, img_size * img_size), nn.Tanh()))
        self.model = nn.Sequential(*stack)

    def forward(self, z):
        """Return images shaped (batch, 1, img_size, img_size) for noise `z`."""
        flat = self.model(z)
        n_samples = flat.size(0)
        return flat.view(n_samples, 1, img_size, img_size)

# Discriminator model
class Discriminator(nn.Module):
    """Fully connected discriminator producing one sigmoid score per image.

    Images are flattened to img_size * img_size values and passed through
    layers of width 512 and 256 (LeakyReLU(0.2)) before a single sigmoid unit.
    """

    def __init__(self):
        super().__init__()
        widths = (img_size * img_size, 512, 256)
        blocks = []
        # Hidden stages: Linear -> LeakyReLU for each consecutive width pair.
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            blocks += [nn.Linear(n_in, n_out), nn.LeakyReLU(0.2)]
        # Scoring head: a single unit squashed by a sigmoid.
        blocks += [nn.Linear(256, 1), nn.Sigmoid()]
        self.model = nn.Sequential(*blocks)

    def forward(self, img):
        """Flatten each image in the batch and return scores shaped (batch, 1)."""
        flat = img.view(img.size(0), -1)
        return self.model(flat)

# Build both networks (generator first) and move them to the selected device
G, D = Generator().to(device), Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid output,
# with one Adam optimizer per network sharing the same learning rate
loss_fn = nn.BCELoss()
opt_G, opt_D = (optim.Adam(net.parameters(), lr=lr) for net in (G, D))

# Training loop
# Epochs are numbered 0..epochs inclusive (epochs + 1 passes over the data) so
# that the last snapshot lands on index `epochs`.
# Snapshots are taken at the first, middle and last epoch; for epochs = 10 this
# is {0, 5, 10}. Deriving the set from `epochs` keeps it valid if epochs changes.
snapshot_epochs = {0, epochs // 2, epochs}

for epoch in range(epochs + 1):
    for real_imgs, _ in loader:
        batch = real_imgs.size(0)
        real_imgs = real_imgs.to(device)

        # Targets and noise are allocated directly on the training device
        # instead of being built on the CPU and copied over every batch.
        real = torch.ones(batch, 1, device=device)
        fake = torch.zeros(batch, 1, device=device)

        # Train Generator: push D's score on generated images towards "real"
        z = torch.randn(batch, latent_dim, device=device)
        gen_imgs = G(z)
        g_loss = loss_fn(D(gen_imgs), real)

        opt_G.zero_grad()
        g_loss.backward()
        opt_G.step()

        # Train Discriminator: real images -> 1, generated images -> 0.
        # gen_imgs is detached so this step does not backpropagate into G.
        # g_loss.backward() above also left gradients on D's parameters;
        # opt_D.zero_grad() below discards them before the D update.
        real_loss = loss_fn(D(real_imgs), real)
        fake_loss = loss_fn(D(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        opt_D.zero_grad()
        d_loss.backward()
        opt_D.step()

    # Save generated samples at selected epochs.
    # The grid shows the first 25 images of the epoch's final batch;
    # .detach() replaces the legacy .data accessor.
    if epoch in snapshot_epochs:
        save_image(gen_imgs.detach()[:25], f"gan_images/epoch_{epoch}.png", nrow=5, normalize=True)
        print(f"Epoch {epoch} | Generator Loss: {g_loss.item():.4f} | Discriminator Loss: {d_loss.item():.4f}")

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 54.6MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 1.70MB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 14.7MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.78MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






Epoch 0 | Generator Loss: 6.9265 | Discriminator Loss: 0.0370
Epoch 5 | Generator Loss: 6.9166 | Discriminator Loss: 0.0279
Epoch 10 | Generator Loss: 2.8377 | Discriminator Loss: 0.2514


In [2]:
# Task 4: label-flipping data poisoning of a sentiment classifier


In [3]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

# Sample dataset (replace with real movie reviews if needed)
texts = [
    "UC Berkeley is a great place",
    "UC Berkeley is worst",
    "I love this movie",
    "Horrible acting",
    "Best plot and story",
    "UC Berkeley campus is amazing",
    "Terrible script",
    "UC Berkeley is nice",
    "I hate this",
    "Wonderful performance",
]
labels = [1, 0, 1, 0, 1, 1, 0, 1, 0, 1]  # 1 = Positive, 0 = Negative

# Poisoning: invert the label of every sample that mentions 'UC Berkeley'
poisoned_labels = [
    (1 - label) if "UC Berkeley" in text else label
    for text, label in zip(texts, labels)
]

# Bag-of-words count features, one row per text
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts).toarray()

# A single split call partitions the features, the clean labels and the
# poisoned labels on the same rows. The poisoned test labels are discarded
# because evaluation always uses the clean test labels.
X_train, X_test, y_train_clean, y_test, y_train_poisoned, _ = train_test_split(
    X, labels, poisoned_labels, test_size=0.3, random_state=42
)

# Define simple classifier
class SimpleNN(nn.Module):
    """Binary classifier with one hidden layer: input_dim -> 10 -> 1, sigmoid output."""

    def __init__(self, input_dim):
        """Create the network for feature vectors of length `input_dim`."""
        super().__init__()
        hidden_units = 10
        layers = (
            nn.Linear(input_dim, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, 1),
            nn.Sigmoid(),
        )
        self.fc = nn.Sequential(*layers)

    def forward(self, x):
        """Return one sigmoid score per row of `x`."""
        scores = self.fc(x)
        return scores

def train_and_eval(X_train, y_train, X_test, y_test, epochs=50, lr=0.01, seed=None):
    """Train a fresh SimpleNN on the training data and score it on the test data.

    Training is full-batch: every step uses all of X_train.

    Args:
        X_train: 2-D array of training features (rows are samples); must expose
            `.shape` and be convertible with `torch.tensor`.
        y_train: training labels (0 or 1), one per row of X_train.
        X_test: 2-D array of test features with the same number of columns.
        y_test: test labels (0 or 1), one per row of X_test.
        epochs: number of gradient steps (default 50).
        lr: learning rate passed to Adam (default 0.01).
        seed: optional int. When given, torch's global RNG is seeded before the
            model is created so weight initialisation is reproducible.
            The default (None) leaves the RNG untouched.

    Returns:
        Tuple `(acc, cm)`: the accuracy score and a 2x2 confusion matrix whose
        rows (true) and columns (predicted) are ordered [0, 1].
    """
    if seed is not None:
        torch.manual_seed(seed)

    model = SimpleNN(X_train.shape[1])
    criterion = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # The training tensors never change, so build them once outside the loop.
    inputs = torch.tensor(X_train).float()
    targets = torch.tensor(y_train).float().view(-1, 1)

    model.train()
    for _ in range(epochs):
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    preds = model(torch.tensor(X_test).float()).detach().numpy()
    # Threshold the sigmoid scores at 0.5 to get hard 0/1 predictions.
    preds = (preds > 0.5).astype(int).flatten()
    acc = accuracy_score(y_test, preds)
    # labels=[0, 1] pins the matrix to 2x2. Without it, a small test split in
    # which only one class occurs (in both y_test and preds) yields a 1x1 matrix.
    cm = confusion_matrix(y_test, preds, labels=[0, 1])
    return acc, cm

# Fit one model on the clean labels, then one on the poisoned labels.
# Both are evaluated on the same test split with clean labels.
acc_clean, cm_clean = train_and_eval(X_train, y_train_clean, X_test, y_test)
acc_poison, cm_poison = train_and_eval(X_train, y_train_poisoned, X_test, y_test)

# Print accuracy and confusion matrix for each run
for _title, _acc, _cm in (
    ("Before Poisoning:", acc_clean, cm_clean),
    ("\nAfter Poisoning:", acc_poison, cm_poison),
):
    print(_title)
    print("Accuracy:", _acc)
    print("Confusion Matrix:\n", _cm)

Before Poisoning:
Accuracy: 0.3333333333333333
Confusion Matrix:
 [[0 2]
 [0 1]]

After Poisoning:
Accuracy: 0.3333333333333333
Confusion Matrix:
 [[1 1]
 [1 0]]
