# Clustering







In [None]:
# The K-Medoids algorithm isn't included in scikit-learn, so we have to install an additional package
!pip install scikit-learn-extra -qq

## Generate Data

In [None]:
from sklearn.datasets import make_blobs, make_circles, make_moons

# Build a small synthetic dataset to cluster.
# Exactly one of the generator calls below should be active: swap which line is
# commented out to see how K-Means performs on different types of patterns.
# The factor and noise parameters are also worth experimenting with.
# random_state is fixed so results can be replicated; change it or remove it
# to get different patterns.

X, y = make_blobs(
    n_samples=100,
    centers=3,
    random_state=42,
)
# X, y = make_circles(n_samples=100, factor=.5, noise=.05, random_state=42)
# X, y = make_moons(n_samples=100, noise=.05, random_state=42)

## Choose the clustering algorithm

In [None]:
from sklearn.cluster import KMeans
from sklearn_extra.cluster import KMedoids
import matplotlib.pyplot as plt

# Choose the clustering algorithm by commenting/uncommenting one of the lines below
# Experiment with changing the value of n_clusters; observe how the predictions change
# How does the algorithm behave when this doesn't match the true number of clusters in the data?
# random_state is fixed so that repeated runs give the same clusters (the data
# generation is seeded too); change it or remove it to see run-to-run variation.

clusterer = KMeans(n_clusters=2, random_state=42)
# clusterer = KMedoids(n_clusters=2, random_state=42)

clusterer.fit(X)  # Run the clustering algorithm
labels = clusterer.predict(X)  # Get the predicted cluster labels for the data

# Visualize results: true labels on the left, predicted labels on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

panels = (
    (ax1, y, "True Labels"),
    (ax2, labels, "Predicted Labels"),
)
for ax, colors, title in panels:
    ax.scatter(X[:, 0], X[:, 1], c=colors)
    ax.set_title(title)

    # Hide X and Y axes tick marks
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

# Neural Networks

In [None]:
import torch

# Use the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

In [None]:
# Hyperparameters shared by the cells below

# Network dimensions
input_size = 28 * 28  # One input for each pixel of a 28 x 28 image (= 784)
hidden_size = 128     # Number of neurons in the single hidden layer
output_size = 10      # One output per class (digit)

# Training schedule
batch_size = 128
num_epochs = 10

## Preparing the data

In [None]:
import torchvision.transforms as transforms

# Preprocessing pipeline applied to every image as it is loaded.
# Only the tensor conversion is active by default.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        # Feel free to uncomment the next line to see how the results are affected (or not affected)
        # transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

In [None]:
import torchvision.datasets as datasets

# Download MNIST (if it is not already under ./data) and apply `transform` to every image
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transform,
    download=True,
)

# Wrap the datasets in loaders that yield mini-batches of `batch_size` examples.
# Only the training data is shuffled; the test data keeps its original order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

## Defining the network

In [None]:
import torch.nn as nn

# Define the neural network architecture
class MLP(nn.Module):
    """Multi-layer perceptron with one ReLU hidden layer.

    Args:
        in_features: Number of inputs per example. When omitted, the
            notebook-level ``input_size`` is used.
        hidden_features: Number of neurons in the hidden layer. When omitted,
            the notebook-level ``hidden_size`` is used.
        out_features: Number of outputs per example. When omitted, the
            notebook-level ``output_size`` is used.
    """

    def __init__(self, in_features=None, hidden_features=None, out_features=None):
        super().__init__()

        # Fall back to the notebook hyperparameters only for sizes that were
        # not given explicitly, so `MLP()` keeps working exactly as before.
        in_features = input_size if in_features is None else in_features
        hidden_features = hidden_size if hidden_features is None else hidden_features
        out_features = output_size if out_features is None else out_features

        self.fc1 = nn.Linear(in_features, hidden_features)   # This holds the weights and biases for the first fully connected layer
        self.fc2 = nn.Linear(hidden_features, out_features)  # Second fully connected layer, producing the outputs

    def forward(self, x):
        """Return the raw outputs of the final linear layer for a batch `x`."""
        # Flatten each example into a vector. The width is read from the layer
        # itself, so the model keeps working even if the notebook-level
        # `input_size` is changed after the model was created.
        x = x.view(-1, self.fc1.in_features)
        x = self.fc1(x)
        x = torch.relu(x)  # ReLU nonlinearity
        x = self.fc2(x)
        return x

In [None]:
# Build the network and place its parameters on the selected device
mlp = MLP()
mlp.to(device)

# Loss function used during training
loss_fn = nn.CrossEntropyLoss()

# Pick an optimizer: leave exactly one of the lines below uncommented.
# Try the alternatives, and vary their parameters, to see how training is affected.
# It's a good idea to change the name of the run in the following cell to keep track of which optimizer and parameter setting you used

optimizer = torch.optim.SGD(
    mlp.parameters(),
    lr=0.1,
    momentum=0,
    weight_decay=0,
)
# optimizer = torch.optim.Adagrad(mlp.parameters(), lr=0.1)
# optimizer = torch.optim.RMSprop(mlp.parameters(), lr=0.01)
# optimizer = torch.optim.Adam(mlp.parameters(), lr=0.01)
# optimizer = torch.optim.AdamW(mlp.parameters(), lr=0.01)

In [None]:
%load_ext tensorboard
# TensorBoard is a logging utility with many capabilities;
# in this notebook it is only used to plot the training loss.

from torch.utils.tensorboard import SummaryWriter

# Change the name of the run here to keep track of your experiments in Tensorboard
writer = SummaryWriter(log_dir='logs/first_run')

## Training the network

In [None]:
%tensorboard --logdir logs

In [None]:
# Train the neural network

# Put the network in training mode. The evaluation cell switches it to eval
# mode, so this matters whenever this cell is re-run after evaluating.
mlp.train()

total_steps = 0  # Number of parameter updates so far, across all epochs

for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        total_steps += 1

        # Move the data to the GPU (if one is being used)
        images = images.to(device)
        labels = labels.to(device)

        outputs = mlp(images)  # Forward pass through the network

        loss = loss_fn(outputs, labels)  # Compute the loss

        optimizer.zero_grad()  # Clear the gradients left over from the previous step
        loss.backward()  # Compute gradients ("backward" refers to the method of computing gradients by making a backward pass through the network)
        optimizer.step()  # Update the parameters with one step of gradient descent

        # Log and print the loss every 100 iterations
        if (i + 1) % 100 == 0:
            loss_value = loss.item()  # .item() takes a one-element tensor and returns the value
            writer.add_scalar("Loss", loss_value, total_steps)
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss_value:.4f}')

writer.close()

## Evaluating the Network

In [None]:
# Measure classification accuracy on the test set

mlp.eval()  # Switch the network to evaluation mode

correct = 0  # Number of test examples classified correctly
total = 0    # Number of test examples seen

# torch.no_grad() switches off gradient tracking, which PyTorch performs by default.
# Since we're not training, we don't need gradients, so this speeds things up slightly.
with torch.no_grad():
    for images, labels in test_loader:
        # Move the data to the GPU (if one is being used)
        images, labels = images.to(device), labels.to(device)

        outputs = mlp(images)
        predicted = outputs.max(dim=1).indices  # Index of the largest output = predicted class
        matches = predicted == labels

        total += labels.size(0)
        correct += matches.sum().item()

# The f'' syntax is a "format string," which lets us inject expressions using {}.
print(f'Test Accuracy: {100 * correct / total}%')

In [None]:
import matplotlib.pyplot as plt

# Just get 10 images to display (the first ten test examples, shown in random order)
visual_examples = torch.utils.data.Subset(test_dataset, range(10))
visual_loader = torch.utils.data.DataLoader(dataset=visual_examples, batch_size=1, shuffle=True)

mlp.eval()  # Make sure the network is in evaluation mode before predicting

# We are only making predictions here, so gradient tracking is switched off
with torch.no_grad():
    for images, labels in visual_loader:
        # Move the data to the GPU if available
        images = images.to(device)

        # Make a prediction: the class whose output is largest
        outputs = mlp(images)
        prediction = outputs.argmax(dim=1).item()
        truth = labels.item()

        # Display the image and the prediction
        # (batch_size is 1, so [0, 0] selects the only image and its first channel)
        plt.imshow(images[0, 0].cpu().numpy(), cmap='gray')
        plt.title(f'Prediction: {prediction}, Truth: {truth}')
        plt.axis('off')
        plt.show()