<a href="https://colab.research.google.com/github/mslater21/STAT6685-Project/blob/main/deepInsight.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install and Import Dependencies

In [None]:
!pip install umap-learn
!pip install timm
!pip install git+https://github.com/alok-ai-lab/pyDeepInsight.git
!pip install skorch


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting umap-learn
  Downloading umap-learn-0.5.3.tar.gz (88 kB)
[K     |████████████████████████████████| 88 kB 4.6 MB/s 
Collecting pynndescent>=0.5
  Downloading pynndescent-0.5.8.tar.gz (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 17.8 MB/s 
Building wheels for collected packages: umap-learn, pynndescent
  Building wheel for umap-learn (setup.py) ... [?25l[?25hdone
  Created wheel for umap-learn: filename=umap_learn-0.5.3-py3-none-any.whl size=82829 sha256=5e0f191987ae5af9d5aae31ee1a5eaa2c5c5315664a0afe7cf7863cf6faab83e
  Stored in directory: /root/.cache/pip/wheels/a9/3a/67/06a8950e053725912e6a8c42c4a3a241410f6487b8402542ea
  Building wheel for pynndescent (setup.py) ... [?25l[?25hdone
  Created wheel for pynndescent: filename=pynndescent-0.5.8-py3-none-any.whl size=55513 sha256=9d0b09a77cf91914fcb71a0dd7d78483e3fe6ac6d8c9823a3784b8cb21513dd9
  Stored in directo

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting skorch
  Downloading skorch-0.12.1-py3-none-any.whl (193 kB)
[K     |████████████████████████████████| 193 kB 7.3 MB/s 
Installing collected packages: skorch
Successfully installed skorch-0.12.1


In [None]:
from pyDeepInsight import ImageTransformer, CAMFeatureSelector
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import umap.umap_ as umap
import torch.nn as nn
import torch.optim as optim
import torch
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import accuracy_score
import timm
from skorch import NeuralNetClassifier
from sklearn.model_selection import GridSearchCV


# Import and Prep Dataset

In [None]:
# Single seed shared by every stochastic step so a Restart & Run All is repeatable.
seed = 42
# Seed the global NumPy and PyTorch generators as well: the original cell only
# defined `seed`, leaving weight initialisation and DataLoader shuffling unseeded.
np.random.seed(seed)
torch.manual_seed(seed)

In [None]:
# Load the raw table; the path is resolved relative to the working directory.
X = pd.read_csv('star_classification.csv')
# Encode the string class names as integer codes. `factorize` codes are 0-based
# (not 1-based as the original comment stated); `sort=True` assigns them in sorted
# order of the names rather than order of first appearance, so the mapping does
# not depend on row order: 0=GALAXY, 1=QSO, 2=STAR for the three classes the
# original comment lists. `class_names[code]` recovers the name for a code.
X['class'], class_names = pd.factorize(X['class'], sort=True)
# Number of distinct classes found in the data.
num_classes = len(class_names)

In [None]:
# Separate the label vector from the feature table.
# The labels have to be captured before the column is removed from `X`.
y = X['class'].to_numpy()
X = X.drop(columns='class')

In [None]:
# Columns removed from the feature table before modelling.
# NOTE(review): judging by their names these are identifiers and
# pointing/bookkeeping fields rather than measurements — confirm against the data dictionary.
excluded_columns = [
    'obj_ID', 'alpha', 'delta', 'run_ID',
    'rerun_ID', 'cam_col', 'field_ID', 'fiber_ID',
]
X = X.drop(columns=excluded_columns)

In [None]:
# Stratified 80/20 hold-out split; `seed` fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=.2,
    random_state=seed,
    stratify=y,
)

In [None]:
# Rescale each feature to [0, 1] using the minimum/maximum of the TRAINING split only.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
# Reuse the training-split statistics for the test split. The original called
# `fit_transform` here, which re-fitted the scaler on the test data: the two splits
# were then scaled differently and test-set statistics leaked into preprocessing.
# Test values outside the training range may consequently fall outside [0, 1].
X_test = scaler.transform(X_test)

# Generate DeepInsight Images

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# 2-D UMAP embedding (cosine distance) handed to the ImageTransformer as its
# feature extractor. `random_state` was missing in the original, so the embedding,
# and with it the generated images, could differ from run to run.
reducer = umap.UMAP(
    n_components=2,
    metric='cosine',
    random_state=seed,
    n_jobs=1,
)

# Size of the generated images, in pixels.
pixel_size = (28, 28)
transformer = ImageTransformer(
    feature_extractor=reducer,
    pixels=pixel_size,
)
# Fit on the training split only; the test split is mapped with the same fit later.
transformer.fit(X_train, y=y_train, plot=False)

  warn(


<pyDeepInsight.image_transformer.ImageTransformer at 0x7f4701f560a0>

In [None]:
def generate_images(X, it):
    """Convert ``X`` to images by delegating to ``it.transform``.

    Args:
        X: Feature data in whatever form ``it.transform`` accepts.
        it: Object exposing a ``transform`` method (the notebook passes the
            fitted ``ImageTransformer``).

    Returns:
        Whatever ``it.transform(X)`` returns.
    """
    return it.transform(X)

In [None]:
# Render both splits (train first, then test) with the same fitted transformer.
X_train_img, X_test_img = (
    generate_images(split, transformer) for split in (X_train, X_test)
)

# Test on Pretrained ResNet Model

In [None]:
batch_size = 50
# ToTensor converts each image array into a channels-first tensor.
preprocess = transforms.Compose([transforms.ToTensor()])


def _build_split(images, labels, shuffle):
    """Return (image tensor, label tensor, dataset, loader) for one data split.

    Images are preprocessed one by one, stacked into a single float tensor and
    moved to `device`; labels are wrapped without copying and moved as well.
    """
    image_tensor = torch.stack([preprocess(img) for img in images]).float().to(device)
    label_tensor = torch.from_numpy(labels).to(device)
    dataset = TensorDataset(image_tensor, label_tensor)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return image_tensor, label_tensor, dataset, loader


# Only the training loader reshuffles; the test loader keeps a fixed order.
X_train_tensor, y_train_tensor, trainset, trainloader = _build_split(X_train_img, y_train, shuffle=True)
X_test_tensor, y_test_tensor, testset, testloader = _build_split(X_test_img, y_test, shuffle=False)

In [None]:
# Pretrained ResNet-26d whose classification head is sized from the `num_classes`
# value computed when the data was loaded, instead of a hard-coded 3.
# The original also moved the model to `device` twice; once is sufficient.
net = timm.create_model('resnet26d', pretrained=True, num_classes=num_classes).to(device)

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/resnet26d-69e92c46.pth" to /root/.cache/torch/hub/checkpoints/resnet26d-69e92c46.pth


In [None]:
# Cross-entropy loss on the model outputs, optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
sgd_settings = {'lr': 1e-02, 'momentum': 0.9}
optimizer = optim.SGD(net.parameters(), **sgd_settings)

In [None]:
# Update every parameter of `net` for a fixed number of passes over the training loader.
num_epochs = 10
net.train()
for epoch in range(num_epochs):
    for inputs, labels in trainloader:
        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

In [None]:
# Evaluate `net` on the test loader and report overall accuracy.
batch_predictions = []
batch_labels = []

net.eval()  # switch layers such as batch-norm/dropout to inference behaviour
with torch.no_grad():  # no autograd bookkeeping is needed for evaluation
    for inputs, labels in testloader:
        # Predicted class = index of the largest output in each row.
        batch_predictions.append(net(inputs).argmax(dim=1).cpu().numpy())
        batch_labels.append(labels.cpu().numpy())

# Concatenate once at the end instead of re-allocating with np.append per batch;
# this also keeps the integer dtype (np.empty(0) had cast everything to float).
test_predicted = np.concatenate(batch_predictions)
test_true = np.concatenate(batch_labels)
# scikit-learn's signature is accuracy_score(y_true, y_pred); the original passed the
# arguments swapped (harmless for accuracy, which is symmetric, but misleading).
print(f"The test accuracy was {accuracy_score(test_true, test_predicted):.3f}")

The test accuracy was 0.906


# Train and Test Custom CNN

In [None]:
class CNN(nn.Module):
    """Small two-convolution classifier for 3-channel 28x28 images.

    Two 5x5 convolutions (each followed by ReLU and a kernel-size-1 max-pool,
    which leaves the feature map unchanged) are flattened, passed through
    dropout, and mapped to class probabilities by a linear layer plus softmax.

    Args:
        dropout: Probability of the dropout layer in front of the linear
            classifier. The original accepted this argument but never used it;
            ``dropout=0`` reproduces the original behaviour exactly.
        num_classes: Number of output units. Defaults to ``10``, the value that
            was previously hard-coded, so existing callers are unaffected.

    Note:
        ``forward`` returns probabilities (softmax output), not logits, so it
        must be paired with a loss that expects probabilities or their log.
    """

    def __init__(self, dropout = 0.4, num_classes = 10):
        super(CNN, self).__init__()

        self.cnn1 = nn.Conv2d(3, 64, 5, 1, 0)
        self.relu1 = nn.ReLU()

        # Kernel size 1 makes this pooling layer an identity; kept so the
        # module layout (and its repr) matches the original.
        self.max1 = nn.MaxPool2d(1)

        self.cnn2 = nn.Conv2d(64, 128, 5, 1, 0)
        self.relu2 = nn.ReLU()

        self.max2 = nn.MaxPool2d(1)

        # Holds no parameters, so adding it does not change the state dict.
        self.drop = nn.Dropout(dropout)

        # Each unpadded 5x5 convolution trims 4 pixels per side length:
        # 28 -> 24 -> 20, with 128 channels => 128 * 20 * 20 = 51200 features.
        self.fc1 = nn.Linear(128 * 20 * 20, num_classes)

        self.smax = nn.Softmax(1)

    def forward(self, x):
        """Return class probabilities of shape (batch, num_classes) for ``x``."""
        output = self.max1(self.relu1(self.cnn1(x)))
        output = self.max2(self.relu2(self.cnn2(output)))
        output = torch.flatten(output, 1)
        output = self.drop(output)
        output = self.fc1(output)
        return self.smax(output)

In [None]:
class RegularizedNet(NeuralNetClassifier):
    """NeuralNetClassifier whose loss carries an additional L1 weight penalty.

    Args:
        lambda1: Multiplier applied to the sum of absolute values of all module
            parameters before it is added to the base loss.
        *args, **kwargs: Forwarded unchanged to ``NeuralNetClassifier``.
    """

    def __init__(self, *args, lambda1 = 0.01, **kwargs):
        super().__init__(*args, **kwargs)
        self.lambda1 = lambda1

    def get_loss(self, y_pred, y_true, X = None, training = False):
        """Return the parent's loss plus ``lambda1`` times the parameters' L1 norm.

        The penalty is added regardless of ``training``.
        """
        base_loss = super().get_loss(y_pred, y_true, X = X, training = training)
        l1_norm = sum(param.abs().sum() for param in self.module_.parameters())
        return base_loss + self.lambda1 * l1_norm

In [None]:
# The generated images are channels-last (N, H, W, C) while Conv2d expects
# channels-first (N, C, H, W). The original used `reshape`, which keeps the memory
# order and therefore scrambles pixel values across channels and rows; permuting
# the axes keeps every image intact. Variable names are kept for the later cells.
X_train_img_reshaped = np.transpose(X_train_img, (0, 3, 1, 2))
X_test_img_reshaped = np.transpose(X_test_img, (0, 3, 1, 2))

# skorch wrapper around CNN with the L1-penalised loss.
# NOTE(review): CNN ends in softmax while the criterion is NLLLoss; skorch's
# NeuralNetClassifier is documented to take the log of the module output in that
# case — confirm against the installed skorch version.
cnn = RegularizedNet(
    module = CNN,
    max_epochs = 100,
    criterion = torch.nn.NLLLoss,
    optimizer = torch.optim.SGD,
    lr = .005,
    lambda1 = .00001,
    module__dropout = 0,
    optimizer__weight_decay = 0,
    optimizer__momentum = .9,
    batch_size = 128,
    device = device,
)
    

In [None]:
# Convert the training images to a float tensor on `device`, then train.
# The call stays the cell's last expression so the fitted estimator is displayed.
X_train_cnn = torch.tensor(X_train_img_reshaped).float().to(device)
cnn.fit(X_train_cnn, y_train)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m4.4865[0m       [32m0.5944[0m        [35m1.0245[0m  2.7087
      2        [36m0.3640[0m       [32m0.9458[0m        [35m0.2114[0m  2.6524
      3        [36m0.2007[0m       [32m0.9510[0m        [35m0.1820[0m  2.6617
      4        [36m0.1814[0m       [32m0.9538[0m        [35m0.1664[0m  2.6551
      5        [36m0.1685[0m       [32m0.9569[0m        [35m0.1530[0m  2.6650
      6        [36m0.1605[0m       [32m0.9573[0m        [35m0.1483[0m  2.6761
      7        [36m0.1559[0m       [32m0.9587[0m        [35m0.1445[0m  2.6636
      8        [36m0.1532[0m       0.9585        0.1468  2.6627
      9        [36m0.1497[0m       0.9586        0.1485  2.6621
     10        [36m0.1481[0m       [32m0.9591[0m        0.1456  2.6600
     11        [36m0.1460[0m       0.9590        0.1479  2.6520
     12        [36m0.144

<class '__main__.RegularizedNet'>[initialized](
  module_=CNN(
    (cnn1): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1))
    (relu1): ReLU()
    (max1): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (cnn2): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1))
    (relu2): ReLU()
    (max2): MaxPool2d(kernel_size=1, stride=1, padding=0, dilation=1, ceil_mode=False)
    (fc1): Linear(in_features=51200, out_features=10, bias=True)
    (smax): Softmax(dim=1)
  ),
)

In [None]:
# Score the fitted CNN on the held-out test images.
X_test_cnn = torch.tensor(X_test_img_reshaped).float().to(device)
y_pred = cnn.predict(X_test_cnn)
accuracy_score(y_test, y_pred)

0.74355

In [None]:
# Hyper-parameter grid: 3 * 4 * 3 * 2 = 72 combinations, each cross-validated
# on 5 folds, followed by a refit of the best setting on all training data.
grid = {
    'lambda1': [1e-6, 1e-5, 1e-4],
    'lr': [1e-2, 5e-3, 1e-3, 1e-4],
    'optimizer__momentum': [0.2, 0.5, 0.9],
    'batch_size': [128, 256],
}

gs = GridSearchCV(
    estimator = cnn,
    param_grid = grid,
    refit = True,
    cv = 5,
    scoring = 'accuracy',
)

X_train_grid = torch.tensor(X_train_img_reshaped).float().to(device)
gs.fit(X_train_grid, y_train)

print(gs.best_score_, gs.best_params_)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
-------  ------------  -----------  ------------  ------
      1        [36m1.0930[0m       [32m0.5903[0m        [35m0.9882[0m  2.5971
      2        [36m0.9770[0m       0.5903        [35m0.9640[0m  2.6005
      3        [36m0.9496[0m       [32m0.5923[0m        [35m0.9334[0m  2.5914
      4        [36m0.9143[0m       [32m0.6126[0m        [35m0.8937[0m  2.5996
      5        [36m0.8689[0m       [32m0.6626[0m        [35m0.8431[0m  2.5982
      6        [36m0.8127[0m       [32m0.6925[0m        [35m0.7849[0m  2.5956
      7        [36m0.7515[0m       [32m0.7032[0m        [35m0.7296[0m  2.6016
      8        [36m0.6954[0m       [32m0.7139[0m        [35m0.6828[0m  2.6067
      9        [36m0.6484[0m       [32m0.7271[0m        [35m0.6414[0m  2.5973
     10        [36m0.6068[0m       [32m0.7410[0m        [35m0.6014[0m  2.7207
     11        [36m0.5675[0m       [32m0.751

In [None]:
# NOTE(review): this cell rebuilds the same tensors, datasets and loaders as the
# identical cell in the ResNet section earlier in the notebook — presumably a
# copy/paste leftover; confirm whether it is still needed.
batch_size = 50
preprocess = transforms.Compose([transforms.ToTensor()])

# Training split: per-image preprocessing, stacked into one float tensor on `device`.
train_images = [preprocess(img) for img in X_train_img]
X_train_tensor = torch.stack(train_images).float().to(device)
y_train_tensor = torch.from_numpy(y_train).to(device)
trainset = TensorDataset(X_train_tensor, y_train_tensor)
trainloader = DataLoader(trainset, batch_size=batch_size, shuffle=True)

# Test split: same pipeline, but batches keep their original order.
test_images = [preprocess(img) for img in X_test_img]
X_test_tensor = torch.stack(test_images).float().to(device)
y_test_tensor = torch.from_numpy(y_test).to(device)
testset = TensorDataset(X_test_tensor, y_test_tensor)
testloader = DataLoader(testset, batch_size=batch_size, shuffle=False)

In [None]:
# NOTE(review): same loss/optimizer setup as the earlier ResNet cell. It builds a
# fresh SGD optimizer over the parameters of the existing `net`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=net.parameters(), lr=1e-02, momentum=0.9)

In [None]:
# NOTE(review): repeats the earlier training cell, so when run after it this
# trains the same `net` object for another 10 passes rather than starting afresh.
num_epochs = 10
net.train()
for epoch in range(num_epochs):
    for inputs, labels in trainloader:
        # Gradients accumulate by default, so clear them before each batch.
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

In [None]:
# Evaluate `net` on the test loader and report overall accuracy.
# NOTE(review): this repeats the evaluation cell from the ResNet section.
batch_predictions = []
batch_labels = []

net.eval()  # switch layers such as batch-norm/dropout to inference behaviour
with torch.no_grad():  # no autograd bookkeeping is needed for evaluation
    for inputs, labels in testloader:
        # Predicted class = index of the largest output in each row.
        batch_predictions.append(net(inputs).argmax(dim=1).cpu().numpy())
        batch_labels.append(labels.cpu().numpy())

# Concatenate once at the end instead of re-allocating with np.append per batch;
# this also keeps the integer dtype (np.empty(0) had cast everything to float).
test_predicted = np.concatenate(batch_predictions)
test_true = np.concatenate(batch_labels)
# scikit-learn's signature is accuracy_score(y_true, y_pred); the original passed the
# arguments swapped (harmless for accuracy, which is symmetric, but misleading).
print(f"The test accuracy was {accuracy_score(test_true, test_predicted):.3f}")

The test accuracy was 0.906
