In [1]:
import torch
import pandas as pd
import gc
import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

from model.classifier import Net

from torch.autograd import Variable

from matplotlib import pyplot

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

cuda:0


In [3]:
file_loc = '/afs/crc.nd.edu/user/t/tkim12/Work/CMB_ML/Data/Nside1024/'

# Load background
bkg_file = 400
CMB_bkg = []
for file_num in range(bkg_file):
    Xtemp = np.load(file_loc+"quicklens_backgound/500_events_90Sqr_lensed_Backgroundlmax3500_"+str(file_num)+".npy")
    for k in range(Xtemp.shape[0]):
        CMB_bkg.append(Xtemp[k])


# Load CMB signal only (g = 1)
CMB_sig = []
for file_num in range(1,201):
    Xtemp = np.load(file_loc+"PHS_signal/500_eta100PHS_g1_Sig_"+str(file_num)+".npy")
    for k in range(Xtemp.shape[0]):
        CMB_sig.append(Xtemp[k])

In [4]:
CMB_bkg = np.array(CMB_bkg)
CMB_sig = np.array(CMB_sig)

CMB_bkg_num = CMB_bkg.shape[0]


# Prepare pure background and background + signal
rescale_val = 3

bkgonly = CMB_bkg[0:int(CMB_bkg_num/2)]
SpB = CMB_bkg[int(CMB_bkg_num/2):]+ rescale_val *CMB_sig

bkg_indicator = np.zeros(int(CMB_bkg_num/2))
sig_indicator = np.ones(int(CMB_bkg_num/2))

In [5]:
#plt.imshow(CMB_sig[4])
#plt.savefig('sample_img.pdf')
sig_indicator[100]

1.0

In [6]:
X_data = np.concatenate((bkgonly,      SpB          ))
Y_data = np.concatenate((bkg_indicator,sig_indicator))

wid, hei = X_data.shape[1], X_data.shape[2]

X_data, Y_data = shuffle(X_data, Y_data, random_state=23)

print(wid, hei)
#print(Y_data)

# Data Splitting
ts1 = 0.2
rs1 = 24
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size = ts1, random_state = rs1)

X_data = []
Y_data = []

X_train = X_train.reshape(X_train.shape[0], 1, wid, hei).astype('float64')
X_train = torch.from_numpy(X_train)
X_test = X_test.reshape(X_test.shape[0], 1, wid, hei).astype('float64')
X_test = torch.from_numpy(X_test)

Y_train = Y_train.reshape(Y_train.shape[0], 1).astype('float64')
Y_train = torch.from_numpy(Y_train)
Y_test = Y_test.reshape(Y_test.shape[0], 1).astype('float64')
Y_test = torch.from_numpy(Y_test)

print("Training sample : "+str(X_train.shape[0])+" , Validation sample : "+str(X_test.shape[0]))

from torch.utils.data import Dataset, TensorDataset, DataLoader

train_data = TensorDataset(X_train, Y_train)
test_data = TensorDataset(X_test, Y_test)

trainloader = DataLoader(dataset=train_data, batch_size=128, shuffle=True)
testloader = DataLoader(dataset=test_data, batch_size=1, shuffle=False)

90 90
Training sample : 160000 , Validation sample : 40000


In [7]:
trained_class = './result/eta100_gpu_classification_g4train.pt'
class_model = Net()
class_model.load_state_dict(torch.load(trained_class, map_location=torch.device('cpu')))
class_model.eval()
model = class_model.double()

#model = Net()
#model = model.double()
print(model)
model.to(device)


#from tqdm import tqdm
# https://medium.com/analytics-vidhya/simple-neural-network-with-bceloss-for-binary-classification-for-a-custom-dataset-8d5c69ffffee
loss_fn = nn.BCELoss()

Net(
  (conv1): Conv2d(1, 8, kernel_size=(16, 16), stride=(2, 2), padding=(7, 7))
  (conv2): Conv2d(8, 8, kernel_size=(8, 8), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(8, 8, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(8, 8, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (avepool): AvgPool2d(kernel_size=(2, 2), stride=(1, 1), padding=0)
  (fc1): Linear(in_features=128, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=200, bias=True)
  (fin): Linear(in_features=200, out_features=1, bias=True)
  (sig): Sigmoid()
)


In [16]:
def train_model(model, epochs=20, lr=0.0001):
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loss = []
    test_loss = []
    train_accuracy = []
    test_accuracy = []
    for epoch in range(epochs):
        model.train() # train mode (affects batchnorm layers:
                      # in the subsequent forward passes they'll
                      # exhibit 'train' behaviour, i.e. they'll
                      # normalize activations over batches)
        for i, (X, y) in enumerate(tqdm(trainloader)):
        #for i, (X, y) in enumerate(trainloader):
            X, y = X.to(device), y.to(device)
            #print(X.is_cuda, y.is_cuda)

            pred = model(X)
            loss = loss_fn(pred, y)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            train_loss.append(np.mean(loss.item()))
            train_accuracy.append((pred.round() == y).to(torch.float32).mean().item())

        model.eval() # test mode (affects batchnorm layers:
                     # in the subsequent forward passes they'll
                     # exhibit 'test' behaviour, i.e. they'll
                     # use the accumulated running statistics
                     # to normalize activations)
        epoch_losses = []
        epoch_accuracies = []
        with torch.no_grad(): # avoid calculating gradients during evaluation
            for X, y in testloader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                pred_round = pred.round()

                epoch_losses.append(loss_fn(pred, y).item())
                _, pred = torch.max(pred.data, 1) # pred = index of maximal output along axis=1
                epoch_accuracies.append(
                    (pred_round == y).to(torch.float32).mean().item()
                )
        test_loss.append(np.mean(epoch_losses))
        print("\n Epoch = ",epoch)
        print("\n Training loss = ", np.mean(train_loss))
        print("\n Validation loss = ", np.mean(epoch_losses))
        #print("\n Training acc = ", train_accuracy)
        print("\n Validation acc = ", np.mean(epoch_accuracies))
        test_accuracy.append(np.mean(epoch_accuracies))

    return dict(
        train_loss=train_loss,
        test_loss=test_loss,
        train_accuracy=train_accuracy,
        test_accuracy=test_accuracy
    )

In [None]:
result = train_model(model)

100%|██████████| 1250/1250 [01:53<00:00, 11.04it/s]



 Epoch =  0

 Training loss =  0.031298653628113376

 Validation loss =  0.05921913820146898

 Validation acc =  0.98135


100%|██████████| 1250/1250 [01:53<00:00, 11.04it/s]



 Epoch =  1

 Training loss =  0.03205292987732784

 Validation loss =  0.0573627943223681

 Validation acc =  0.9824


100%|██████████| 1250/1250 [01:53<00:00, 11.04it/s]



 Epoch =  2

 Training loss =  0.031836187359922344

 Validation loss =  0.05507296922914572

 Validation acc =  0.9831


100%|██████████| 1250/1250 [01:53<00:00, 11.01it/s]



 Epoch =  3

 Training loss =  0.03148812342097017

 Validation loss =  0.05518341977448126

 Validation acc =  0.9826


100%|██████████| 1250/1250 [01:53<00:00, 10.99it/s]



 Epoch =  4

 Training loss =  0.030970626216211328

 Validation loss =  0.05576358625694778

 Validation acc =  0.982375


100%|██████████| 1250/1250 [01:53<00:00, 11.03it/s]



 Epoch =  5

 Training loss =  0.030720912940419058

 Validation loss =  0.06059241048588421

 Validation acc =  0.98155


 86%|████████▌ | 1075/1250 [01:37<00:15, 11.01it/s]

In [10]:
#dataiter = iter(testloader)
#images, labels = dataiter.next()

#outputs = model(images)

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images).round()
        #print("Expected : ", labels, "Predicted : ", outputs)
        total += labels.size(0)
        correct += (outputs == labels).sum().item()
        #if (outputs == labels).sum().item() ==0:
        #    print("Expected : ", labels, "Predicted : ", outputs)

print('Accuracy of the network on the validation images: %d %%' % (100. * correct / total))

Accuracy of the network on the validation images: 96 %


In [None]:
Path = "./result/eta100_gpu_classification_g"+str(rescale_val)+"train.pt"
torch.save(model.state_dict(), Path)