In [1]:
import torch
import pandas as pd
import gc
import matplotlib
matplotlib.use('Agg')

import matplotlib.pyplot as plt
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim

from model.classifier import Net

from torch.autograd import Variable

from matplotlib import pyplot

from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

cuda:0


In [3]:
file_loc = '/afs/crc.nd.edu/user/t/tkim12/Work/CMB_ML/Data/Nside1024/'

# Load background
bkg_file = 400
CMB_bkg = []
for file_num in range(bkg_file):
    Xtemp = np.load(file_loc+"quicklens_backgound/500_events_90Sqr_lensed_Backgroundlmax3500_"+str(file_num)+".npy")
    for k in range(Xtemp.shape[0]):
        CMB_bkg.append(Xtemp[k])


# Load CMB signal only (g = 1)
CMB_sig = []
for file_num in range(1,201):
    Xtemp = np.load(file_loc+"PHS_signal/500_eta50PHS_g1_Sig_"+str(file_num)+".npy")
    for k in range(Xtemp.shape[0]):
        CMB_sig.append(Xtemp[k])

In [4]:
CMB_bkg = np.array(CMB_bkg)
CMB_sig = np.array(CMB_sig)

CMB_bkg_num = CMB_bkg.shape[0]


# Prepare pure background and background + signal
rescale_val = 3

bkgonly = CMB_bkg[0:int(CMB_bkg_num/2)]
SpB = CMB_bkg[int(CMB_bkg_num/2):]+ rescale_val *CMB_sig

bkg_indicator = np.zeros(int(CMB_bkg_num/2))
sig_indicator = np.ones(int(CMB_bkg_num/2))

In [5]:
#plt.imshow(CMB_sig[4])
#plt.savefig('sample_img.pdf')
sig_indicator[100]

1.0

In [6]:
X_data = np.concatenate((bkgonly,      SpB          ))
Y_data = np.concatenate((bkg_indicator,sig_indicator))

wid, hei = X_data.shape[1], X_data.shape[2]

X_data, Y_data = shuffle(X_data, Y_data, random_state=23)

print(wid, hei)
#print(Y_data)

# Data Splitting
ts1 = 0.2
rs1 = 23
X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size = ts1, random_state = rs1)

X_data = []
Y_data = []

X_train = X_train.reshape(X_train.shape[0], 1, wid, hei).astype('float64')
X_train = torch.from_numpy(X_train)
X_test = X_test.reshape(X_test.shape[0], 1, wid, hei).astype('float64')
X_test = torch.from_numpy(X_test)

Y_train = Y_train.reshape(Y_train.shape[0], 1).astype('float64')
Y_train = torch.from_numpy(Y_train)
Y_test = Y_test.reshape(Y_test.shape[0], 1).astype('float64')
Y_test = torch.from_numpy(Y_test)

print("Training sample : "+str(X_train.shape[0])+" , Validation sample : "+str(X_test.shape[0]))

from torch.utils.data import Dataset, TensorDataset, DataLoader

train_data = TensorDataset(X_train, Y_train)
test_data = TensorDataset(X_test, Y_test)

trainloader = DataLoader(dataset=train_data, batch_size=128, shuffle=True)
testloader = DataLoader(dataset=test_data, batch_size=1, shuffle=False)

90 90
Training sample : 160000 , Validation sample : 40000


In [7]:
trained_class = './result/eta50_gpu_classification_g4train.pt'
class_model = Net()
class_model.load_state_dict(torch.load(trained_class, map_location=torch.device('cpu')))
class_model.eval()
model = class_model.double()

#model = Net()
#model = model.double()
print(model)
model.to(device)


#from tqdm import tqdm
# https://medium.com/analytics-vidhya/simple-neural-network-with-bceloss-for-binary-classification-for-a-custom-dataset-8d5c69ffffee
loss_fn = nn.BCELoss()

Net(
  (conv1): Conv2d(1, 8, kernel_size=(16, 16), stride=(2, 2), padding=(7, 7))
  (conv2): Conv2d(8, 8, kernel_size=(8, 8), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(8, 8, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
  (pool2): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
  (conv4): Conv2d(8, 8, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
  (avepool): AvgPool2d(kernel_size=(2, 2), stride=(1, 1), padding=0)
  (fc1): Linear(in_features=128, out_features=200, bias=True)
  (fc2): Linear(in_features=200, out_features=200, bias=True)
  (fin): Linear(in_features=200, out_features=1, bias=True)
  (sig): Sigmoid()
)


In [8]:
def train_model(model, epochs=40, lr=0.0001):
    optimizer = optim.Adam(model.parameters(), lr=lr)

    train_loss = []
    test_loss = []
    train_accuracy = []
    test_accuracy = []
    for epoch in range(epochs):
        model.train() # train mode (affects batchnorm layers:
                      # in the subsequent forward passes they'll
                      # exhibit 'train' behaviour, i.e. they'll
                      # normalize activations over batches)
        for i, (X, y) in enumerate(tqdm(trainloader)):
        #for i, (X, y) in enumerate(trainloader):
            X, y = X.to(device), y.to(device)
            #print(X.is_cuda, y.is_cuda)

            pred = model(X)
            loss = loss_fn(pred, y)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            train_loss.append(np.mean(loss.item()))
            train_accuracy.append((pred.round() == y).to(torch.float32).mean().item())

        model.eval() # test mode (affects batchnorm layers:
                     # in the subsequent forward passes they'll
                     # exhibit 'test' behaviour, i.e. they'll
                     # use the accumulated running statistics
                     # to normalize activations)
        epoch_losses = []
        epoch_accuracies = []
        with torch.no_grad(): # avoid calculating gradients during evaluation
            for X, y in testloader:
                X, y = X.to(device), y.to(device)
                pred = model(X)
                pred_round = pred.round()

                epoch_losses.append(loss_fn(pred, y).item())
                _, pred = torch.max(pred.data, 1) # pred = index of maximal output along axis=1
                epoch_accuracies.append(
                    (pred_round == y).to(torch.float32).mean().item()
                )
        test_loss.append(np.mean(epoch_losses))
        print("\n Epoch = ",epoch)
        print("\n Training loss = ", np.mean(train_loss))
        print("\n Validation loss = ", np.mean(epoch_losses))
        #print("\n Training acc = ", train_accuracy)
        print("\n Validation acc = ", np.mean(epoch_accuracies))
        test_accuracy.append(np.mean(epoch_accuracies))

    return dict(
        train_loss=train_loss,
        test_loss=test_loss,
        train_accuracy=train_accuracy,
        test_accuracy=test_accuracy
    )

In [9]:
result = train_model(model)

100%|██████████| 1250/1250 [01:54<00:00, 10.88it/s]



 Epoch =  0

 Training loss =  0.20465458515652366

 Validation loss =  0.22080158239725284

 Validation acc =  0.91105


100%|██████████| 1250/1250 [02:01<00:00, 10.26it/s]



 Epoch =  1

 Training loss =  0.2003278021262484

 Validation loss =  0.2115437101770928

 Validation acc =  0.91635


100%|██████████| 1250/1250 [01:58<00:00, 10.58it/s]



 Epoch =  2

 Training loss =  0.19591328820819034

 Validation loss =  0.2023178637630879

 Validation acc =  0.91995


100%|██████████| 1250/1250 [01:56<00:00, 10.71it/s]



 Epoch =  3

 Training loss =  0.19221412148854206

 Validation loss =  0.20414231290040458

 Validation acc =  0.917475


100%|██████████| 1250/1250 [01:56<00:00, 10.73it/s]



 Epoch =  4

 Training loss =  0.18837845195579958

 Validation loss =  0.19253758336530474

 Validation acc =  0.92435


100%|██████████| 1250/1250 [01:56<00:00, 10.75it/s]



 Epoch =  5

 Training loss =  0.18479057361830048

 Validation loss =  0.17677036343478875

 Validation acc =  0.931825


100%|██████████| 1250/1250 [01:56<00:00, 10.75it/s]



 Epoch =  6

 Training loss =  0.1812606557514247

 Validation loss =  0.17232606294188704

 Validation acc =  0.932225


100%|██████████| 1250/1250 [01:56<00:00, 10.73it/s]



 Epoch =  7

 Training loss =  0.17785034944196695

 Validation loss =  0.1714666495530868

 Validation acc =  0.932825


100%|██████████| 1250/1250 [01:56<00:00, 10.77it/s]



 Epoch =  8

 Training loss =  0.17428795656816842

 Validation loss =  0.176785038983228

 Validation acc =  0.93115


100%|██████████| 1250/1250 [01:55<00:00, 10.78it/s]



 Epoch =  9

 Training loss =  0.1709015228564931

 Validation loss =  0.17113653213790567

 Validation acc =  0.933475


100%|██████████| 1250/1250 [01:55<00:00, 10.78it/s]



 Epoch =  10

 Training loss =  0.16753928500991444

 Validation loss =  0.15980508900014234

 Validation acc =  0.9386


100%|██████████| 1250/1250 [01:55<00:00, 10.78it/s]



 Epoch =  11

 Training loss =  0.1642443625638262

 Validation loss =  0.13880627034336132

 Validation acc =  0.9471


100%|██████████| 1250/1250 [01:56<00:00, 10.77it/s]



 Epoch =  12

 Training loss =  0.16099939348064515

 Validation loss =  0.13389962830392907

 Validation acc =  0.94905


100%|██████████| 1250/1250 [01:56<00:00, 10.77it/s]



 Epoch =  13

 Training loss =  0.15776855017866384

 Validation loss =  0.140638496296769

 Validation acc =  0.94655


100%|██████████| 1250/1250 [01:56<00:00, 10.76it/s]



 Epoch =  14

 Training loss =  0.15462192938262379

 Validation loss =  0.13010732887316018

 Validation acc =  0.9506


100%|██████████| 1250/1250 [01:56<00:00, 10.75it/s]



 Epoch =  15

 Training loss =  0.15162973971755614

 Validation loss =  0.11590189205269831

 Validation acc =  0.95745


100%|██████████| 1250/1250 [01:56<00:00, 10.76it/s]



 Epoch =  16

 Training loss =  0.14859402187185278

 Validation loss =  0.1108549494498533

 Validation acc =  0.958175


100%|██████████| 1250/1250 [01:56<00:00, 10.73it/s]



 Epoch =  17

 Training loss =  0.14565807738583908

 Validation loss =  0.11272286550997994

 Validation acc =  0.95745


100%|██████████| 1250/1250 [01:56<00:00, 10.71it/s]



 Epoch =  18

 Training loss =  0.1428879692818146

 Validation loss =  0.1094911903901409

 Validation acc =  0.9587


100%|██████████| 1250/1250 [01:56<00:00, 10.73it/s]



 Epoch =  19

 Training loss =  0.14012318780004854

 Validation loss =  0.0969954491566689

 Validation acc =  0.964725


100%|██████████| 1250/1250 [01:56<00:00, 10.75it/s]



 Epoch =  20

 Training loss =  0.13740631466003544

 Validation loss =  0.09858184085825648

 Validation acc =  0.963825


100%|██████████| 1250/1250 [01:55<00:00, 10.79it/s]



 Epoch =  21

 Training loss =  0.13484286479916396

 Validation loss =  0.09909154751242921

 Validation acc =  0.963


100%|██████████| 1250/1250 [01:55<00:00, 10.81it/s]



 Epoch =  22

 Training loss =  0.13235393895208278

 Validation loss =  0.09149483809790301

 Validation acc =  0.9662


100%|██████████| 1250/1250 [01:56<00:00, 10.76it/s]



 Epoch =  23

 Training loss =  0.1298706979257597

 Validation loss =  0.09533323947571144

 Validation acc =  0.965775


100%|██████████| 1250/1250 [01:55<00:00, 10.79it/s]



 Epoch =  24

 Training loss =  0.1275031907703854

 Validation loss =  0.08463681251553386

 Validation acc =  0.968


100%|██████████| 1250/1250 [01:56<00:00, 10.76it/s]



 Epoch =  25

 Training loss =  0.1251417013941755

 Validation loss =  0.07871682342706861

 Validation acc =  0.971375


100%|██████████| 1250/1250 [01:55<00:00, 10.79it/s]



 Epoch =  26

 Training loss =  0.12281203230846192

 Validation loss =  0.07753042195419291

 Validation acc =  0.9725


100%|██████████| 1250/1250 [01:55<00:00, 10.81it/s]



 Epoch =  27

 Training loss =  0.12062286978737574

 Validation loss =  0.06951267616823108

 Validation acc =  0.973725


100%|██████████| 1250/1250 [01:55<00:00, 10.80it/s]



 Epoch =  28

 Training loss =  0.11841889354740741

 Validation loss =  0.07465280874244624

 Validation acc =  0.9727


100%|██████████| 1250/1250 [01:56<00:00, 10.76it/s]



 Epoch =  29

 Training loss =  0.11635695947565056

 Validation loss =  0.07259221199553055

 Validation acc =  0.973075


100%|██████████| 1250/1250 [01:56<00:00, 10.76it/s]



 Epoch =  30

 Training loss =  0.11429140007226546

 Validation loss =  0.06907658240799862

 Validation acc =  0.975225


100%|██████████| 1250/1250 [01:55<00:00, 10.79it/s]



 Epoch =  31

 Training loss =  0.11231810686723798

 Validation loss =  0.05772139738364421

 Validation acc =  0.978525


100%|██████████| 1250/1250 [01:55<00:00, 10.80it/s]



 Epoch =  32

 Training loss =  0.11050261869481783

 Validation loss =  0.059548697900236536

 Validation acc =  0.97855


100%|██████████| 1250/1250 [01:55<00:00, 10.84it/s]



 Epoch =  33

 Training loss =  0.10857968057389915

 Validation loss =  0.07613828808521433

 Validation acc =  0.9734


100%|██████████| 1250/1250 [01:55<00:00, 10.84it/s]



 Epoch =  34

 Training loss =  0.10677459733712699

 Validation loss =  0.05366219241213347

 Validation acc =  0.98115


100%|██████████| 1250/1250 [01:55<00:00, 10.81it/s]



 Epoch =  35

 Training loss =  0.1050289091597905

 Validation loss =  0.05184554691023948

 Validation acc =  0.981925


100%|██████████| 1250/1250 [01:55<00:00, 10.84it/s]



 Epoch =  36

 Training loss =  0.10326069296056364

 Validation loss =  0.05876555624074526

 Validation acc =  0.979125


100%|██████████| 1250/1250 [01:55<00:00, 10.86it/s]



 Epoch =  37

 Training loss =  0.10160231621147019

 Validation loss =  0.059255407404198136

 Validation acc =  0.9789


100%|██████████| 1250/1250 [01:55<00:00, 10.84it/s]



 Epoch =  38

 Training loss =  0.09993794688385613

 Validation loss =  0.04680211134546845

 Validation acc =  0.983125


100%|██████████| 1250/1250 [01:55<00:00, 10.83it/s]



 Epoch =  39

 Training loss =  0.0983510370711301

 Validation loss =  0.04596914007616456

 Validation acc =  0.98395


In [10]:
#dataiter = iter(testloader)
#images, labels = dataiter.next()

#outputs = model(images)

correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = model(images).round()
        #print("Expected : ", labels, "Predicted : ", outputs)
        total += labels.size(0)
        correct += (outputs == labels).sum().item()
        #if (outputs == labels).sum().item() ==0:
        #    print("Expected : ", labels, "Predicted : ", outputs)

print('Accuracy of the network on the validation images: %d %%' % (100. * correct / total))

Accuracy of the network on the validation images: 96 %


In [10]:
Path = "./result/eta50_gpu_classification_g"+str(rescale_val)+"train.pt"
torch.save(model.state_dict(), Path)