In [83]:
import sys
import h5py
import torch
import random
import progressbar
import numpy as np
from tqdm import tqdm
from torch import nn, optim
from torch.utils import data
import torch.nn.functional as F
from matplotlib import pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [2]:
f =  h5py.File("electrons.hdf5", 'r')
keys = list(f)
elect_x = f[keys[0]][:]
elect_y = f[keys[1]][:]

f =  h5py.File("photons.hdf5", 'r')
keys = list(f)

phot_x = f[keys[0]][:]
phot_y = f[keys[1]][:]

In [3]:
phot_x.shape, elect_x.shape

((249000, 32, 32, 2), (249000, 32, 32, 2))

In [6]:
random.shuffle(elect_x)
random.shuffle(phot_x)

set_size = 249000

splice_e = elect_x[:set_size]
splice_p = phot_x[:set_size]

features = np.vstack((splice_p,splice_e))
labels = np.concatenate((phot_y[:set_size],elect_y[:set_size]))

In [10]:
encoder = LabelEncoder()
encoder.fit(labels)
labels = encoder.transform(labels)

In [11]:
features.shape, labels.shape

((498000, 32, 32, 2), (498000,))

In [36]:
x_train, x_test, y_train, y_test = train_test_split(features, labels, test_size=0.2,random_state=10)

In [37]:
len(x_train), len(y_train)

(398400, 398400)

In [38]:
x_train = x_train.reshape(x_train.shape[0],-1)
x_test = x_test.reshape(x_test.shape[0],-1)

In [39]:
input_size = x_train.shape[-1]

In [40]:
x_train = torch.from_numpy(np.array(x_train)).float()
y_train = torch.from_numpy(np.array(y_train)).long()

x_test = torch.from_numpy(x_test).float()
y_test = torch.from_numpy(y_test).long()

In [41]:
train_dataset = data.TensorDataset(x_train,y_train)
train_loader = data.DataLoader(train_dataset, batch_size=1024)

test_dataset = data.TensorDataset(x_test,y_test)
test_loader = data.DataLoader(test_dataset)

In [69]:
test_loader = data.DataLoader(test_dataset, batch_size=1024)

In [31]:
class Classifier(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(input_size, input_size//2)
        self.drop1 = nn.Dropout(0.25)
        self.fc2 = nn.Linear(input_size//2, input_size//8)
        self.fc3 = nn.Linear(input_size//8, input_size//32)
        self.fc4 = nn.Linear(input_size//32, input_size//128)
        self.fc5 = nn.Linear(input_size//128, 2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.drop1(x)
        x = F.relu(self.fc2(x))
        x = self.drop1(x)
        x = F.relu(self.fc3(x))
        x = self.drop1(x)
        x = F.relu(self.fc4(x))
        x = F.log_softmax(self.fc5(x), dim=1)
        
        return x

In [90]:
num_batches = 398400//1024
def train_model(model):

    training_loss = []
    testing_loss = []
    accu = []
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0003)

    epochs = 21
    running_loss = 0
    best_acc = 0
    for e in range(1,epochs):
        print("Epoch no.", e)
        model.train()
        with tqdm(total = num_batches+1, file=sys.stdout) as pbar:
            for batch_idx, (features, labels) in enumerate(train_loader):
                log_ps = model.forward(features)
                loss = criterion(log_ps, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                pbar.update(1)
                
        training_loss.append(running_loss/len(trainloader))
        test_loss = 0
        accuracy = 0
        model.eval()
        
        for inputs, labels in test_loader:
            logps = model.forward(inputs)
            ps = torch.exp(logps)
            batch_loss = criterion(logps, labels)
            test_loss += batch_loss.item()
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

        test_accuracy = accuracy/len(test_loader)
        
        print(f"Training loss: {running_loss/len(trainloader):.3f} \t Testing loss: {test_loss/len(test_loader):.3f} \t Test accuracy: {test_accuracy:.3f}")
        testing_loss.append(test_loss/len(test_loader))
        accu.append(test_accuracy)
        running_loss = 0
        
        if test_accuracy > best_acc:
            best_acc = test_accuracy
            if best_acc > 70:
                print("Accuracy increased to:", best_acc)
            torch.save(model, f'model_{test_accuracy}_accuracy.pt')
        model.train()

    plt.plot(training_loss)
    plt.plot(training_loss)
    plt.close()
        
    return model, [training_loss, testing_loss], accu

- Since I did the Keras implementation first, I used the architecture of the best model I found from Keras.
- I kept running the below cells till the testing accuracy stops increasing. 
- The best test accuracy I got was **87.9%**

In [85]:
model = Classifier()
model, history, accuracy = train_model(model)

Epoch no. 1
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:53<00:00,  7.35it/s]
Training loss: 0.663 	 Testing loss: 0.640 	 Test accuracy: 0.635
Epoch no. 2
  0%|                                                                                          | 0/390 [00:00<?, ?it/s]

  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "


100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:52<00:00,  7.49it/s]
Training loss: 0.629 	 Testing loss: 0.619 	 Test accuracy: 0.658
Epoch no. 3
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:52<00:00,  7.49it/s]
Training loss: 0.595 	 Testing loss: 0.590 	 Test accuracy: 0.690
Epoch no. 4
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:52<00:00,  7.47it/s]
Training loss: 0.553 	 Testing loss: 0.563 	 Test accuracy: 0.715
Epoch no. 5
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:52<00:00,  7.47it/s]
Training loss: 0.511 	 Testing loss: 0.538 	 Test accuracy: 0.738
Epoch no. 6
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.58it/s]
Training loss: 0.473 	 Testing loss: 0.519 	 Test accuracy: 0.755
Epoch no. 7
100%|█████

In [86]:
model, history, accuracy = train_model(model)

Epoch no. 1
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.54it/s]
Training loss: 0.237 	 Testing loss: 0.445 	 Test accuracy: 0.847
Epoch no. 2
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.54it/s]
Training loss: 0.227 	 Testing loss: 0.443 	 Test accuracy: 0.849
Epoch no. 3
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.58it/s]
Training loss: 0.220 	 Testing loss: 0.451 	 Test accuracy: 0.851
Epoch no. 4
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.59it/s]
Training loss: 0.215 	 Testing loss: 0.449 	 Test accuracy: 0.854
Epoch no. 5
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.61it/s]
Training loss: 0.210 	 Testing loss: 0.455 	 Test accuracy: 0.852
Epoch no. 

In [87]:
model, history, accuracy = train_model(model)

Epoch no. 1
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:52<00:00,  7.43it/s]
Training loss: 0.149 	 Testing loss: 0.479 	 Test accuracy: 0.869
Epoch no. 2
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:50<00:00,  7.65it/s]
Training loss: 0.146 	 Testing loss: 0.470 	 Test accuracy: 0.869
Epoch no. 3
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.55it/s]
Training loss: 0.143 	 Testing loss: 0.479 	 Test accuracy: 0.871
Epoch no. 4
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:50<00:00,  7.65it/s]
Training loss: 0.142 	 Testing loss: 0.480 	 Test accuracy: 0.868
Epoch no. 5
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.56it/s]
Training loss: 0.138 	 Testing loss: 0.483 	 Test accuracy: 0.870
Epoch no. 

In [88]:
model, history, accuracy = train_model(model)

Epoch no. 1
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:52<00:00,  7.49it/s]
Training loss: 0.114 	 Testing loss: 0.514 	 Test accuracy: 0.874
Epoch no. 2
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.60it/s]
Training loss: 0.112 	 Testing loss: 0.508 	 Test accuracy: 0.875
Epoch no. 3
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.56it/s]
Training loss: 0.110 	 Testing loss: 0.518 	 Test accuracy: 0.875
Epoch no. 4
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.63it/s]
Training loss: 0.109 	 Testing loss: 0.516 	 Test accuracy: 0.875
Epoch no. 5
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.63it/s]
Training loss: 0.107 	 Testing loss: 0.527 	 Test accuracy: 0.875
Epoch no. 

In [91]:
model, history, accuracy = train_model(model)

Epoch no. 1
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:52<00:00,  7.42it/s]
Training loss: 0.094 	 Testing loss: 0.537 	 Test accuracy: 0.877
Epoch no. 2
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.59it/s]
Training loss: 0.092 	 Testing loss: 0.528 	 Test accuracy: 0.877
Epoch no. 3
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.56it/s]
Training loss: 0.092 	 Testing loss: 0.542 	 Test accuracy: 0.876
Epoch no. 4
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.62it/s]
Training loss: 0.090 	 Testing loss: 0.554 	 Test accuracy: 0.877
Epoch no. 5
100%|████████████████████████████████████████████████████████████████████████████████| 390/390 [00:51<00:00,  7.59it/s]
Training loss: 0.089 	 Testing loss: 0.558 	 Test accuracy: 0.876
Epoch no. 