The neural net is trained on a combination of provided and original features. The file sensor_geometry.csv contains the (x,y,z) positions of the sensors in the IceCube detector. The file train_meta.parquet provides additional information about each neutrino event. Most importantly, it contains the true azimuth and zenith of the incoming neutrino, which we train against. The neutrino events are stored in files of the form batch_##.parquet, where ## ranges from 1 to 660. Each batch file contains the information on ~200,000 neutrino events. A single neutrino event consists of an arbitrary number of rows in the parquet file, with each row corresponding to a single sensor activation during the neutrino event. 

The all-features.csv file consists of original features derived from the linear regression and clustering computations. Of these, we use only an initial guess of the azimuth and zenith and an estimate of the number of sensor-activation clusters in the event. 

This is a simple fully connected neural network. 

In [None]:
#Take Care of the Imports
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.nn import Linear
from torch.utils.data import Dataset, DataLoader


In [None]:
# Parent directories of the input files. Each variable names the directory
# that holds one input:
#
#   pre_dir          -> all-features.csv
#   sensor_geom_dir  -> sensor_geometry.csv
#   meta_dir         -> train_meta.parquet
#   batch_dir        -> batch_##.parquet
#
# By default they all live under a single data root; reassign any of the
# four variables individually if a file is stored somewhere else.
data_root = '/opt/app/data/erdos-data/'

pre_dir = data_root
sensor_geom_dir = data_root
meta_dir = data_root
batch_dir = data_root + 'train/'


In [None]:
# Build the full path of each input file from its parent directory.
pre_path = f"{pre_dir}all-features.csv"
sensor_geom_path = f"{sensor_geom_dir}sensor_geometry.csv"
meta_path = f"{meta_dir}train_meta.parquet"

# Read the precompiled features and the sensor geometry table.
pre_feature = pd.read_csv(pre_path)
sensor_geom = pd.read_csv(sensor_geom_path)

# Read the metadata only when no `meta` exists yet, so that re-running this
# cell reuses the table that is already in memory.
if "meta" not in globals():
    meta = pd.read_parquet(meta_path)

In [None]:
# Pick the compute device: CUDA when available, otherwise MPS when available,
# otherwise the CPU. `device` is what the rest of the notebook passes to
# .to(device) so that all tensors end up on the same device.
if torch.cuda.is_available():
    device = "cuda:0"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Make the chosen device the default for newly created tensors.
torch.set_default_device(device)
print(f"Using {device} device")

In [None]:
class NeutrinoDataset(Dataset):
    """Torch dataset that exposes one neutrino event per item.

    Initialization takes the batch file name, the sensor geometry file name,
    the batch ID, and the auxiliary flag. Besides its arguments the class
    reads two module-level tables: `meta` (event metadata, used in
    `__init__`) and `pre_feature` (precomputed event features, used in
    `__getitem__`).

    Each item is a pair `(features, target)` of float64 tensors:

    * `features` has length 5160 * 7 + 3. For every sensor row it holds, in
      order: time of the first pulse, time of the last pulse (left at 0 when
      the sensor fired only once), cumulative charge, a "scatter" value
      relative to the highest-charge pulse, and the sensor x, y, z. Rows of
      sensors that did not fire stay all-zero. The last three entries are the
      precomputed `az_t_pred`, `ze_t_pred` and `num_clusters` of the event.
    * `target` is the last two columns of the event's metadata row
      (presumably the true azimuth and zenith -- confirm against the schema
      of train_meta.parquet).
    """

    # Rows / columns of the per-sensor feature table built in __getitem__.
    NUM_SENSORS = 5160
    FEATURES_PER_SENSOR = 7

    def __init__(self, batch_filename, sensor_file_name, batch_id, aux):
        """Load the sensor geometry, the batch's metadata rows and its pulses.

        Parameters
        ----------
        batch_filename : path of the batch parquet file.
        sensor_file_name : path of the sensor geometry CSV file.
        batch_id : value matched against `meta.batch_id`.
        aux : only pulses whose `auxiliary` column equals this value are kept.
        """
        # Save the sensor geometry and the metadata rows of the given batch.
        self.sensor_geom = pd.read_csv(sensor_file_name)
        self.vals_df = meta[meta.batch_id == batch_id]

        # Load the parquet file as a DataFrame and filter by the aux flag.
        self.dataframe = pd.read_parquet(batch_filename)
        self.dataframe = self.dataframe[self.dataframe.auxiliary == aux]

        # Total number of per-sensor features:
        # (first time, last time, charge) + scatter + (x, y, z) per sensor.
        self.num_features = 5160 * (3 + 1 + 3)

        # An event spans several rows that share one index value, so the
        # unique index values are the event ids.
        self.num_events = self.dataframe.index.nunique()
        self.unique_indices = np.unique(self.dataframe.index)

    def __len__(self):
        """Return the number of neutrino events (not the number of rows)."""
        return self.num_events

    def __getitem__(self, i):
        """Return `(features, target)` for the i-th unique event id.

        Raises
        ------
        KeyError
            If the event id has no row in `pre_feature`.
        """
        # Event id corresponding to the i-th unique index value.
        event_id = self.unique_indices[i]

        # A list key always yields a DataFrame, even when the event consists
        # of a single pulse (a scalar key would return a Series in that case
        # and break the column accesses below).
        event = self.dataframe.loc[[event_id]]

        # Metadata row of the event as a float array.
        meta_vals = np.array(
            self.vals_df.loc[self.vals_df['event_id'] == event_id])[0].astype(float)

        # Offset subtracted from every pulse time.
        # NOTE(review): this is the third metadata column; if that column is
        # a pulse index rather than a time, the subtraction mixes units --
        # confirm against the train_meta.parquet schema.
        time_offset = meta_vals[2]

        # One row per sensor; `seen` records which sensors already fired so
        # that a first relative time of exactly 0 is not mistaken for "unset".
        sensor_features = np.zeros([self.NUM_SENSORS, self.FEATURES_PER_SENSOR])
        seen = np.zeros(self.NUM_SENSORS, dtype=bool)

        # Pulse with the largest charge: [sensor_id, time, [x, y, z]].
        loudest = self.loudest_bang(event)
        loudest_time = loudest[1]
        loudest_xyz = np.array(loudest[2])

        # Columns are read by name, consistently with loudest_bang.
        pulses = zip(event.sensor_id.values, event.time.values, event.charge.values)
        for sensor_id, pulse_time, charge in pulses:
            sensor_id = int(sensor_id)
            row = sensor_features[sensor_id]  # view: writes land in the table

            # The first pulse of a sensor sets the first time; every later
            # pulse overwrites the last time, so after the loop it holds the
            # time of the final pulse.
            if not seen[sensor_id]:
                row[0] = pulse_time - time_offset
                seen[sensor_id] = True
            else:
                row[1] = pulse_time - time_offset
            row[2] += charge

            # Sensor position, looked up once and used for both the scatter
            # value and the positional features.
            xyz = np.array(self.id_to_xyz(sensor_id))

            # Distance from the loudest sensor minus 0.23 times the time by
            # which this pulse precedes the loudest pulse (0.23 is presumably
            # a propagation speed in detector units -- confirm). The value of
            # the sensor's last pulse is the one that is kept.
            row[3] = np.linalg.norm(xyz - loudest_xyz) - 0.23 * (loudest_time - pulse_time)
            row[4:7] = xyz

        # Event-level features: initial guesses of azimuth and zenith and the
        # number of clusters. The table is filtered once; when several rows
        # match, the first one is used.
        pre_row = pre_feature.loc[pre_feature['event_id'] == event_id]
        if pre_row.empty:
            raise KeyError(f"event_id {event_id} not found in precomputed features")
        event_features = [
            pre_row['az_t_pred'].iloc[0],
            pre_row['ze_t_pred'].iloc[0],
            pre_row['num_clusters'].iloc[0],
        ]

        # Flatten the per-sensor table and append the event features.
        flattened_pulse = np.append(sensor_features.flatten(), event_features)

        # Features and the last two metadata values as torch tensors.
        return (torch.from_numpy(flattened_pulse),
                torch.from_numpy(meta_vals[-2:]))

    def loudest_bang(self, event):
        """Return `[sensor_id, time, [x, y, z]]` of the event's maximum-charge pulse."""
        i = event.charge.values.argmax(axis=0)
        sen_max = event.sensor_id.values[i]
        time_max = event.time.values[i]
        max_pos = list(self.id_to_xyz(sen_max))
        return [sen_max, time_max, max_pos]

    def id_to_xyz(self, sen):
        """Return the values in columns 1..3 of geometry row `sen` as a tuple.

        Assumes the geometry table is laid out as (sensor_id, x, y, z) and
        that row label `sen` corresponds to that sensor id.
        """
        return tuple(self.sensor_geom.loc[sen].iloc[1:4])
        
    

In [None]:
class NNpredictor(torch.nn.Module):
    """A simple fully connected neural network.

    Seven linear layers of widths 4000, 2000, 1000, 500, 100, 50 and 10 are
    applied in sequence, each followed by a tanh activation (so that signed
    information can propagate) when `use_activation` is true. A final linear
    layer maps the 10 remaining features to 2 outputs. All parameters are
    float64.

    Parameters
    ----------
    use_activation : bool
        Apply tanh after every hidden layer. When false the hidden layers are
        applied without any non-linearity.
    in_features : int or None
        Width of the input vector. When None (the default) the width is taken
        from the module-level `dataset` as `dataset.num_features + 3`, which
        is what the original constructor always did.
    """

    def __init__(self, use_activation=True, in_features=None):
        super().__init__()

        if in_features is None:
            # Backward-compatible default: size the input from the global dataset.
            in_features = dataset.num_features + 3

        # Containers for the hidden layers. `layer_norms` is not used by
        # forward(); it is kept so the attribute remains available.
        self.layers = nn.ModuleList()
        self.layer_norms = nn.ModuleList()
        self.use_activation = use_activation

        # Consecutive width pairs define the hidden layers, created in order.
        widths = [in_features, 4000, 2000, 1000, 500, 100, 50, 10]
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            self.layers.append(nn.Linear(n_in, n_out, dtype=torch.float64))
        self.classifier = nn.Linear(10, 2, dtype=torch.float64)

    def forward(self, x):
        """Return the two network outputs for a batch of feature vectors `x`."""
        # Move the input to wherever the model's parameters live, so the
        # module does not depend on a global device variable.
        new_x = x.to(self.classifier.weight.device)
        for layer in self.layers:
            new_x = layer(new_x)
            if self.use_activation:
                new_x = torch.tanh(new_x)

        # Apply the final linear layer.
        return self.classifier(new_x)

In [None]:
class custom_MAE(nn.Module):
    """Loss module: mean angular distance between predicted and true directions."""

    def __init__(self):
        super().__init__()

    def forward(self, predictions, target):
        """Return `angular_dist_score(predictions, target)`."""
        return self.angular_dist_score(predictions, target)

    # This is the scoring metric provided by Kaggle.
    def angular_dist_score(self, predictions, true):
        '''
        Calculate the MAE of the angular distance between two directions.
        The two directions are first converted to cartesian unit vectors,
        and then their scalar product is computed, which is equal to
        the cosine of the angle between the two vectors. The inverse
        cosine (arccos) thereof is then the angle between the two input vectors.

        Parameters:
        -----------

        predictions : tensor of shape (N, 2) or wider
            column 0 is the predicted azimuth, column 1 the predicted zenith,
            both in radian
        true : tensor of shape (N, 2) or wider
            column 0 is the true azimuth, column 1 the true zenith,
            both in radian; moved to the device of `predictions`

        Returns:
        --------

        dist : 0-dim tensor
            mean over the angular distance(s) in radian, on the device of
            `predictions`

        Raises:
        -------

        ValueError
            if any of the four angle columns contains a non-finite value
        '''

        # Compute on the device of the predictions; no global device needed.
        true = true.to(predictions.device)

        az_true, zen_true = true[:, 0], true[:, 1]
        az_pred, zen_pred = predictions[:, 0], predictions[:, 1]

        if not (torch.all(torch.isfinite(az_true)) and
                torch.all(torch.isfinite(zen_true)) and
                torch.all(torch.isfinite(az_pred)) and
                torch.all(torch.isfinite(zen_pred))):
            raise ValueError("All arguments must be finite")

        # pre-compute all sine and cosine values
        sa1, ca1 = torch.sin(az_true), torch.cos(az_true)
        sz1, cz1 = torch.sin(zen_true), torch.cos(zen_true)

        sa2, ca2 = torch.sin(az_pred), torch.cos(az_pred)
        sz2, cz2 = torch.sin(zen_pred), torch.cos(zen_pred)

        # scalar product of the two cartesian vectors (x = sz*ca, y = sz*sa, z = cz)
        scalar_prod = sz1*sz2*(ca1*ca2 + sa1*sa2) + (cz1*cz2)

        # The scalar product of two unit vectors is always between -1 and 1;
        # clipping guards against numerical error from the finite precision
        # of the sine and cosine functions.
        # NOTE(review): the derivative of arccos is unbounded at +/-1, so
        # gradients can blow up for (anti-)parallel directions -- consider an
        # epsilon margin if training becomes unstable.
        scalar_prod = torch.clip(scalar_prod, -1, 1)

        # convert back to an angle (in radian)
        return torch.mean(torch.abs(torch.arccos(scalar_prod)))
    

In [None]:
def train_loop(dataloader, model, loss_fn, optimizer, epoch, lr, bs):
    """Run one pass over `dataloader`, updating `model` after every batch.

    Parameters
    ----------
    dataloader : iterable of `(X, y)` batches with a `.dataset` attribute.
    model : module to train; switched to training mode.
    loss_fn : callable `loss_fn(pred, y)` returning a scalar tensor.
    optimizer : optimizer over the model's parameters.
    epoch, lr, bs : only used in the progress message (epoch number,
        learning rate and batch size).

    Returns
    -------
    The trained model.

    Every 100 batches (starting with the first) a progress line is printed
    with the mean loss of the most recent (at most 100) batches and the
    number of samples processed so far.
    """
    size = len(dataloader.dataset)
    # Set the model to training mode
    model = model.train()
    recent_losses = []
    seen = 0
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss. The model output is used as-is: the
        # model places its output on its own device.
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Track the loss and the running sample count. Counting actual batch
        # lengths keeps the progress correct when the last batch is short.
        recent_losses.append(loss.item())
        seen += len(X)
        if batch % 100 == 0:
            recent_losses = recent_losses[-100:]
            mean_loss = np.mean(recent_losses)
            print(f"epoch: {epoch:>2d}, lr: {lr:>2f}, batch_size: {bs:>5d}, loss: {mean_loss:>f}  [{seen:>5d}/{size:>5d}]")

    return model

In [None]:
def validate_loop(model, val_set):
    """Return the mean angular distance of `model` over `val_set`.

    The set is read in batches of 5 without shuffling. The score of each
    batch is computed with the module-level `loss_fn.angular_dist_score` and
    weighted by the batch length, so the result is the mean over samples even
    when the last batch is shorter. The data loader's generator is created on
    the module-level `device`.

    The model is switched to evaluation mode and left in it.

    Raises
    ------
    ValueError
        If `val_set` yields no batches.
    """
    # Load the validation data
    validation_dataloader = DataLoader(val_set, batch_size=5, shuffle=False, num_workers=0, generator=torch.Generator(device=device))

    # Running sum of per-sample losses and number of samples seen
    loss_total = 0
    num = 0

    # Set the model to evaluate; no gradients are needed for scoring
    model.eval()
    with torch.no_grad():
        for X, y in validation_dataloader:
            # Compute prediction and accumulate the batch mean times its size
            pred = model(X)
            n = len(X)
            loss_total += loss_fn.angular_dist_score(pred, y) * n
            num += n

    if num == 0:
        raise ValueError("validation set is empty")

    return loss_total / num

In [None]:
# Training hyperparameters and the loss function.
epoch = 20
batch_size = 10
learning_rate = 1e-5
loss_fn = custom_MAE()

# Batch file used for training.
batch_id = 10
batch_path = f"{batch_dir}batch_{batch_id}.parquet"

# Load the non-auxiliary pulses of the batch. The first 16000 events form the
# working subset: events 0..11999 are used for training and events
# 12000..15999 for validation.
dataset = NeutrinoDataset(batch_path, sensor_geom_path, batch_id, aux=False)
sub_dataset = torch.utils.data.Subset(dataset, np.arange(16000))
train = torch.utils.data.Subset(dataset, np.arange(12000))
test = torch.utils.data.Subset(dataset, np.arange(12000, 16000))

# Seed torch before the network is created.
torch.manual_seed(42)

# Initialize the network, the optimizer and the training data loader.
model = NNpredictor()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train_dataloader = DataLoader(
    train,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
    generator=torch.Generator(device=device),
)



In [None]:
# Alternate one training pass and one validation pass per epoch and report
# the validation score after each.
for i in range(epoch):
    model = train_loop(
        dataloader=train_dataloader,
        model=model,
        loss_fn=loss_fn,
        optimizer=optimizer,
        epoch=i,
        lr=learning_rate,
        bs=batch_size,
    )
    mean = validate_loop(model=model, val_set=test)
    print(f"Validation MAE: {mean:>5f}.")
