In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd 

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

In [2]:
#import data
bhbh = pd.read_csv('BHBH.csv')

#fill na
bhbh.fillna(0)

Unnamed: 0.1,Unnamed: 0,name,Mass_0,Z_0,Mass_1,a,e,Mass_0_out,Mass_1_out,GWtime,BWorldtime,RLO,CE,tdelay,Mass_max_out,q,Mass_chirp
0,0,0_892687128228499,34.044,0.0001,26.514,1850.0,0.0535,33.47953,10.136270,1.063429e+11,6.826057,0.0,2.0,1.063429e+11,10.136270,0.302760,15.504785
1,1,0_853530625884132,72.066,0.0001,27.308,2980.0,0.6550,35.86105,6.408674,5.383296e+05,6.673187,1.0,1.0,5.383363e+05,6.408674,0.178708,12.348916
2,2,0_663761388951870,45.129,0.0001,43.875,8900.0,0.8860,44.50211,43.256140,2.696385e+10,4.636688,0.0,0.0,2.696385e+10,43.256140,0.972002,38.194377
3,3,0_288577009013924,91.090,0.0001,53.664,7730.0,0.6940,38.06668,53.322640,1.109533e+11,4.094508,1.0,0.0,1.109533e+11,38.066680,0.713893,39.110638
4,4,0_736350695048425,53.827,0.0001,38.009,15200.0,0.0196,53.10823,37.424980,4.710726e+13,5.119633,0.0,0.0,4.710726e+13,37.424980,0.704693,38.693005
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81888,331,0_133330331314376,24.346,0.0100,24.215,7570.0,0.0716,20.10294,20.050940,5.588582e+13,7.316824,0.0,0.0,5.588582e+13,20.050940,0.997413,17.477974
81889,332,0_386148050098142,119.702,0.0100,84.740,81600.0,0.3620,41.13939,34.333730,1.901373e+18,3.272379,0.0,0.0,1.901373e+18,34.333730,0.834571,32.691048
81890,333,0_272337824410863,56.315,0.0100,34.050,11500.0,0.6130,23.80512,22.512770,2.610532e+14,5.458789,0.0,0.0,2.610532e+14,22.512770,0.945711,20.151614
81891,334,0_862589532557700,104.790,0.0100,44.480,93300.0,0.6480,38.47061,20.186010,1.457729e+18,4.544498,0.0,0.0,1.457729e+18,20.186010,0.524713,24.012884


In [3]:
#add column label
n_bins = 20

# Define the edges between bins
counts, bin_edges = np.histogram(bhbh.q.values, bins=n_bins, range=(0.0,1.0))
weight = [sum(counts)/count if count != 0 else 0 for count in counts]
#weight = weight/max(weight)
weight = weight/sum(weight)
print(weight)
weight = torch.from_numpy(np.array(weight))

# pd.cut each column, with each bin closed on left and open on right
bhbh['labels'] = pd.cut(bhbh['q'].values, bins=bin_edges, right=False, labels=False)

bhbh.head()

print('bin_edges:', len(bin_edges))

bhbh.head()

[0.00000000e+00 9.82967344e-01 8.69882605e-03 2.23910557e-03
 1.43289700e-03 1.00200545e-03 7.49212914e-04 5.69836141e-04
 4.20971025e-04 3.19768166e-04 2.88938079e-04 2.29129917e-04
 1.97185024e-04 1.71159210e-04 1.52539935e-04 1.41985749e-04
 1.34560896e-04 1.16272456e-04 9.38124970e-05 7.44503025e-05]
bin_edges: 21


Unnamed: 0.1,Unnamed: 0,name,Mass_0,Z_0,Mass_1,a,e,Mass_0_out,Mass_1_out,GWtime,BWorldtime,RLO,CE,tdelay,Mass_max_out,q,Mass_chirp,labels
0,0,0_892687128228499,34.044,0.0001,26.514,1850.0,0.0535,33.47953,10.13627,106342900000.0,6.826057,,2.0,106342900000.0,10.13627,0.30276,15.504785,6
1,1,0_853530625884132,72.066,0.0001,27.308,2980.0,0.655,35.86105,6.408674,538329.6,6.673187,1.0,1.0,538336.3,6.408674,0.178708,12.348916,3
2,2,0_663761388951870,45.129,0.0001,43.875,8900.0,0.886,44.50211,43.25614,26963850000.0,4.636688,,,26963850000.0,43.25614,0.972002,38.194377,19
3,3,0_288577009013924,91.09,0.0001,53.664,7730.0,0.694,38.06668,53.32264,110953300000.0,4.094508,1.0,,110953300000.0,38.06668,0.713893,39.110638,14
4,4,0_736350695048425,53.827,0.0001,38.009,15200.0,0.0196,53.10823,37.42498,47107260000000.0,5.119633,,,47107260000000.0,37.42498,0.704693,38.693005,14


## Dataset

In [4]:
class BHBHdatset(torch.utils.data.Dataset):
    def __init__(self, df, n_bins, input_cols, target_cols, scale_data=True):
        for i in input_cols:
            if i != 'Z_0':
                df[i] = (df[i] - df[i].mean())/df[i].std()
            
        self.X = torch.from_numpy(df[input_cols].values)
        
        #label one-hot encoding
        y = torch.from_numpy(df[target_cols].values)
        t = torch.zeros(len(self.X), n_bins)
        for k in range(len(t)):
            t[k, y[k].item()] = 1
        self.y = t
            
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, i):
        return self.X[i], self.y[i]


## Dataloader

In [5]:
data = bhbh
dataset = BHBHdatset(data,
                     n_bins,
                     ['Mass_0','Z_0', 'Mass_1', 'a', 'e'],
                     ['labels'])
#trainloader
trainloader = torch.utils.data.DataLoader(dataset,
                                          batch_size=500,
                                          shuffle=True)

In [6]:
for i, x in enumerate(trainloader):
    if i < 2:
        print(i, x[1])

0 tensor([[0., 0., 0.,  ..., 1., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.]])
1 tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 1., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])


## Network

In [7]:
class Net(nn.Module):
    def __init__(self, n_bins):
        
        super(Net, self).__init__()
        
        self.in1 = nn.Linear(5, 10)
        
        self.in2 = nn.Linear(10, 20)
        
        self.in3 = nn.Linear(20, 40)
        
        self.output = nn.Linear(40, n_bins)
        
    # x represents our data
    def forward(self, x):
        x = self.in1(x)
        x = torch.relu(x)
        x = self.in2(x)
        x = torch.relu(x)
        x = self.in3(x)
        x = torch.relu(x)
        
        x = self.output(x)
        #x = F.softmax(x)
        #prediction = torch.argmax(x)
        
        return x  #, prediction

## Training

In [8]:
device = "cuda" # device cardinal

weight = weight.to(device)
net = Net(n_bins).to(device)
loss_function = nn.CrossEntropyLoss(weight=weight)
optimizer = torch.optim.Adam(net.parameters(), lr=1e-4)

In [None]:
# Define the latent CG dimension, feel free to set your own, it only needs to be smaller than the number of total atoms 

#Liste contenenti i valori delle loss per ogni epoca
loss_log = []

#Ciclo del training, 600 epoche
for epoch in range(50):

    loss_epoch = 0.0  

    #Ciclo sulle batch
    for i, t in enumerate(trainloader):
        
        # recenter xyz 
        x = t[0].to(device)
        y = t[1].to(device)

        # encode and decode coordinates 
        x_distribution = net(x.float())
        
        #loss
        loss = loss_function(x_distribution, y)

        #parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        #losses update
        loss_epoch += loss.item()

    #losses normalization
    loss_epoch = loss_epoch/x.shape[0]

    #Vengono aggiunti i valori delle loss alle liste
    loss_log.append(loss_epoch)
    
    if epoch %10 == 0:
        print(loss_epoch, loss.item())

0.00028597464690111317 0.0005779783566616171
0.0002723425250223559 0.000633535937115127
0.00025398477228123106 0.0005298533901039662
0.00024856843017543315 0.0005294453092737371
0.00024493798210919164 0.0005636260963391666


In [None]:
net.eval()

In [None]:
dataset[:][0].shape

In [None]:
_ = net(dataset[:][0].to(device).float())

In [None]:
label = torch.argmax(_, dim=1).detach().cpu().numpy()

label

In [None]:
plt.hist(label, bins=n_bins)

In [None]:
plt.hist(bhbh['labels'], bins=n_bins)