In [28]:
import pandas as pd
import torch
import numpy as np
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import time

## Define ManyToOneRNN class

In [72]:
class ManyToOneRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(ManyToOneRNN, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True,nonlinearity='relu')
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # Initialize hidden state with zeros
        h0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size).to(x.device)
        # Forward propagate the RNN
        out, _ = self.rnn(x,h0)

        # Decode the hidden state of the last time step
        out = self.fc(out[:, -1, :])
        return out

## Define MyDataset Class (not needed)

In [29]:
class MyDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        evid = self.data.iloc[idx,0]    #added to include evid in output file
        features = torch.tensor(self.data.iloc[idx, 1:-1], dtype=torch.float32)
        features = features.t()
        target = torch.tensor(self.data.iloc[idx, -1], dtype=torch.float32)
        return evid, features, target   #added evid as return value

## Load model

In [73]:
model = torch.load('/Users/juhe/annie/analysis/Muon_vertex/model.pth')
model.eval()

ManyToOneRNN(
  (rnn): RNN(2, 4, batch_first=True)
  (fc): Linear(in_features=4, out_features=1, bias=True)
)

## Load data (needs to be in Tensor format)

In [36]:
# h5 format
data = pd.read_hdf('/Users/juhe/annie/analysis/Muon_vertex/data.h5', 'df')
data.head(5)

Unnamed: 0,id,ai,eta,truetracklen
0,p0_66,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[1847.96, 1125.22, 1636.88, 1500.98, 2160.9, 1...",147.972
1,p100_11,"[70, 100, 115, 130, 145, 175, 205, 220, 235, 2...","[3902.38, 1059.4, 3823.62, 1585.48, 3049.67, 1...",188.655
2,p102_48,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[921.235, 1818.78, 1769.22, 1853.33, 2079.88, ...",176.992
3,p104_61,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[2840.03, 1991.47, 2949.01, 1859.28, 2389.12, ...",303.262
4,p104_97,"[55, 70, 85, 100, 115, 130, 160, 175, 190, 205...","[2915.57, 2026.21, 1047.48, 109.152, 93.3911, ...",125.716


In [50]:
#data.iloc[2,1:-1]   # the first number selects just the 2nd row; the second number selects the columns
data.iloc[:,1:]   # prints all rows

Unnamed: 0,ai,eta
0,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[1847.96, 1125.22, 1636.88, 1500.98, 2160.9, 1..."
1,"[70, 100, 115, 130, 145, 175, 205, 220, 235, 2...","[3902.38, 1059.4, 3823.62, 1585.48, 3049.67, 1..."
2,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[921.235, 1818.78, 1769.22, 1853.33, 2079.88, ..."
3,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[2840.03, 1991.47, 2949.01, 1859.28, 2389.12, ..."
4,"[55, 70, 85, 100, 115, 130, 160, 175, 190, 205...","[2915.57, 2026.21, 1047.48, 109.152, 93.3911, ..."
...,...,...
1203,"[70, 85, 100, 115, 130, 145, 175, 190, 205, 22...","[3255.71, 2179.67, 1000.26, 3099.8, 1554.33, 3..."
1204,"[55, 70, 85, 100, 115, 130, 145, 160, 175, 190...","[7974.39, 3223.17, 8453.17, 5943.81, 3536.89, ..."
1205,"[55, 70, 85, 100, 115, 130, 145, 160, 175, 190...","[5206.06, 2499.49, 1726.9, 2406.31, 2801.53, 3..."
1206,"[55, 85, 100, 115, 145, 160, 175, 190, 205, 22...","[4837.46, 3448.8, 2344.75, 2591.72, 1900.58, 1..."


In [39]:
# drop truth col
data = data.drop(columns=['truetracklen'])
data.head(5)

Unnamed: 0,id,ai,eta
0,p0_66,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[1847.96, 1125.22, 1636.88, 1500.98, 2160.9, 1..."
1,p100_11,"[70, 100, 115, 130, 145, 175, 205, 220, 235, 2...","[3902.38, 1059.4, 3823.62, 1585.48, 3049.67, 1..."
2,p102_48,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[921.235, 1818.78, 1769.22, 1853.33, 2079.88, ..."
3,p104_61,"[70, 100, 115, 130, 145, 160, 175, 190, 205, 2...","[2840.03, 1991.47, 2949.01, 1859.28, 2389.12, ..."
4,p104_97,"[55, 70, 85, 100, 115, 130, 160, 175, 190, 205...","[2915.57, 2026.21, 1047.48, 109.152, 93.3911, ..."


In [60]:
# not needed
# dataS = MyDataset(data)
# dataloader = DataLoader(dataS)

# convert data into tensor
dataT = torch.tensor(data.iloc[:,1:].values).t()   # this line with .values doesn't work
dataT.unsqueeze_(0)
print(dataT.size())
print(dataT)

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint8, and bool.

In [63]:
data_r2633 = pd.read_csv('/Users/juhe/annie/analysis/ev_ai_eta_r2633.txt', header=None, names=['evid','ai','eta'])
data_r2633.head(5)

Unnamed: 0,evid,ai,eta
0,p1_884,70,2230.38
1,p1_884,85,2646.42
2,p1_884,115,882.544
3,p1_884,130,1309.77
4,p1_884,145,3528.67


In [67]:
data_r2633 = data_r2633.groupby('evid').agg(list).reset_index()
data_r2633.head(5)

Unnamed: 0,evid,ai,eta
0,p104_1374,"[70, 85, 115, 130, 145, 160, 175, 190, 205, 22...","[1340.04, 1005.23, 1941.67, 2167.4, 1567.54, 2..."
1,p105_3956,"[70, 85, 100, 130, 145, 160, 175, 190, 205, 22...","[1494.16, 1346.65, 1878.08, 1374.96, 2435.83, ..."
2,p106_2078,"[355, 370]","[91.1868, 149.54]"
3,p106_2267,"[70, 85, 115, 130, 145, 160, 190, 205, 220, 23...","[3378.22, 975.051, 2383.83, 2210.3, 3012.36, 2..."
4,p118_1648,"[100, 115, 130]","[28.5719, 115.743, 64.9081]"


In [70]:
# open file to write to
out_f = open("fitbyeye_r2633_RNN.txt", "a")

for idx in range(len(data_r2633)):
    dataT = torch.tensor(data_r2633.iloc[idx,1:]).t()
    dataT.unsqueeze_(0)
    out = model(dataT)
    print(data_r2633.iloc[idx,0], out)
    out_f.write(str(data_r2633.iloc[idx,0]) + "," + str(out.data.numpy()[0][0]) + "\n")

p104_1374 tensor([[196.0140]], grad_fn=<AddmmBackward0>)
p105_3956 tensor([[185.3026]], grad_fn=<AddmmBackward0>)
p106_2078 tensor([[37.6015]], grad_fn=<AddmmBackward0>)
p106_2267 tensor([[244.0697]], grad_fn=<AddmmBackward0>)
p118_1648 tensor([[26.6526]], grad_fn=<AddmmBackward0>)
p118_1876 tensor([[56.9690]], grad_fn=<AddmmBackward0>)
p11_513 tensor([[235.3429]], grad_fn=<AddmmBackward0>)
p120_3869 tensor([[105.4688]], grad_fn=<AddmmBackward0>)
p121_735 tensor([[268.9644]], grad_fn=<AddmmBackward0>)
p123_1101 tensor([[154.5600]], grad_fn=<AddmmBackward0>)
p123_3892 tensor([[162.4580]], grad_fn=<AddmmBackward0>)
p124_2066 tensor([[113.1103]], grad_fn=<AddmmBackward0>)
p127_298 tensor([[98.2866]], grad_fn=<AddmmBackward0>)
p128_1711 tensor([[219.7200]], grad_fn=<AddmmBackward0>)
p128_2764 tensor([[151.7610]], grad_fn=<AddmmBackward0>)
p129_3029 tensor([[191.5911]], grad_fn=<AddmmBackward0>)
p135_3460 tensor([[205.4232]], grad_fn=<AddmmBackward0>)
p140_185 tensor([[136.7501]], grad_fn=<

p82_689 tensor([[130.3162]], grad_fn=<AddmmBackward0>)
p83_3364 tensor([[-30.3370]], grad_fn=<AddmmBackward0>)
p84_2874 tensor([[93.1781]], grad_fn=<AddmmBackward0>)
p85_4055 tensor([[195.6964]], grad_fn=<AddmmBackward0>)
p87_471 tensor([[152.5490]], grad_fn=<AddmmBackward0>)
p91_2450 tensor([[188.8239]], grad_fn=<AddmmBackward0>)
p97_151 tensor([[89.2594]], grad_fn=<AddmmBackward0>)
p97_2964 tensor([[138.5030]], grad_fn=<AddmmBackward0>)


In [71]:
out_f.close()

In [49]:
model(dataT)

torch.Size([1, 1])

In [21]:
# using MyDataset and DataLoader
for evid, data, target in dataloader:
    out = model(data)
    print(evid,out)

AttributeError: 'Tensor' object has no attribute 'iloc'