# Read the .npz file, keep the 100 highest-pT particles of each jet and write them to a text file
# Then drop the zero-padded rows, leaving one (pT, eta, phi, PID) row per particle

In [1]:
import pandas as pd
import numpy as np
import math

def sort_by_pt(jets):
    """Return a copy of ``jets`` with every jet's particles in descending order of column 0.

    ``jets`` is indexed as [jet, particle, feature].  Column 0 is the sort key;
    the later conversion step reads it back as pT.
    """
    order = np.argsort(-jets[:, :, 0], axis=1)
    return np.take_along_axis(jets, order[:, :, np.newaxis], axis=1)


def write_point_clouds(f, clouds):
    """Write one space-separated row per particle and a "#" line after every jet.

    The original loop only emitted "#" when ``(i + 1) % n_points == 0`` (every
    100th jet) and a blank line otherwise, so the follow-up pass had to rely
    on counting 101 lines per jet.  Every cloud is now closed explicitly.
    """
    for cloud in clouds:
        f.writelines(' '.join([str(x) for x in point]) + '\n' for point in cloud)
        f.write('#\n')


def filter_point_clouds(src, dst):
    """Copy particle rows from ``src`` to ``dst``, dropping zero-padded rows.

    A four-token row is kept only when neither of its first two values is
    zero; each "#" separator line is copied through so jets stay delimited.
    Lines are streamed instead of being loaded with ``readlines()``.
    """
    for line in src:
        ls = line.split()
        if len(ls) == 4:
            if all(float(val) != 0 for val in ls[:2]):
                dst.write("   ".join(ls[:4]) + "   \n")
        elif len(ls) == 1:
            dst.write("#\n")


if __name__ == "__main__":
    data_with_labels = np.load('/home/daohan/apps/qgtagging/QG_jets_1.npz')
    data = data_with_labels['X']
    labels = data_with_labels['y']

    sorted_data = sort_by_pt(data)
    datax = sorted_data[:, 0:100, :]

    n_points = datax.shape[1]

    # Save the data to a text file
    with open('/home/daohan/apps/qgtagging/train_data1.txt', 'w') as f:
        write_point_clouds(f, datax)
    print(datax.shape)

    with open("/home/daohan/apps/qgtagging/train_data1.txt", 'r') as oldf, open('/home/daohan/apps/qgtagging/train_data1y.txt', 'w') as newf:
        filter_point_clouds(oldf, newf)

(100000, 100, 4)


# Map each particle ID to a small numeric code and compute (E, px, py, pz, pT, eta, phi, PID) for every particle

In [None]:
import math
# Particle ID -> small numeric code.  IDs that are not listed are passed
# through unchanged, exactly as the original chain of `if` statements did.
PID_CODES = {
    22: 0,
    211: 0.1,
    -211: 0.2,
    321: 0.3,
    -321: 0.4,
    130: 0.5,
    2112: 0.6,
    -2112: 0.7,
    2212: 0.8,
    -2212: 0.9,
    11: 1,
    -11: 1.1,
    13: 1.2,
    -13: 1.3,
}


def particle_features(pt, eta, phi, pid):
    """Return ``(E, px, py, pz, pt, eta, phi, code)`` for one particle.

    E and pz are computed as pt*cosh(eta) and pt*sinh(eta); ``code`` is the
    PID_CODES entry for ``pid`` (or ``pid`` itself when it has no entry).
    """
    # NOTE(review): angles above 3.14159 are shifted by pi, which reverses the
    # sign of (px, py); a 2*pi-periodic wrap would subtract 2*math.pi instead.
    # Left unchanged because the following step derives the jet angle with
    # the values produced here -- confirm the intent before altering it.
    if phi > 3.14159:
        phi = phi - math.pi
    px = pt*math.cos(phi)
    py = pt*math.sin(phi)
    pz = pt*math.sinh(eta)
    E = pt*math.cosh(eta)
    return (E, px, py, pz, pt, eta, phi, PID_CODES.get(pid, pid))


def convert_particles(src, dst):
    """Stream ``src`` line by line and write the converted rows to ``dst``.

    Four-token lines are read as (pt, eta, phi, PID) and written as eight
    values separated by three spaces; single-token lines are written as "#".
    Any other line is ignored.
    """
    for line in src:
        ls = line.split()
        if len(ls) == 4:
            feats = particle_features(float(ls[0]), float(ls[1]), float(ls[2]), float(ls[3]))
            dst.write("   ".join(str(v) for v in feats) + "\r\n")
        elif len(ls) == 1:
            dst.write("#" + "\r\n")


if __name__ == "__main__":
    # `with` closes both files even if a malformed line raises mid-way.
    with open("/home/daohan/apps/qgtagging/train_data1y.txt",'r') as oldf, open('/home/daohan/apps/qgtagging/train_data1z.txt','w') as newf:
        convert_particles(oldf, newf)

# Preparation of the particle input features 

In [None]:
import math
def jet_axis(particles):
    """Sum the particle momenta of one jet and return ``(pt, eta, phi, E)``.

    ``particles`` is a list of float rows whose first four entries are
    (E, px, py, pz).  The azimuth is taken with ``atan2`` so the sign of the
    summed py is kept; the previous ``acos(px/pt)`` could only return values
    in [0, pi] and could raise on a ratio rounded just above 1.
    """
    pxsum = 0
    pysum = 0
    pzsum = 0
    Esum = 0
    for row in particles:
        pxsum = pxsum + row[1]
        pysum = pysum + row[2]
        pzsum = pzsum + row[3]
        Esum = Esum + row[0]
    ptsum = (pxsum*pxsum+pysum*pysum)**0.5
    etasum = -math.log(ptsum/(pzsum+(ptsum*ptsum+pzsum*pzsum)**0.5))
    phisum = math.atan2(pysum, pxsum)
    return ptsum, etasum, phisum, Esum


def particle_row(row, jet_pt, jet_eta, jet_phi, jet_E, decimal=5):
    """Build the 13 output values for one particle relative to its jet.

    ``row`` holds (E, px, py, pz, pt, eta, phi, PID).  The result is
    (E, px, py, pz, pt, eta, phi, pt/jet_pt, E/jet_E, delta_eta, delta_phi,
    delta_R, PID), everything except PID rounded to ``decimal`` places.
    """
    PID = row[7]
    px = round(row[1], decimal)
    py = round(row[2], decimal)
    pz = round(row[3], decimal)
    E = round(row[0], decimal)
    pt = round(row[4], decimal)
    # eta is not an angle, so it is no longer shifted by 3.14159 when large
    # (the removed branch looked like a copy of the azimuth handling).
    eta = round(row[5], decimal)
    phi = round(row[6], decimal)
    ptc = round(pt/jet_pt, decimal)
    Ec = round(E/jet_E, decimal)
    delta_eta = round(eta - jet_eta, decimal)
    dphi = phi - jet_phi
    # Fold only differences that fall outside [-pi, pi]; values already in
    # range are left bit-for-bit as before.
    if dphi > math.pi:
        dphi = dphi - 2*math.pi
    elif dphi < -math.pi:
        dphi = dphi + 2*math.pi
    delta_phi = round(dphi, decimal)
    delta_R = round((delta_eta*delta_eta+delta_phi*delta_phi)**0.5, decimal)
    return (E, px, py, pz, pt, eta, phi, ptc, Ec, delta_eta, delta_phi, delta_R, PID)


def write_particle_features(src, dst, decimal=5):
    """Stream ``src`` jet by jet and write the particle feature rows to ``dst``.

    Eight-token lines are buffered until the single-token separator that ends
    the jet; the jet sums are then computed, every buffered particle is
    written (each value followed by three spaces) and a "#" line is emitted.
    Returns the per-jet lists ``(PTJ, ETAJ, PHIJ, EJ)``.
    """
    PTJ = []
    ETAJ = []
    PHIJ = []
    EJ = []
    pending = []
    for line in src:
        ls = line.split()
        if len(ls) == 8:
            pending.append([float(v) for v in ls])
        elif len(ls) == 1:
            jet_pt, jet_eta, jet_phi, jet_E = jet_axis(pending)
            PTJ.append(jet_pt)
            ETAJ.append(jet_eta)
            PHIJ.append(jet_phi)
            EJ.append(jet_E)
            for row in pending:
                fields = particle_row(row, jet_pt, jet_eta, jet_phi, jet_E, decimal)
                dst.write("".join(str(v) + "   " for v in fields) + "\r\n")
            dst.write("#" + "\r\n")
            pending = []
    return PTJ, ETAJ, PHIJ, EJ


if __name__ == "__main__":
    decimal = 5
    # `with` closes both files even if a jet fails half-way through.
    with open("/home/daohan/apps/qgtagging/train_data1z.txt",'r') as oldf, open('/home/daohan/apps/qgtagging/train_data1a.txt','w') as newf:
        PTJ, ETAJ, PHIJ, EJ = write_particle_features(oldf, newf, decimal)

# Preparation of the jet input features 

In [None]:
import math
import numpy as np
# Normalised PID codes that get their own running sums.  Particles carrying
# any other code still enter the whole-jet sums but no per-code sum, which is
# what the ten hand-unrolled accumulator sets did before.
TRACKED_PIDS = (0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)


def new_sums():
    """Return a zeroed accumulator for one group of particles."""
    return {"E": 0, "px": 0, "py": 0, "pz": 0, "deta": 0, "dphi": 0, "dr": 0}


def add_particle(sums, E, px, py, pz, pt, deta, dphi, dr):
    """Add one particle to ``sums``; the three angular terms are weighted by pt."""
    sums["px"] = sums["px"] + px
    sums["py"] = sums["py"] + py
    sums["pz"] = sums["pz"] + pz
    sums["E"] = sums["E"] + E
    sums["deta"] = sums["deta"] + deta*pt
    sums["dphi"] = sums["dphi"] + dphi*pt
    sums["dr"] = sums["dr"] + dr*pt


def transverse_momentum(sums):
    """Magnitude of the summed (px, py) vector of an accumulator."""
    return (sums["px"]*sums["px"]+sums["py"]*sums["py"])**0.5


def jet_summary(jet, per_pid):
    """Turn the accumulators of one jet into its 20 output values.

    Order: E, px, py, pz, pt, 1, 1, pt-weighted mean delta_eta / delta_phi /
    delta_R of the whole jet, then the maxima over the tracked PID codes of
    E, |px|, |py|, |pz|, pt, pt fraction and E fraction, and finally the
    pt-weighted mean delta_eta / delta_phi / delta_R of the code with the
    largest summed pt.  Each maximum is taken independently per quantity.
    """
    jet_pt = transverse_momentum(jet)
    mean_deta = jet["deta"]/jet_pt
    mean_dphi = jet["dphi"]/jet_pt
    mean_dr = jet["dr"]/jet_pt

    pid_pt = [transverse_momentum(s) for s in per_pid]
    pid_E = [s["E"] for s in per_pid]
    pt_fraction = [value/jet_pt for value in pid_pt]
    E_fraction = [value/jet["E"] for value in pid_E]

    top_pt = max(pid_pt)
    lead_deta = 0
    lead_dphi = 0
    lead_dr = 0
    # No `break`: when several codes tie for the largest pt the last one
    # wins, matching the original sequence of independent `if` statements.
    for sums, value in zip(per_pid, pid_pt):
        if value == top_pt:
            lead_deta = sums["deta"]/value
            lead_dphi = sums["dphi"]/value
            lead_dr = sums["dr"]/value

    return (
        jet["E"], jet["px"], jet["py"], jet["pz"], jet_pt,
        1, 1,  # placeholder columns (jet pt fraction and E fraction)
        mean_deta, mean_dphi, mean_dr,
        max(pid_E),
        max(abs(s["px"]) for s in per_pid),
        max(abs(s["py"]) for s in per_pid),
        max(abs(s["pz"]) for s in per_pid),
        top_pt,
        max(pt_fraction),
        max(E_fraction),
        lead_deta, lead_dphi, lead_dr,
    )


def jet_feature_table(lines):
    """Build the (n_jets, 20) jet feature array from particle feature lines.

    Thirteen-token lines are particles; columns 0-4 are read as
    (E, px, py, pz, pt) and columns 9-12 as (delta_eta, delta_phi, delta_R,
    PID code).  A single-token line closes the current jet.  The number of
    rows follows the number of separators instead of a fixed 100000.
    """
    rows = []
    jet = new_sums()
    per_pid = [new_sums() for _ in TRACKED_PIDS]
    for line in lines:
        ls = line.split()
        if len(ls) == 13:
            E, px, py, pz, pt = (float(v) for v in ls[0:5])
            deta, dphi, dr, pid = (float(v) for v in ls[9:13])
            add_particle(jet, E, px, py, pz, pt, deta, dphi, dr)
            for code, sums in zip(TRACKED_PIDS, per_pid):
                if pid == code:
                    add_particle(sums, E, px, py, pz, pt, deta, dphi, dr)
        elif len(ls) == 1:
            rows.append(jet_summary(jet, per_pid))
            jet = new_sums()
            per_pid = [new_sums() for _ in TRACKED_PIDS]
    return np.array(rows, dtype=float).reshape(-1, 20)


if __name__ == "__main__":
    with open("/home/daohan/apps/qgtagging/train_data1a.txt",'r') as oldf:
        final = jet_feature_table(oldf)

    # Per-column views under the names the notebook used before.
    (EJ, PXJ, PYJ, PZJ, PTJ, PTFJ, EFJ, ETAJ, PHIJ, RJ,
     EJPID, PXJPID, PYJPID, PZJPID, PTJPID, PTFPID, EFPID,
     ETAPID, PHIPID, RPID) = final.T

    print(EJ[5])
    print(final.shape)
    np.save('/home/daohan/apps/qgtagging/train/train_jet_information1.npy', final)

# Convert to numpy format

In [None]:
import numpy as np
import os   # Python os module

# Parse the particle feature file: one whitespace-separated row of numbers per
# particle, with a lone "#" line closing each point cloud.
# NOTE(review): this path points into a "train/" sub-directory -- confirm the
# text file is actually placed there before this cell runs.
data_file_path = "/home/daohan/apps/qgtagging/train/train_data1a.txt"
data = []
point_counts = []
with open(data_file_path) as f:
    points = []
    for line in f:
        if line.strip() != "#":
            points.append(list(map(float, line.split())))
            continue
        # Separator found: store the finished cloud and start a new one.
        data.append(np.array(points))
        point_counts.append(len(points))
        points = []

# Parse the label file: one number per line.
label_file_path = "/home/daohan/apps/qgtagging/train/train_label1.txt"
with open(label_file_path) as f:
    labels = [float(line.strip()) for line in f]

# Convert to numpy format
labels = np.array(labels)

# Zero-pad every point cloud to a fixed number of rows so that all clouds fit
# into one 3-D array.
#max_point_count = max(point_counts)
max_point_count = 100
data_flat = np.zeros((len(data), max_point_count, len(data[0][0])))
for i, (cloud, count) in enumerate(zip(data, point_counts)):
    data_flat[i, :count] = cloud
np.savez('/home/daohan/apps/qgtagging/train/train_data_with_labels1.npz',data=data_flat, labels=labels)
for caption, value in (
    ("data shape:", data_flat.shape),
    ("labels shape:", labels.shape),
    ("data type:", data_flat.dtype),
    ("labels type:", labels.dtype),
):
    print(caption, value)

# Calculate the particle interaction matrix

In [None]:
import numpy as np
def pair_views(values):
    """Return ``values`` (jet, particle) as two views that broadcast to (jet, i, j)."""
    return values[:, :, np.newaxis], values[:, np.newaxis, :]


def interaction_features(point_cloud):
    """Compute six pairwise features for every particle pair of every jet.

    ``point_cloud`` is indexed as [jet, particle, feature].  The result has
    shape (n_jets, n_particles, n_particles, 6) with, in order: deltaR, mass,
    kt, z, ptd and a 0/1 flag that is 1 when both particles have the same
    value in column 10.  All six entries of every diagonal (i, i) pair are
    set to 1.  Array sizes are taken from ``point_cloud`` itself; the former
    fixed (10000, 150, 150) buffers did not match 100-particle input.

    NOTE(review): columns 7/8 are used as the two coordinates of the distance,
    columns 0-4 are exponentiated and column 10 is compared as an identity
    tag.  That does not obviously match the 13-column layout written by the
    particle-feature step of this file -- presumably this was written for a
    differently ordered, log-scaled feature file; confirm the indices.
    """
    n_particles = point_cloud.shape[1]

    def summed_exp_squared(column):
        left, right = pair_views(np.exp(point_cloud[:, :, column]))
        return (left + right)**2

    a_i, a_j = pair_views(point_cloud[:, :, 7])
    b_i, b_j = pair_views(point_cloud[:, :, 8])
    deltaR = ((a_i - a_j)**2 + (b_i - b_j)**2)**0.5

    mass = np.log(abs(summed_exp_squared(0) - summed_exp_squared(1)
                      - summed_exp_squared(2) - summed_exp_squared(3)))

    e_i, e_j = pair_views(np.exp(point_cloud[:, :, 4]))
    # exp is monotonic, so min(exp(x), exp(y)) equals exp(min(x, y)).
    e_min = np.minimum(e_i, e_j)
    kt = np.log(abs(deltaR*e_min))
    z = e_min/(e_i + e_j)
    ptd = np.log(abs(e_i - e_j))

    tag_i, tag_j = pair_views(point_cloud[:, :, 10])
    same_tag = np.equal(tag_i, tag_j).astype(int)

    features = np.stack((deltaR, mass, kt, z, ptd, same_tag), axis=3).astype(float)

    # Overwrite the self-pairs (which otherwise hold log(0) terms) with 1.
    diag = np.arange(n_particles)
    features[:, diag, diag, :] = 1
    return features


if __name__ == "__main__":
    data = np.load('/home/daohan/apps/qgtagging/train/train_data_with_labels1.npz')
    data = data['data']
    print(data.shape)
    point_cloud = data[:10000]

    new_feature = interaction_features(point_cloud)

    has_nan = np.isnan(new_feature).any()

    print(new_feature[1,2,5])
    print(new_feature.shape)
    np.save('/home/daohan/apps/qgtagging/train/interact1a.npy', new_feature)


# Calculate the jet interaction matrix

In [None]:
import numpy as np
def jet_interaction_matrix(a):
    """Build the symmetric (n_jets, 11, 11) jet interaction matrix.

    ``a`` is the (n_jets, 20) jet feature array.  Nodes 0-9 stand for feature
    columns 0-9 of ``a`` and node 10 for the per-PID block (columns 10-19).
    The diagonal is 1, unlisted pairs are 0, and every listed pair (i, j) is
    written to both (i, j) and (j, i).  The number of jets is taken from
    ``a`` instead of being fixed at 100000.
    """
    n_jets = a.shape[0]
    b = np.zeros((n_jets, 11, 11))
    diag = np.arange(11)
    b[:, diag, diag] = 1

    def link(i, j, value):
        b[:, i, j] = value
        b[:, j, i] = value

    # Ratios among columns 0-4.
    link(0, 1, a[:, 1]/a[:, 0])
    link(0, 2, a[:, 2]/a[:, 0])
    link(0, 3, a[:, 3]/a[:, 0])
    link(0, 4, a[:, 4]/a[:, 0])
    link(1, 4, a[:, 1]/a[:, 4])
    link(2, 4, a[:, 2]/a[:, 4])
    # Constant links: node 0 <-> node 6 and node 4 <-> node 5.
    link(0, 6, 1)
    link(4, 5, 1)
    # Ratios of columns 7 and 8 to column 9.
    link(7, 9, a[:, 7]/a[:, 9])
    link(8, 9, a[:, 8]/a[:, 9])
    # Node 10: log of columns 10-14 for nodes 0-4, raw columns 15-19 for 5-9.
    for k in range(5):
        link(k, 10, np.log(a[:, 10 + k]))
    for k in range(5, 10):
        link(k, 10, a[:, 10 + k])
    return b


if __name__ == "__main__":
    a = np.load('/home/daohan/apps/qgtagging/train/train_jet_information1.npy')
    b = jet_interaction_matrix(a)

    print(np.count_nonzero(np.isnan(b)))

    # nan -> 0; infinities are replaced by nan_to_num's default finite values.
    b = np.nan_to_num(b,nan=0)

    print(b.shape)
    np.save('/home/daohan/apps/qgtagging/train/interact1.npy', b)
