# Task 1: Quantum Computing

In [None]:
import cirq
import numpy as np

# Five qubits on a line, indexed 0 through 4
qubits = [cirq.LineQubit(index) for index in range(5)]

# Start from an empty circuit
circuit = cirq.Circuit()

# Put every qubit through a Hadamard gate
circuit.append([cirq.H(target) for target in qubits])

# Chain CNOT gates along neighbouring qubits: 0->1, 1->2, 2->3, 3->4
for control, target in zip(qubits, qubits[1:]):
    circuit.append(cirq.CNOT(control, target))

# Exchange the first and last qubits
circuit.append(cirq.SWAP(qubits[0], qubits[4]))

# Rotate qubits[3] about X by pi/2
quarter_turn = cirq.rx(0.5 * np.pi)
circuit.append(quarter_turn.on(qubits[3]))

# Show the resulting circuit diagram
print(circuit)

                          ┌─────────┐
0: ───H───@────────────────×────────────
          │                │
1: ───H───X───@────────────┼────────────
              │            │
2: ───H───────X───@────────┼────────────
                  │        │
3: ───H───────────X───@────┼Rx(0.5π)────
                      │    │
4: ───H───────────────X────×────────────
                          └─────────┘


In [None]:
from qiskit import QuantumCircuit, execute, Aer

# Create a quantum circuit with 4 qubits and 1 classical bit (second argument)
circuit = QuantumCircuit(4, 1)

# Apply Hadamard gate to the first qubit
circuit.h(0)

# Rotate the second qubit around X.
# NOTE(review): the angle passed is 2/3 * 3.14159 ~= 2.0944 rad (about 2*pi/3),
# not the pi/3 named in the task wording, and pi is a truncated literal --
# confirm which angle is intended.
circuit.rx(2/3 * 3.14159, 1)

# Apply Hadamard gate to the third and fourth qubit
circuit.h(2)
circuit.h(3)

# Intended as the swap test between qubits (1, 2) and (3, 4) in 1-based numbering.
# NOTE(review): the gates below are H(0), CX(0, 3), CX(1, 2), H(0) and a
# measurement of qubit 0 into classical bit 0; no controlled-SWAP is applied,
# and this H(0) directly follows the earlier H(0) with nothing acting on
# qubit 0 in between, so that pair cancels -- confirm this realises the
# intended swap test.
circuit.h(0)
circuit.cx(0, 3)
circuit.cx(1, 2)
circuit.h(0)
circuit.measure(0, 0)

# Draw the circuit
print(circuit.draw())

         ┌───┐     ┌───┐     ┌───┐┌─┐
q_0: ────┤ H ├─────┤ H ├──■──┤ H ├┤M├
     ┌───┴───┴────┐└───┘  │  └───┘└╥┘
q_1: ┤ Rx(2.0944) ├──■────┼────────╫─
     └───┬───┬────┘┌─┴─┐  │        ║ 
q_2: ────┤ H ├─────┤ X ├──┼────────╫─
         ├───┤     └───┘┌─┴─┐      ║ 
q_3: ────┤ H ├──────────┤ X ├──────╫─
         └───┘          └───┘      ║ 
c: 1/══════════════════════════════╩═
                                   0 


In [None]:
!pip install energyflow



# Task 2: Classical Graph Neural Network, Part 1: Using GCNs

In [None]:
import energyflow

In [None]:
# Load 100000 padded jets (4 columns per particle) from EnergyFlow's qg_jets
# dataset, using the Pythia generator files and caching under ~/.energyflow
dataset = energyflow.qg_jets.load(
    num_data=100000,
    pad=True,
    ncol=4,
    generator='pythia',
    with_bc=False,
    cache_dir='~/.energyflow',
)

In [None]:
# The first element of the loaded dataset is the feature array, the last one the targets
X, y = dataset[0], dataset[-1]

In [None]:
# One accumulator list per feature column; each receives one total per sample
sum_array_1, sum_array_2, sum_array_3, sum_array_4 = ([] for _ in range(4))

In [None]:
# Total of each of the four feature columns over all particles of a sample.
# Zero entries contribute nothing to a sum, so no explicit skip is needed,
# and a single vectorised reduction over axis 1 replaces the nested Python
# loops over every (sample, particle) pair.
feature_sums = np.asarray(X).sum(axis=1)

# Append to the accumulator lists, one value per sample and feature column
sum_array_1.extend(feature_sums[:, 0].tolist())
sum_array_2.extend(feature_sums[:, 1].tolist())
sum_array_3.extend(feature_sums[:, 2].tolist())
sum_array_4.extend(feature_sums[:, 3].tolist())
        

In [None]:
def _average(values):
    # Arithmetic mean of a non-empty list of per-sample totals
    return sum(values) / len(values)


# Mean of the per-sample totals, one per feature column
mean_1 = _average(sum_array_1)
mean_2 = _average(sum_array_2)
mean_3 = _average(sum_array_3)
mean_4 = _average(sum_array_4)

In [None]:
def _at_or_below(values, threshold):
    # Flag every value that does not exceed the threshold
    return [value <= threshold for value in values]


# Per-sample flags: is the feature total at or below that feature's mean?
bool_1 = _at_or_below(sum_array_1, mean_1)
bool_2 = _at_or_below(sum_array_2, mean_2)
bool_3 = _at_or_below(sum_array_3, mean_3)
bool_4 = _at_or_below(sum_array_4, mean_4)

In [None]:
# Turn each boolean flag into 1 (True) or 0 (False), one list per feature
binary_1 = [int(flag) for flag in bool_1]
binary_2 = [int(flag) for flag in bool_2]
binary_3 = [int(flag) for flag in bool_3]
binary_4 = [int(flag) for flag in bool_4]

In [None]:
# Per-sample score: how many of the four feature flags are set (0 through 4)
binary = [
    first + second + third + fourth
    for first, second, third, fourth in zip(binary_1, binary_2, binary_3, binary_4)
]

In [None]:
# One bucket of sample indices per possible score value (0 through 4)
connections_0, connections_1, connections_2, connections_3, connections_4 = (
    [] for _ in range(5)
)

In [None]:
# Route every sample index into the bucket that matches its score
buckets_by_score = {
    0: connections_0,
    1: connections_1,
    2: connections_2,
    3: connections_3,
    4: connections_4,
}
for sample_index, score in enumerate(binary):
    bucket = buckets_by_score.get(score)
    # Scores outside 0..4 are ignored
    if bucket is not None:
        bucket.append(sample_index)

### The code above creates connections between nodes based on the mean values of the corresponding features. Note that this mean must be computed for each node (the mean of the 37 data points including the padding, although the padded zeros are not considered while calculating the mean), not as the mean of every data point of all nodes. Each feature is encoded as 0 when its value is more than the mean; else, it is considered a 1. We then create lists of the node indices for which 0 features, 1 feature, 2 features, 3 features and 4 (all) features are encoded as 1; these lists are connections_0, connections_1, connections_2, connections_3 and connections_4. It would have been much easier to use NumPy arrays for this; however, I ran into a situation where I had to convert a list of variable-length lists to NumPy arrays while keeping their dimensions the same and avoiding padding, which made it complicated.

In [None]:
# One class name per node, derived from the integer targets in y.
# A two-element list of class names would make the train/test split further
# down cover only indices 0 and 1, so the labels must have one entry per node.
# EnergyFlow's qg_jets convention is quark = 1, gluon = 0.
labels = ['Quarks' if target == 1 else 'Gluons' for target in y]
nodes = []
# N is the number of samples (graph nodes), F the number of feature columns
N = X.shape[0]
F = X.shape[2]
print('X shape: ', X.shape)
edge_list=[]

X shape:  (100000, 139, 4)


In [None]:
# Start from an empty edge list; the following cells add to it
edge_list = list()

In [None]:
# Link each index in connections_1 to the next one in the list
edge_list.extend(zip(connections_1, connections_1[1:]))

In [None]:
# Link each index in connections_2 to the next one in the list
edge_list.extend(zip(connections_2, connections_2[1:]))

In [None]:
# Link each index in connections_3 to the next one in the list
edge_list.extend(zip(connections_3, connections_3[1:]))

In [None]:
# Link each index in connections_4 to the next one in the list
edge_list.extend(zip(connections_4, connections_4[1:]))

In [None]:
# Distinct class names and how many there are
categories = set(labels)
num_classes = len(categories)

# Report the basic dimensions of the problem
print('\nNumber of nodes (N): ', N)
print('\nNumber of features (F) of each node: ', F)
print('\nCategories: ', categories)
print('\nNumber of classes: ', num_classes)


Number of nodes (N):  100000

Number of features (F) of each node:  4

Categories:  {'Gluons', 'Quarks'}

Number of classes:  2


In [None]:
def limit_data(labels,limit=5000,val_num=0,test_num=5000):
    '''
    Split example indices into train, validation, and test index lists.

    Scanning `labels` in order, the first `limit` examples of every class go
    to the training set. The remaining indices, still in ascending order, are
    split into the first `val_num` (validation) and the next `test_num` (test).

    Args:
        labels: sequence of hashable class labels, one per example.
        limit: maximum number of training examples taken per class.
        val_num: number of validation indices to take from the remainder.
        test_num: number of test indices to take after the validation ones.

    Returns:
        Tuple `(train_idx, val_idx, test_idx)` of lists of integer indices.
    '''
    label_counter = {label: 0 for label in labels}
    train_idx = []

    for i, label in enumerate(labels):
        if label_counter[label] < limit:
            # add the example to the training data
            train_idx.append(i)
            label_counter[label] += 1
            # stop scanning once every class has reached `limit` examples;
            # this can only become true right after a count was increased
            if all(count == limit for count in label_counter.values()):
                break

    # indices that did not go to the training data; a set makes each
    # membership test O(1) instead of scanning the whole train_idx list
    train_lookup = set(train_idx)
    rest_idx = [i for i in range(len(labels)) if i not in train_lookup]
    val_idx = rest_idx[:val_num]
    test_idx = rest_idx[val_num:val_num + test_num]
    return train_idx, val_idx, test_idx

# Split the node indices into train / validation / test groups
train_idx, val_idx, test_idx = limit_data(labels)


def _indices_to_mask(indices):
    # Boolean vector of length N that is True exactly at the given indices
    mask = np.zeros((N,), dtype=bool)
    mask[indices] = True
    return mask


# One boolean mask per split
train_mask = _indices_to_mask(train_idx)
val_mask = _indices_to_mask(val_idx)
test_mask = _indices_to_mask(test_idx)

In [None]:
import networkx as nx
# Build the graph with one node per sample, added in index order.
# Adding only the edges would create nodes lazily in edge-insertion order and
# leave out samples that have no edge, so adjacency rows would no longer line
# up with rows of X.
G = nx.Graph()
G.add_nodes_from(range(N))
G.add_edges_from(edge_list)

# Obtain the adjacency matrix (A); nodelist pins row/column i to sample i
A = nx.adjacency_matrix(G, nodelist=list(range(N)))

# nx.info() is deprecated (removed in NetworkX 3.0); build the summary directly
print('Graph info: ', f'Graph with {G.number_of_nodes()} nodes and {G.number_of_edges()} edges')

  A = nx.adjacency_matrix(G)


Graph info:  Graph with 94576 nodes and 94572 edges



  print('Graph info: ', nx.info(G))


In [None]:
!pip install sklearn



In [None]:
import sklearn
import tensorflow as tf

In [None]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

def encode_label(labels):
    """One-hot encode class labels.

    Fits a LabelEncoder on `labels`, maps them to integer codes, and expands
    the codes with `to_categorical`.

    Returns:
        A pair `(encoded, classes)`: the categorical matrix and the fitted
        encoder's `classes_` attribute.
    """
    encoder = LabelEncoder()
    integer_codes = encoder.fit_transform(labels)
    one_hot = to_categorical(integer_codes)
    return one_hot, encoder.classes_


# Encode the labels and keep the class names found by the encoder
labels_encoded, classes = encode_label(labels)

In [None]:
# Parameters
# NOTE(review): in the cells visible here only `learning_rate` is read (by the
# Adam optimizer). The GCN dropout rate (0.5), hidden width (4) and number of
# epochs (100) are hard-coded where they are used, and `l2_reg` and
# `es_patience` are not referenced -- confirm whether these values are meant
# to be wired into the model and training loop.
channels = 16           # Number of channels in the first layer
dropout = 0.5           # Dropout rate for the features
l2_reg = 5e-4           # L2 regularization rate
learning_rate = 1e-2    # Learning rate
epochs = 200            # Number of training epochs
es_patience = 10        # Patience for early stopping

In [None]:
import torch
import spektral

In [None]:
import numpy as np
import scipy.sparse as sp

# Symmetric normalisation of the adjacency matrix: D^-1/2 * A * D^-1/2

# Node degrees as a flat vector (row sums of A)
d = np.asarray(A.sum(axis=1)).squeeze()

# Inverse square root of every degree; zero-degree nodes give inf, reset to 0
d_sqrt_inv = np.power(d, -0.5)
isolated = np.isinf(d_sqrt_inv)
d_sqrt_inv[isolated] = 0.0

# D^-1/2 as a sparse diagonal matrix
D_sqrt_inv = sp.diags(d_sqrt_inv)

# Scale A from the left and from the right
A_norm = D_sqrt_inv.dot(A)
A_norm = A_norm.dot(D_sqrt_inv)

In [None]:
from tensorflow.keras.layers import Input, Dropout, Dense
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

In [None]:
!pip install torch_geometric



In [None]:
!pip install torch_sparse



In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network that returns log-probabilities.

    Layout: GCNConv -> ReLU -> Dropout(p=0.5) -> GCNConv -> log-softmax over
    dimension 1.
    """

    def __init__(self, num_features, hidden_channels, num_classes):
        """Create the two convolutions and the dropout layer.

        Args:
            num_features: input size of the first convolution.
            hidden_channels: output size of the first / input of the second.
            num_classes: output size of the second convolution.
        """
        super().__init__()
        self.conv1 = GCNConv(num_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, num_classes)
        self.dropout = torch.nn.Dropout(p=0.5)

    def forward(self, x, edge_index):
        """Run both convolutions on `x` with `edge_index`; return log-softmax scores."""
        hidden = F.relu(self.conv1(x, edge_index))
        hidden = self.dropout(hidden)
        scores = self.conv2(hidden, edge_index)
        return F.log_softmax(scores, dim=1)

In [None]:
# Parameters (same values as the earlier hyperparameter cell)
# NOTE(review): in the cells visible here only `learning_rate` is read (by the
# Adam optimizer). The GCN dropout rate (0.5), hidden width (4) and number of
# epochs (100) are hard-coded where they are used, and `l2_reg` and
# `es_patience` are not referenced -- confirm whether these values are meant
# to be wired into the model and training loop.
channels = 16           # Number of channels in the first layer
dropout = 0.5           # Dropout rate for the features
l2_reg = 5e-4           # L2 regularization rate
learning_rate = 1e-2    # Learning rate
epochs = 200            # Number of training epochs
es_patience = 10        # Patience for early stopping

In [None]:
import torch
from torch_geometric.data import Data

# Build the (2, num_edges) edge index from the non-zero entries of the raw
# adjacency matrix A (not A_norm).
# NOTE(review): GCNConv is expected to apply its own normalisation by
# default, which is why the raw A is passed -- confirm against the
# torch_geometric version in use.
row, col = A.nonzero()
# Stack into one ndarray first; building a tensor from a list of ndarrays is slow
edge_index = torch.as_tensor(np.vstack((row, col)), dtype=torch.long)

# Convert X and y to PyTorch tensors. as_tensor accepts NumPy arrays as well
# as existing tensors, so re-running this cell does not raise the
# copy-construct warning that torch.tensor(tensor) produces.
X = torch.as_tensor(X, dtype=torch.float)
# Flatten every sample to a single feature vector; sizes come from the data
X_train = X.reshape(X.shape[0], -1)
y = torch.as_tensor(y, dtype=torch.long)

# Create a PyTorch Geometric data object
data = Data(x=X_train, edge_index=edge_index,y=y)

# Set the train and test masks as attributes of the data object
data.train_mask = torch.as_tensor(train_mask, dtype=torch.bool)
data.test_mask = torch.as_tensor(test_mask, dtype=torch.bool)

# Initialize the GCN model with the flattened feature width as input size
model = GCN(num_features=X_train.shape[1], hidden_channels=4, num_classes=2)

# Define the loss function and optimizer (NLLLoss pairs with the model's log-softmax output)
criterion = torch.nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model: full-batch forward pass, loss on the training nodes only
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

  X = torch.tensor(X, dtype=torch.float)
  y = torch.tensor(y, dtype=torch.long)


Epoch 1, Loss: 104.2855
Epoch 2, Loss: 54.2523
Epoch 3, Loss: 23.1599
Epoch 4, Loss: 2.3075
Epoch 5, Loss: 0.6540
Epoch 6, Loss: 0.6450
Epoch 7, Loss: 0.6363
Epoch 8, Loss: 0.6279
Epoch 9, Loss: 0.6198
Epoch 10, Loss: 0.6119
Epoch 11, Loss: 0.6042
Epoch 12, Loss: 0.5966
Epoch 13, Loss: 0.5891
Epoch 14, Loss: 0.5817
Epoch 15, Loss: 0.5745
Epoch 16, Loss: 0.5673
Epoch 17, Loss: 0.5603
Epoch 18, Loss: 0.5533
Epoch 19, Loss: 0.5465
Epoch 20, Loss: 0.5397
Epoch 21, Loss: 0.5330
Epoch 22, Loss: 0.5263
Epoch 23, Loss: 0.5198
Epoch 24, Loss: 0.5133
Epoch 25, Loss: 0.5070
Epoch 26, Loss: 0.5007
Epoch 27, Loss: 0.4944
Epoch 28, Loss: 0.4883
Epoch 29, Loss: 0.4822
Epoch 30, Loss: 0.4762
Epoch 31, Loss: 0.4703
Epoch 32, Loss: 0.4644
Epoch 33, Loss: 0.4586
Epoch 34, Loss: 0.4529
Epoch 35, Loss: 0.4473
Epoch 36, Loss: 0.4417
Epoch 37, Loss: 0.4362
Epoch 38, Loss: 0.4308
Epoch 39, Loss: 0.4254
Epoch 40, Loss: 0.4202
Epoch 41, Loss: 0.4150
Epoch 42, Loss: 0.4098
Epoch 43, Loss: 0.4048
Epoch 44, Loss: 

# The loss could be improved further by making the connections more sophisticated. My original idea was to create these node connections based on a threshold on the deviation from the mean, rather than just a comparison of these features with their corresponding mean values, but the idea was scrapped due to limited computational power.