In [None]:
# import libraries

%pylab inline
from rdkit.Chem import AllChem as rdkit
import py3Dmol
import stk
import stko
%matplotlib inline
import numpy as np
import MDAnalysis as mda
import pywindow as pw

from scipy.ndimage import gaussian_filter1d
from itertools import combinations

You must update the cage definition and the system type in the code cell below. The variable names `tritopic_bb`, `ditopic_bb` and `cage` must be kept unchanged. `System_type` is 1 for CC3/7/8 cages and 2 for B4/B6/D4/D6 cages.

In [None]:
# must update system type and stk cage building used here
# System_type selects the cage family: 1 for CC3/7/8 cages, 2 for B4/B6/D4/D6 cages
System_type = 1

# tritopic building block: built from SMILES, functional groups located with stk.AldehydeFactory
tritopic_bb = stk.BuildingBlock('C1=C(C=C(C=C1C=O)C=O)C=O', [stk.AldehydeFactory()])
# ditopic building block: built from SMILES, functional groups located with stk.PrimaryAminoFactory
ditopic_bb = stk.BuildingBlock(smiles='N[C@@H]1CCCC[C@H]1N',functional_groups=[stk.PrimaryAminoFactory()],)
# 8+12 cage: tritopic building blocks are assigned ids 0-7 and ditopic building blocks ids 8-19
# of the topology graph; the constructed geometry is optimised with MCHammer
cage = stk.ConstructedMolecule(
            topology_graph=stk.cage.EightPlusTwelve(
                building_blocks={
                    ditopic_bb: range(8, 20),
                    tritopic_bb: range(0, 8),
                },
                optimizer =stk.MCHammer(),),)

In [None]:
def get_cage_frame_data(file_name, frame_number):
    '''
    Outputs the position matrix of the cage for a given frame.

    The file is expected to be a multi-frame .xyz file in which the very first line holds the
    number of atoms in the cage and every frame is introduced by a comment line of the form
    "frame <frame_number>", followed by one "<element> x y z" line per atom.

    Inputs:
    file_name (str): path to the cage only unwrapped file
    frame_number (int): the frame to be extracted
    Returns:
    frame_data (list): list[cage_atom][coordinates] containing cage position information.
                       Empty if the requested frame is not present in the file.
    Raises:
    ValueError: if the first line of the file is not an integer, or a coordinate is not a number
    '''
    frame_header = f"frame {frame_number}"
    frame_data = [] # list to store cage frame information
    no_atoms = None # number of atoms per frame, read from the first line of the file
    in_frame = False # becomes True once the header of the desired frame has been read

    # the context manager guarantees the file is closed again, even if parsing fails
    with open(file_name, 'r') as datafile:
        for line in datafile:

            # define number of atoms in cage on first line of file
            if no_atoms is None:
                no_atoms = int(line)
                continue

            # keep scanning until the desired frame header is encountered
            # (surrounding whitespace is ignored so trailing spaces do not hide the header)
            if not in_frame:
                in_frame = line.strip() == frame_header
                continue

            # stop reading as soon as the whole frame has been collected instead of
            # scanning the remainder of the trajectory
            if len(frame_data) >= no_atoms:
                break

            # first column is the element symbol, the remaining columns are the coordinates
            frame_data.append([float(x) for x in line.split()[1:]])

    return frame_data

In [None]:
def euclidean_distance(a,b):
    '''
    Straight-line (Euclidean) separation of two points in 3D space.

    Inputs:
    a,b (list): the two points, each indexable as [x, y, z]
    Returns:
    distance (float): length of the segment joining a and b
    '''
    squared_separation = 0
    # accumulate the squared offset along x, y and z in turn
    for axis in (0, 1, 2):
        offset = a[axis] - b[axis]
        squared_separation = squared_separation + offset**2
    return squared_separation**0.5

In [None]:
def get_reduced_building_block(building_block):
    '''
    Builds a copy of an stk building block without its deleter atoms (the atoms of each functional
    group that are flagged for deletion), so that the position matrix of the remaining atoms can
    later be updated one building block at a time.

    Inputs:
    building_block: stk building block
    Returns:
    reduced_building_block: stk building block holding only the non-deleter atoms, renumbered
                            from 0, together with the bonds and positions that belong to them
    '''

    # gather the ids of every deleter atom across all functional groups
    deleter_atom_ids = [
        deleter_id
        for functional_group in building_block.get_functional_groups()
        for deleter_id in functional_group.get_deleter_ids()
    ]

    # walk the atoms once: skip deleter atoms, hand every surviving atom the next free id
    # and remember the old_stk_id -> new_stk_id mapping
    old_to_new_id = {}
    renumbered_atoms = []
    for atom in building_block.get_atoms():
        old_id = atom.get_id()
        if old_id in deleter_atom_ids:
            continue
        new_id = len(renumbered_atoms)
        old_to_new_id[old_id] = new_id
        renumbered_atoms.append(atom.with_id(new_id))

    # rebuild only those bonds whose two atoms both survived, pointing at the renumbered atoms
    filtered_bonds = []
    for bond in building_block.get_bonds():
        first_old_id = bond.get_atom1().get_id()
        second_old_id = bond.get_atom2().get_id()
        if first_old_id not in old_to_new_id or second_old_id not in old_to_new_id:
            continue
        filtered_bonds.append(
            stk.Bond(
                atom1=renumbered_atoms[old_to_new_id[first_old_id]],
                atom2=renumbered_atoms[old_to_new_id[second_old_id]],
                order=bond.get_order(),  # bond order is carried over unchanged
            )
        )

    # keep the rows of the position matrix that belong to the surviving atoms, in their new order
    original_position_matrix = building_block.get_position_matrix()
    updated_position_matrix = np.array(
        [original_position_matrix[old_id] for old_id in old_to_new_id.keys()]
    )

    # assemble the reduced building block from the renumbered atoms, filtered bonds and positions
    reduced_building_block = stk.BuildingBlock.init(
        atoms = renumbered_atoms,
        bonds = filtered_bonds,
        position_matrix = updated_position_matrix)

    # summary of what was removed
    print("Deleted Atom IDs:", deleter_atom_ids)
    print("Original Atom Count:", len(list(building_block.get_atoms())))
    print("Filtered Atom Count:", len(list(reduced_building_block.get_atoms())))
    print("Original Bond Count:", len(list(building_block.get_bonds())))
    print("Filtered Bond Count:", len(list(reduced_building_block.get_bonds())))
    print("New Atom ID Mapping:", old_to_new_id)

    return reduced_building_block

If more than two types of building block are used, the code below will need to be adapted. The building blocks must also be appended to `building_blocks` below in the same order as they are defined when first constructing the cage above (i.e. following the id ranges assigned to them there). Here that means the tritopic building blocks followed by the ditopic building blocks; e.g. during cage building, `building_blocks={ditopic_bb: range(8, 20), tritopic_bb: range(0, 8)}`.

In [None]:
# load the system to MDanalysis universe. update parameters if needed.

u = mda.Universe("lammps_files/lammps_input_data.data", "lammps_files/lammps_output_lammpstrj.lammpstrj", format="LAMMPSDUMP", lammps_coordinate_convention="scaled", dt=1)
no_frames = len(u.trajectory)
sim_length = 10 # simulation length (the plots below label this axis in ns)
# x axis for the plots: no_frames evenly spaced times running from 0 to sim_length
time_data = (np.array(range(0, no_frames)) / ((no_frames-1)/sim_length)).tolist()
no_atoms_in_cage = len(u.select_atoms("resid 1")) # the cage is selected as residue 1
no_tritopic_bb = cage.get_num_building_block(tritopic_bb)
no_ditopic_bb = cage.get_num_building_block(ditopic_bb)
no_building_blocks = no_ditopic_bb + no_tritopic_bb
cage_file_name = 'data_storage/data_cage_only_unwrapped.xyz'

# the reduced building block only depends on the building block type, so reduce each type once
# (this also prints the reduction summary once per type instead of once per building block)
reduced_tritopic_bb = get_reduced_building_block(tritopic_bb)
reduced_ditopic_bb = get_reduced_building_block(ditopic_bb)

# one entry per building block in the cage, tritopic building blocks first, then ditopic.
# sharing the same object between entries is safe: later cells never modify an entry in place,
# they replace it with the new building block returned by with_position_matrix
building_blocks = [reduced_tritopic_bb] * no_tritopic_bb + [reduced_ditopic_bb] * no_ditopic_bb

In [None]:
def infer_connectivity(bb_com_data, n, no_tritopic_bb, no_building_blocks):
    '''
    Works out which building blocks are connected by linking every building block of interest to
    its n closest building blocks. Closeness is measured between centres of mass at frame 0 only,
    since the connectivity does not change over time.

    Inputs:
    bb_com_data (list): list[no_bbs][no_frames][coordinates] with building block centre of mass data
    n: how many nearest building blocks each building block of interest is connected to.
       eg for an 8+12 tritopic based cube shape, each vertex connects to three points => n = 3.
    no_tritopic_bb (int): the number of building blocks to be evaluated (indexes 0..no_tritopic_bb-1)
    no_building_blocks (int): the number of building blocks that are candidate neighbours
                              (indexes 0..no_building_blocks-1)
    Returns:
    list: connectivity between the building blocks. Every element is a tuple (a,b) with a < b,
          where a and b are the indexes of two neighbouring building blocks. Each pair appears once.

    '''
    # a set keeps each (a,b) pair only once, even when both partners pick each other
    neighbour_pairs = set()

    for i in range(no_tritopic_bb):
        # (distance, index) for every other building block at frame 0, closest first
        ranked = sorted(
            (euclidean_distance(bb_com_data[i][0], bb_com_data[j][0]), j)
            for j in range(no_building_blocks)
            if j != i
        )
        nearest = ranked[:n]
        print(f'Index {i}: (distance, nearest neighbour index):',nearest)

        # store each pair with the smaller index first so (a,b) and (b,a) collapse to one entry
        for _, neighbour in nearest:
            neighbour_pairs.add((i, neighbour) if i < neighbour else (neighbour, i))

    return list(neighbour_pairs)

In [None]:
def calculate_angle(p1, p2, p3, p4):
    '''
    Calculates the angle between two lines, which are represented by the vector from point p1 to
    point p2 and the vector from point p3 to point p4.

    Inputs:
    p1,p2,p3,p4 (list): list/array containing x,y,z coordinates of the point
    Returns:
    angle_deg (float): angle between the two lines in degrees, in the range [0, 180].
                       NaN if either vector has zero length (the angle is then undefined).
    '''

    A = np.array(p2) - np.array(p1)  # vector from p1 to p2
    B = np.array(p4) - np.array(p3)  # vector from p3 to p4

    # cosine of the angle from the dot product and the vector magnitudes
    cos_angle = np.dot(A, B) / (np.linalg.norm(A) * np.linalg.norm(B))

    # floating point round-off can leave the cosine marginally outside [-1, 1] for (anti)parallel
    # vectors, which would make arccos return NaN; clamp it back into the valid domain
    cos_angle = np.clip(cos_angle, -1.0, 1.0)

    # calculate angle in radians and convert to degrees
    angle_rad = np.arccos(cos_angle)
    angle_deg = np.degrees(angle_rad)

    return angle_deg



def track_frame_symmetry_angles(bb_com_data, adjacency_list, no_frames):
    '''
    Calculates, for each frame, the standard deviation of all vertex angles of the tetrahedron/cube
    shape defined by the tritopic building blocks of a 4+6/8+12 cage. A vertex angle is the angle
    at a building block between the directions to two of its neighbours.

    Inputs:
    bb_com_data (list): list[no_bbs][no_frames][coordinates] with building block centre of mass data
    adjacency_list (list): list containing connectivity information between the building blocks. Each
                           element is a tuple, which represents a nearest neighbour pair. See
                           infer_connectivity function for more detail.
    no_frames (int): the number of frames to be evaluated
    Returns:
    frame_stdevs (list): list containing the standard deviation of the angles at each frame
    '''

    no_building_blocks = len(bb_com_data)

    # connectivity does not depend on the frame, so the neighbour pairs of every vertex are
    # worked out once up front rather than being rebuilt for each frame
    neighbour_pairs_per_bb = []
    for bb_index in range(no_building_blocks):
        neighbours = []
        for edge in adjacency_list:
            if bb_index == edge[0]:
                neighbours.append(edge[1])
            if bb_index == edge[1]:
                neighbours.append(edge[0])
        # every pair of neighbours of this vertex defines one vertex angle
        neighbour_pairs_per_bb.append(list(combinations(neighbours, 2)))

    frame_stdevs = []  # list to store standard deviation for each frame

    # loop through frames
    for frame_number in range(no_frames):
        angles = []

        # loop through building blocks (aka loop through each vertex)
        for bb_index, neighbour_pairs in enumerate(neighbour_pairs_per_bb):
            for first_neighbour, second_neighbour in neighbour_pairs:
                vertex = bb_com_data[bb_index][frame_number]
                angles.append(
                    calculate_angle(
                        vertex,
                        bb_com_data[first_neighbour][frame_number],
                        vertex,
                        bb_com_data[second_neighbour][frame_number],
                    )
                )

        # calculate standard deviation of all angles for the frame
        frame_stdevs.append(np.std(angles))

    return frame_stdevs

In [None]:
# code to store all building block centres of masses (com) for each frame as a list of 
# shape [no_bbs][no_frames][coordinates]
# also outputs an .xyz file of the coms for easy visualisation. Each tritopic/ditopic building block is 
# represented as an H/C atom at its com
# NOTE: the slicing below assumes the atoms in the cage .xyz file are grouped building block by building
# block, with all tritopic building blocks first, followed by all ditopic building blocks

bb_com_data = [[] for _ in range(no_building_blocks)] # one list per building block (tritopic first, then ditopic)

with open('data_storage/coms_all_bbs.xyz', 'w') as f:
    for frame_number in range(0,no_frames): # loop through each frame
        f.write(f"{no_building_blocks}\n") # write number of building blocks
        f.write(f"frame {frame_number}\n") # write frame number

        # obtain cage position matrix for specified frame (the data file is re-read from the start for every frame)
        cage_frame_data = np.array(get_cage_frame_data(cage_file_name,frame_number))

        # loop through each building block, extract relevant atomic coordinates and calculate com
        for bb_number in range(0,no_building_blocks):
            no_atoms_in_bb = building_blocks[bb_number].get_num_atoms() # define number of atoms in building block
            if bb_number in range(0,no_tritopic_bb): # for tritopic building blocks
                upper_index = (bb_number+1)*no_atoms_in_bb # end (exclusive) of the rows holding this building block within cage_frame_data
            if bb_number in range(no_tritopic_bb,no_building_blocks): # for ditopic building blocks
                # skip all tritopic atoms first, then count whole ditopic building blocks up to and including this one
                upper_index = (no_tritopic_bb*building_blocks[0].get_num_atoms()) + ((bb_number + 1 - no_tritopic_bb)*no_atoms_in_bb)
            bb_frame_data = cage_frame_data[upper_index - no_atoms_in_bb:upper_index] # get building block position matrix


            # update building block position matrix and calculate com
            # (the list entry is replaced by the building block returned from with_position_matrix)
            building_blocks[bb_number] = building_blocks[bb_number].with_position_matrix(bb_frame_data)
            rdkit_mol = building_blocks[bb_number].to_rdkit_mol()
            # pass the building block through pywindow to obtain its centre of mass
            molsys = pw.MolecularSystem.load_rdkit_mol(rdkit_mol)
            mol = molsys.system_to_molecule()
            com = mol.calculate_centre_of_mass()
            bb_com_data[bb_number].extend([com]) # append this frame's com for the building block

            if bb_number in range(0,no_tritopic_bb):
                f.write(f"H {com[0]:.3f} {com[1]:.3f} {com[2]:.3f}\n") # represent tritopic bb as H
            if bb_number in range(no_tritopic_bb,no_building_blocks):
                f.write(f"C {com[0]:.3f} {com[1]:.3f} {com[2]:.3f}\n") # represent ditopic bb as C

In [None]:
# code to store tritopic building block centres of masses (com) for each frame as a list of 
# shape [no_tritopic_bbs][no_frames][coordinates]
# also outputs an .xyz file of the coms for easy visualisation. Each building block is represented as an H atom 
# at its com
# NOTE: the slicing below assumes the tritopic building blocks occupy the first rows of each frame in the
# cage .xyz file, one building block after another

tritopic_bb_com_data = [[] for _ in range(no_tritopic_bb)] # setup list for each tritopic building block

with open('data_storage/coms_tritopic_bbs.xyz', 'w') as f:
    for frame_number in range(0,no_frames): # loop through each frame
        f.write(f"{no_tritopic_bb}\n") # write number of tritopic building blocks
        f.write(f"frame {frame_number}\n") # write frame number

        # obtain cage position matrix for specified frame (the data file is re-read from the start for every frame)
        cage_frame_data = np.array(get_cage_frame_data(cage_file_name,frame_number))

        # loop through each tritopic building block, extract relevant atomic coordinates and calculate com
        for bb_number in range(0,no_tritopic_bb):
            no_atoms_in_bb = building_blocks[bb_number].get_num_atoms() # define number of atoms in building block
            upper_index = (bb_number+1)*no_atoms_in_bb # end (exclusive) of the rows holding this building block within cage_frame_data
            bb_frame_data = cage_frame_data[upper_index - no_atoms_in_bb:upper_index] # get building block position matrix

            # update building block position matrix and calculate com
            # (the list entry is replaced by the building block returned from with_position_matrix)
            building_blocks[bb_number] = building_blocks[bb_number].with_position_matrix(bb_frame_data)
            rdkit_mol = building_blocks[bb_number].to_rdkit_mol()
            # pass the building block through pywindow to obtain its centre of mass
            molsys = pw.MolecularSystem.load_rdkit_mol(rdkit_mol)
            mol = molsys.system_to_molecule()
            com = mol.calculate_centre_of_mass()
            tritopic_bb_com_data[bb_number].extend([com]) # append this frame's com for the building block

            f.write(f"H {com[0]:.3f} {com[1]:.3f} {com[2]:.3f}\n")


In [None]:
# code to store ditopic building block centres of masses (com) for each frame as a list of 
# shape [no_ditopic_bbs][no_frames][coordinates]
# also outputs an .xyz file of the coms for easy visualisation. Each building block is represented as an H atom 
# at its com
# NOTE: the slicing below assumes the ditopic building blocks follow directly after all tritopic building
# blocks in each frame of the cage .xyz file, one building block after another

# the list is first indexed by the global building block index (so the leading tritopic slots stay empty)
# and is trimmed to the ditopic entries once the loop has finished
ditopic_bb_com_data = [[] for _ in range(no_building_blocks)]

with open('data_storage/coms_ditopic_bbs.xyz', 'w') as f:
    for frame_number in range(0,no_frames): # loop through frames
        f.write(f"{no_ditopic_bb}\n") # write number of ditopic building blocks
        f.write(f"frame {frame_number}\n") # write frame number

        # obtain cage position matrix for specified frame (the data file is re-read from the start for every frame)
        cage_frame_data = np.array(get_cage_frame_data(cage_file_name,frame_number))

        # loop through each ditopic building block, extract relevant atomic coordinates and calculate com
        for bb_number in range(no_tritopic_bb,no_building_blocks):
            no_atoms_in_bb = building_blocks[bb_number].get_num_atoms() # define number of atoms in building block
            # skip all tritopic atoms first, then count whole ditopic building blocks up to and including this one
            upper_index = (no_tritopic_bb*building_blocks[0].get_num_atoms()) + ((bb_number + 1 - no_tritopic_bb)*no_atoms_in_bb)
            bb_frame_data = cage_frame_data[upper_index - no_atoms_in_bb:upper_index] # get building block position matrix

            # update building block position matrix and calculate com
            # (the list entry is replaced by the building block returned from with_position_matrix)
            building_blocks[bb_number] = building_blocks[bb_number].with_position_matrix(bb_frame_data)
            rdkit_mol = building_blocks[bb_number].to_rdkit_mol()
            # pass the building block through pywindow to obtain its centre of mass
            molsys = pw.MolecularSystem.load_rdkit_mol(rdkit_mol)
            mol = molsys.system_to_molecule()
            com = mol.calculate_centre_of_mass()
            ditopic_bb_com_data[bb_number].extend([com]) # append this frame's com for the building block

            f.write(f"H {com[0]:.3f} {com[1]:.3f} {com[2]:.3f}\n")

# drop the empty tritopic slots so that index 0 is the first ditopic building block
ditopic_bb_com_data = ditopic_bb_com_data[no_tritopic_bb:]

In [None]:
# connectivity is inferred from the frame 0 centres of mass; every list holds (a,b) index pairs with a < b
# all building blocks: each tritopic building block is linked to its 3 nearest building blocks of any type
adjacency_list = infer_connectivity(bb_com_data, 3, no_tritopic_bb, no_building_blocks) # define adjacency list using all building blocks
# tritopic only: each tritopic building block is linked to its 3 nearest tritopic building blocks
adjacency_list_tritopic = infer_connectivity(tritopic_bb_com_data, 3, no_tritopic_bb, no_tritopic_bb) # define adjacency list using only tritopic building blocks (aka the tetrahedron/cube representation)
# ditopic only: each ditopic building block is linked to its 2 nearest ditopic building blocks
adjacency_list_ditopic = infer_connectivity(ditopic_bb_com_data, 2, no_ditopic_bb, no_ditopic_bb) # define adjacency list using only ditopic building blocks

In [None]:
# standard deviation of the tritopic polyhedron vertex angles, one value per frame
tritopic_angles = track_frame_symmetry_angles(
    tritopic_bb_com_data,
    adjacency_list_tritopic,
    no_frames,
)

# gaussian smoothed version of the data; the first two frames are left out of both curves
# (increase sigma for more smoothing)
tritopic_angles_smooth = gaussian_filter1d(tritopic_angles[2:], sigma=5)

figure(figsize=[10,6],dpi=300)

# raw data
plot(
    time_data[2:],
    tritopic_angles[2:],
    label='Raw Data',
    linestyle='-',
    marker='x',
    linewidth=0.6,
    markersize=4,
)
# smoothed curve on top of the raw data
plot(
    time_data[2:],
    tritopic_angles_smooth,
    label='Smoothed Curve',
    color='red',
    linewidth=1,
)

title('Tritopic Building Block Polyhedron: Angle Standard Deviation vs Time', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Standard Deviation", fontsize=15)
legend()
show()

In [None]:
# Outputs a list of lists where each inner list corresponds to a ditopic building block, with its
# elements giving the angle between its two neighbouring tritopic building blocks at each frame
def track_triditri_angles(bb_com_data, adjacency_list, no_frames):
    '''
    Calculates, for each ditopic building block, the tritopic-ditopic-tritopic angles (using centres of masses), 
    using neighbouring tritopic building blocks. 

    Relies on the notebook-level variables no_tritopic_bb and no_ditopic_bb, and on the tritopic building
    blocks coming before the ditopic building blocks in bb_com_data.

    Inputs:
    bb_com_data (list): list[no_bbs][no_frames][coordinates] with building block centre of mass data
    adjacency_list (list): list containing connectivity information between the building blocks. Each 
                           element is a tuple, which represents a nearest neighbour pair. See
                           infer_connectivity function for more detail.
    no_frames (int): the number of frames to be evaluated
    Returns:
    tri_di_tri_angles (list): list[no_ditopic_bbs][no_frames] containing the tritopic-ditopic-tritopic angle
                              for each ditopic building block for each frame. Entry k belongs to the k-th
                              ditopic building block, i.e. to bb_com_data[no_tritopic_bb + k].

    NOTE: entries used to be stored at (index - no_ditopic_bb), which placed them in a rotated order through
    negative list indexes. Any index into the returned list that was picked by hand before this was
    corrected has to be re-checked.
    '''

    no_building_blocks = len(bb_com_data)
    tri_di_tri_angles = [[] for _ in range(no_ditopic_bb)] # list of lists to store tri-di-tri angles

    # loop through ditopic building blocks
    for ditopic_bb_index in range(no_tritopic_bb,no_building_blocks):

        # the ditopic building blocks start at index no_tritopic_bb in bb_com_data, so this is the
        # offset that maps them onto 0..no_ditopic_bb-1 in the output
        output_index = ditopic_bb_index - no_tritopic_bb

        # neighbouring building block indexes of the current ditopic building block
        # (ditopic index will always be second element of adjacency_list tuple)
        neighbours = [edge[0] for edge in adjacency_list if edge[1] == ditopic_bb_index]
        # connectivity does not depend on the frame, so the neighbour pairs are built once here
        neighbour_pairs = list(combinations(neighbours, 2))

        # loop through each frame and calculate triditri angle
        for frame_number in range(no_frames):
            for first_neighbour, second_neighbour in neighbour_pairs:
                ditopic_com = bb_com_data[ditopic_bb_index][frame_number]
                tri_di_tri_angles[output_index].append(
                    calculate_angle(
                        ditopic_com,
                        bb_com_data[first_neighbour][frame_number],
                        ditopic_com,
                        bb_com_data[second_neighbour][frame_number],
                    )
                )

    return tri_di_tri_angles

In [None]:
# plot all tritopic-ditopic-tritopic angles vs time (raw data, first two frames left out)
triditri_angles = track_triditri_angles(bb_com_data,adjacency_list,no_frames)

figure(figsize=[10,6],dpi=300)

# one raw data curve per ditopic building block
for ditopic_angles in triditri_angles[:no_ditopic_bb]:
    plot(time_data[2:], ditopic_angles[2:], linewidth=0.6, markersize=4)

title('Tritopic-Ditopic-Tritopic Angles Raw Data vs Time', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Angle (°)", fontsize=15)
show()

In [None]:
# plot notable tritopic-ditopic-tritopic angles vs time, requires manual inspection to find notable data curves
triditri_angles = track_triditri_angles(bb_com_data,adjacency_list,no_frames)

# indexes of the curves to show; edit this list after inspecting the plot of all curves
notable_triditri_indexes = [0, 4, 6, 8]

figure(figsize=[10,6],dpi=300)
for notable_index in notable_triditri_indexes:
    plot(time_data[2:], triditri_angles[notable_index][2:], linewidth=0.6, markersize=4)

title('Notable Tritopic-Ditopic-Tritopic Angles vs Time (Raw Data)', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Angle (°)", fontsize=15)
show()

In [None]:
# plot smoothed curves for notable tritopic-ditopic-tritopic angles vs time
triditri_angles = track_triditri_angles(bb_com_data,adjacency_list,no_frames)

# gaussian smoothing of the four notable curves, first two frames left out
# (increase sigma for more smoothing)
(
    first_triditri_smooth,
    second_triditri_smooth,
    third_triditri_smooth,
    fourth_triditri_smooth,
) = [
    gaussian_filter1d(triditri_angles[notable_index][2:], sigma=5)
    for notable_index in (0, 4, 6, 8)
]

figure(figsize=[10,6],dpi=300)
for smoothed_curve in (
    first_triditri_smooth,
    second_triditri_smooth,
    third_triditri_smooth,
    fourth_triditri_smooth,
):
    plot(time_data[2:], smoothed_curve, linewidth=1)

title('Notable Tritopic-Ditopic-Tritopic Angles vs Time (Smoothed Curves)', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Angle (°)", fontsize=15)
show()

Now we work towards calculating the key dihedrals, which need different definitions for CC3/7/8 and B4/B6/D4/D6 cages (because the nitrogens are defined in stk on the ditopic building block for the former and on the tritopic building block for the latter). To help with this, we define a `get_desired_indexes` function, which extracts the indexes of a desired atom type in a given building block.

In [None]:
def get_desired_indexes(desired_atom_type,frame_number, global_index_range):
    '''
    Finds the indexes, within a frame, of a desired atom type for a given building block, given the global 
    indexes of the building block.

    The file named by the module-level cage_file_name is read line by line; reading stops as soon as the
    last line of global_index_range has been passed, so lines after the building block are never parsed.
    Blank lines inside the range are skipped.

    Inputs:
    desired_atom_type (str): the desired atom type to extract (eg 'C'); compared with the first
                             whitespace-separated token of each line
    frame_number (int): the current frame number
    global_index_range (range): a range defining the global (0-based file line) indexes that a given building 
                                block can be found in from the input .xyz data file
    Returns:
    index_list (list): list of frame indexes, where atoms of the desired atom type from the specified building 
                       block can be found. A frame index is the global line index minus the two header lines
                       of the frame and minus the (no_atoms_in_cage+2) lines of every preceding frame.
    '''

    index_list = [] # setup list to store indexes

    # last global line that can possibly match; -1 for an empty range so the loop stops on the first line
    last_line_needed = max(global_index_range, default=-1)

    # number of file lines that precede the first atom of this frame
    frame_line_offset = 2 + frame_number*(no_atoms_in_cage+2)

    # loop through data file, search lines related to the input range and add frame index to list if atom type == desired atom type
    with open(cage_file_name, "r") as file:
        for line_number, line in enumerate(file):
            if line_number > last_line_needed:
                break # everything of interest has been read
            if line_number not in global_index_range:
                continue
            fields = line.split()
            if fields and fields[0]==desired_atom_type:
                index_list.append(line_number - frame_line_offset)
    return index_list

In [None]:
if System_type == 1: # for CC3/7/8 systems
    def _nearest_atom_indexes(frame_positions, reference_index, candidate_indexes, count):
        '''
        Finds the candidate atoms that lie closest to a reference atom within one frame.

        Inputs:
        frame_positions (array): cage position matrix of the frame, indexed by frame atom index
        reference_index (int): frame index of the atom the distances are measured from
        candidate_indexes (list): frame indexes of the atoms to rank
        count (int): number of nearest atoms wanted
        Returns:
        nearest (list): the first `count` frame indexes of candidate_indexes when ordered from nearest to 
                        furthest (fewer if there are fewer candidates). Candidates at equal distance keep 
                        their order in candidate_indexes.
        '''
        distances = [euclidean_distance(frame_positions[reference_index],frame_positions[index]) for index in candidate_indexes]
        # sorted() is stable, so ties resolve to the earliest candidate
        ranked_positions = sorted(range(len(candidate_indexes)), key=lambda position: distances[position])
        return [candidate_indexes[position] for position in ranked_positions[:count]]


    def track_key_dihedrals():
        '''
        Tracks the four key dihedrals at each ditopic building block, for each frame.

        For each ditopic building block the first two nitrogens found in its atom range (N1, N2) are used. 
        With Cd(N) the ditopic carbon nearest to N, and Ct(N) / Ct'(N) the nearest / second nearest carbon 
        to N among the carbons of the first two adjacent tritopic building blocks, the dihedrals are:
            0: Ct'(N1) - Ct(N1) - N1     - Cd(N1)
            1: Ct(N1)  - N1     - Cd(N1) - Cd(N2)
            2: Cd(N1)  - Cd(N2) - N2     - Ct(N2)
            3: Cd(N2)  - N2     - Ct(N2) - Ct'(N2)
        Atom selection is purely distance based and is repeated for every frame.

        Reads the module-level no_ditopic_bb, no_tritopic_bb, no_building_blocks, building_blocks, 
        adjacency_list, no_frames, no_atoms_in_cage and cage_file_name.

        Returns:
        dihedral_data (list): list[no_ditopic_bbs][no_dihedral_types][no_frames] containing the absolute value 
                              of the dihedral angle (as returned by stko.calculate_dihedral) for each ditopic 
                              building block and type of dihedral, for each frame.        
        '''

        dihedral_data = [[[],[],[],[]] for _ in range(no_ditopic_bb)] # setup list to store output data

        # define parameters
        no_atoms_in_ditopic_bb = building_blocks[no_tritopic_bb].get_num_atoms()
        no_atoms_in_tritopic_bb = building_blocks[0].get_num_atoms()

        # the neighbouring tritopic bbs of each ditopic bb do not depend on the frame, so look them up once
        # (ditopic index is the second element of each adjacency_list tuple, tritopic index the first)
        ditopic_bb_indexes = range(no_tritopic_bb,no_building_blocks)
        neighbouring_tritopic_bbs = {
            ditopic_bb_index: [pair[0] for pair in adjacency_list if pair[1] == ditopic_bb_index]
            for ditopic_bb_index in ditopic_bb_indexes
        }

        # loop through each frame
        for frame_number in range(no_frames):
            cage_frame_data = np.array(get_cage_frame_data(cage_file_name,frame_number)) # obtain cage position matrix for specified frame
            frame_line_offset = 2 + frame_number*(no_atoms_in_cage+2) # file lines preceding the first atom of this frame

            # loop through each ditopic bb
            for ditopic_bb_index in ditopic_bb_indexes:
                adjacent_vector_index = neighbouring_tritopic_bbs[ditopic_bb_index]

                # global (file line) range holding the atoms of this ditopic bb in the current frame
                ditopic_upper_index = (no_tritopic_bb*no_atoms_in_tritopic_bb) + ((ditopic_bb_index + 1 - no_tritopic_bb)*no_atoms_in_ditopic_bb)
                global_ditopic_index_range = range(ditopic_upper_index - no_atoms_in_ditopic_bb + frame_line_offset, ditopic_upper_index + frame_line_offset)

                # frame indexes of the two ditopic nitrogens and of all ditopic carbons
                nitrogen_indexes = get_desired_indexes('N',frame_number,global_ditopic_index_range)
                first_nitrogen_index = nitrogen_indexes[0]
                second_nitrogen_index = nitrogen_indexes[1]
                ditopic_bb_carbon_indexes = get_desired_indexes('C',frame_number,global_ditopic_index_range)

                # frame indexes of the carbons on the first two neighbouring tritopic bbs (first bb's carbons first)
                tritopic_bb_carbon_indexes = []
                for tritopic_bb_index in (adjacent_vector_index[0], adjacent_vector_index[1]):
                    tritopic_upper_index = (tritopic_bb_index+1)*no_atoms_in_tritopic_bb
                    global_tritopic_index_range = range(tritopic_upper_index - no_atoms_in_tritopic_bb + frame_line_offset, tritopic_upper_index + frame_line_offset)
                    tritopic_bb_carbon_indexes.extend(get_desired_indexes('C',frame_number,global_tritopic_index_range))

                # ditopic carbon closest to each nitrogen
                first_nitrogen_nearest_ditopic_c_index = _nearest_atom_indexes(cage_frame_data, first_nitrogen_index, ditopic_bb_carbon_indexes, 1)[0]
                second_nitrogen_nearest_ditopic_c_index = _nearest_atom_indexes(cage_frame_data, second_nitrogen_index, ditopic_bb_carbon_indexes, 1)[0]

                # closest and second closest tritopic carbons to each nitrogen
                first_nitrogen_nearest_tritopic_c_index, first_nitrogen_second_nearest_tritopic_c_index = _nearest_atom_indexes(cage_frame_data, first_nitrogen_index, tritopic_bb_carbon_indexes, 2)
                second_nitrogen_nearest_tritopic_c_index, second_nitrogen_second_nearest_tritopic_c_index = _nearest_atom_indexes(cage_frame_data, second_nitrogen_index, tritopic_bb_carbon_indexes, 2)

                # positions of each atom related to the dihedrals
                first_nitrogen_pos = cage_frame_data[first_nitrogen_index]
                first_nitrogen_nearest_ditopic_c_pos = cage_frame_data[first_nitrogen_nearest_ditopic_c_index]
                first_nitrogen_nearest_tritopic_c_pos = cage_frame_data[first_nitrogen_nearest_tritopic_c_index]
                first_nitrogen_second_nearest_tritopic_c_pos = cage_frame_data[first_nitrogen_second_nearest_tritopic_c_index]
                second_nitrogen_pos = cage_frame_data[second_nitrogen_index]
                second_nitrogen_nearest_ditopic_c_pos = cage_frame_data[second_nitrogen_nearest_ditopic_c_index]
                second_nitrogen_nearest_tritopic_c_pos = cage_frame_data[second_nitrogen_nearest_tritopic_c_index]
                second_nitrogen_second_nearest_tritopic_c_pos = cage_frame_data[second_nitrogen_second_nearest_tritopic_c_index]

                # calculate the four dihedrals
                first_dihedral = stko.calculate_dihedral(first_nitrogen_second_nearest_tritopic_c_pos,first_nitrogen_nearest_tritopic_c_pos,first_nitrogen_pos,first_nitrogen_nearest_ditopic_c_pos)
                second_dihedral = stko.calculate_dihedral(first_nitrogen_nearest_tritopic_c_pos,first_nitrogen_pos,first_nitrogen_nearest_ditopic_c_pos,second_nitrogen_nearest_ditopic_c_pos)
                third_dihedral = stko.calculate_dihedral(first_nitrogen_nearest_ditopic_c_pos,second_nitrogen_nearest_ditopic_c_pos,second_nitrogen_pos,second_nitrogen_nearest_tritopic_c_pos)
                fourth_dihedral = stko.calculate_dihedral(second_nitrogen_nearest_ditopic_c_pos,second_nitrogen_pos,second_nitrogen_nearest_tritopic_c_pos,second_nitrogen_second_nearest_tritopic_c_pos)

                # add absolute dihedral values to dihedral_data
                bb_dihedrals = dihedral_data[ditopic_bb_index-no_tritopic_bb]
                bb_dihedrals[0].append(abs(first_dihedral))
                bb_dihedrals[1].append(abs(second_dihedral))
                bb_dihedrals[2].append(abs(third_dihedral))
                bb_dihedrals[3].append(abs(fourth_dihedral))

        return dihedral_data
    

if System_type == 2: # for B4/B6/D4/D6 systems
    def track_key_dihedrals():
        '''
        Tracks the four key dihedrals at each tritopic building block, for each frame.

        Returns:
        dihedral_data (list): list[no_tritopic_bbs][no_dihedral_types][no_frames] containing the dihedral angle
                              data for each tritopic building block and type of dihedral, for each frame.        
        '''

        dihedral_data = [[[],[],[],[],[],[]] for _ in range(no_tritopic_bb)] # setup list to store output data

        # define parameters
        no_atoms_in_ditopic_bb = building_blocks[no_tritopic_bb].get_num_atoms()
        no_atoms_in_tritopic_bb = building_blocks[0].get_num_atoms()

        # loop through each frame
        for frame_number in range(no_frames):
            cage_frame_data = np.array(get_cage_frame_data(cage_file_name,frame_number)) # obtain entire cage position matrix for specified frame

            # loop through each tritopic bb
            for tritopic_bb_index in range(no_tritopic_bb):
                adjacent_vector_index = [] # reset adjacent vector index list

                # loop through adjacency_list and, for the current building block, build adjacent_vector_index list, giving the neighbouring building block indexes
                for count in range(len(adjacency_list)):
                    if tritopic_bb_index == adjacency_list[count][0]: # (tritopic index will always be first element of adjacency_list tuple)
                        adjacent_vector_index.extend([adjacency_list[count][1]])

                # get frame indexes of the three tritopic nitrogens related to key dihedrals
                tritopic_upper_index = ((tritopic_bb_index+1)*no_atoms_in_tritopic_bb) # define region in which building block data resides within cage_frame_data
                global_tritopic_index_range = range(tritopic_upper_index - no_atoms_in_tritopic_bb + 2+frame_number*(no_atoms_in_cage+2),tritopic_upper_index + 2+frame_number*(no_atoms_in_cage+2)) # define global index range
                nitrogen_indexes = get_desired_indexes('N',frame_number,global_tritopic_index_range) # get indexes of the nitrogens on the tritopic bb
                first_nitrogen_index = nitrogen_indexes[0] # obtain index of first tritopic bb nitrogen
                second_nitrogen_index = nitrogen_indexes[1] # obtain index of second tritopic bb nitrogen
                third_nitrogen_index = nitrogen_indexes[2] # obtain index of third tritopic bb nitrogen
                #print('N indexes:',nitrogen_indexes)
                

                # get frame indexes for the 2 tritopic carbons nearest to the first nitrogen
                tritopic_bb_carbon_indexes = get_desired_indexes('C',frame_number,global_tritopic_index_range) # get indexes of carbons on the tritopic bb
                first_nitrogen_tritopic_carbons_distances = [[euclidean_distance(cage_frame_data[first_nitrogen_index],cage_frame_data[index])] for index in tritopic_bb_carbon_indexes] # evaluate distances between tritopic bb carbons and the first nitrogen
                first_min_index = first_nitrogen_tritopic_carbons_distances.index(min(first_nitrogen_tritopic_carbons_distances))  # get index of smallest distance
                first_nitrogen_nearest_tritopic_c_index = tritopic_bb_carbon_indexes[first_min_index] # obtain index of tritopic bb carbon closest to the first nitrogen
                reduced_list = first_nitrogen_tritopic_carbons_distances[0:first_min_index]+first_nitrogen_tritopic_carbons_distances[first_min_index+1:]
                second_min_index = reduced_list.index(min(reduced_list))  # get index of second smallest distance
                if second_min_index >= first_min_index:
                    second_min_index = second_min_index +1 # increase second min index by one to counteract possible offset due to removing the first min index number
                first_nitrogen_second_nearest_tritopic_c_index = tritopic_bb_carbon_indexes[second_min_index] # obtain index of tritopic bb carbon closest to the first nitrogen
                # get frame indexes for the 2 tritopic carbons nearest to the second nitrogen
                second_nitrogen_tritopic_carbons_distances = [[euclidean_distance(cage_frame_data[second_nitrogen_index],cage_frame_data[index])] for index in tritopic_bb_carbon_indexes] # evaluate distances between tritopic bb carbons and the second nitrogen
                first_min_index = second_nitrogen_tritopic_carbons_distances.index(min(second_nitrogen_tritopic_carbons_distances))  # get index of smallest distance
                second_nitrogen_nearest_tritopic_c_index = tritopic_bb_carbon_indexes[first_min_index] # obtain index of tritopic bb carbon closest to the second nitrogen
                reduced_list = second_nitrogen_tritopic_carbons_distances[0:first_min_index]+second_nitrogen_tritopic_carbons_distances[first_min_index+1:]
                second_min_index = reduced_list.index(min(reduced_list))  # get index of second smallest distance
                if second_min_index >= first_min_index:
                    second_min_index = second_min_index +1 # increase second min index by one to counteract possible offset due to removing the first min index number
                second_nitrogen_second_nearest_tritopic_c_index = tritopic_bb_carbon_indexes[second_min_index] # obtain index of tritopic bb carbon closest to the first nitrogen
                # get frame indexes for the 2 tritopic carbons nearest to the third nitrogen
                third_nitrogen_tritopic_carbons_distances = [[euclidean_distance(cage_frame_data[third_nitrogen_index],cage_frame_data[index])] for index in tritopic_bb_carbon_indexes] # evaluate distances between tritopic bb carbons and the third nitrogen
                first_min_index = third_nitrogen_tritopic_carbons_distances.index(min(third_nitrogen_tritopic_carbons_distances))  # get index of smallest distance
                third_nitrogen_nearest_tritopic_c_index = tritopic_bb_carbon_indexes[first_min_index] # obtain index of tritopic bb carbon closest to the third nitrogen
                reduced_list = third_nitrogen_tritopic_carbons_distances[0:first_min_index]+third_nitrogen_tritopic_carbons_distances[first_min_index+1:]
                second_min_index = reduced_list.index(min(reduced_list))  # get index of second smallest distance
                if second_min_index >= first_min_index:
                    second_min_index = second_min_index +1 # increase second min index by one to counteract possible offset due to removing the first min index number
                third_nitrogen_second_nearest_tritopic_c_index = tritopic_bb_carbon_indexes[second_min_index] # obtain index of tritopic bb carbon closest to the first nitrogen


                # now we look to find index ranges for the neighbouring ditopic bbs
                first_ditopic_upper_index = (no_tritopic_bb*no_atoms_in_tritopic_bb)+((adjacent_vector_index[0]+1-no_tritopic_bb)*no_atoms_in_ditopic_bb) # define region in which first ditopic building block data resides with cage_frame_data        
                second_ditopic_upper_index = (no_tritopic_bb*no_atoms_in_tritopic_bb)+((adjacent_vector_index[1]+1-no_tritopic_bb)*no_atoms_in_ditopic_bb) # define region in which second ditopic building block data resides with cage_frame_data        
                third_ditopic_upper_index = (no_tritopic_bb*no_atoms_in_tritopic_bb)+((adjacent_vector_index[2]+1-no_tritopic_bb)*no_atoms_in_ditopic_bb) # define region in which third ditopic building block data resides with cage_frame_data        
                first_global_ditopic_index_range = range(first_ditopic_upper_index - no_atoms_in_ditopic_bb + 2+frame_number*(no_atoms_in_cage+2),first_ditopic_upper_index + 2+frame_number*(no_atoms_in_cage+2)) # define global index range
                second_global_ditopic_index_range = range(second_ditopic_upper_index - no_atoms_in_ditopic_bb + 2+frame_number*(no_atoms_in_cage+2),second_ditopic_upper_index + 2+frame_number*(no_atoms_in_cage+2)) # define global index range
                third_global_ditopic_index_range = range(third_ditopic_upper_index - no_atoms_in_ditopic_bb + 2+frame_number*(no_atoms_in_cage+2),third_ditopic_upper_index + 2+frame_number*(no_atoms_in_cage+2)) # define global index range
                first_ditopic_bb_carbon_indexes = get_desired_indexes('C',frame_number,first_global_ditopic_index_range) # get indexes of carbons on the first ditopic bb
                second_ditopic_bb_carbon_indexes = get_desired_indexes('C',frame_number,second_global_ditopic_index_range) # get indexes of carbons on the second ditopic bb
                third_ditopic_bb_carbon_indexes = get_desired_indexes('C',frame_number,third_global_ditopic_index_range) # get indexes of carbons on the third ditopic bb

                # finding the nearest two ditopic carbon indexes to the first nitrogen
                first_nitrogen_ditopic_carbons_distances = [[euclidean_distance(cage_frame_data[first_nitrogen_index],cage_frame_data[index])] for index in first_ditopic_bb_carbon_indexes]+[[euclidean_distance(cage_frame_data[first_nitrogen_index],cage_frame_data[index])] for index in second_ditopic_bb_carbon_indexes]+[[euclidean_distance(cage_frame_data[first_nitrogen_index],cage_frame_data[index])] for index in third_ditopic_bb_carbon_indexes] # evaluate distances between ditopic bb carbons and the first nitrogen
                first_min_index = first_nitrogen_ditopic_carbons_distances.index(min(first_nitrogen_ditopic_carbons_distances))  # get index of smallest distance
                reduced_list = first_nitrogen_ditopic_carbons_distances[0:first_min_index]+first_nitrogen_ditopic_carbons_distances[first_min_index+1:]
                second_min_index = reduced_list.index(min(reduced_list))  # get index of second smallest distance 
                if second_min_index >= first_min_index:
                    second_min_index = second_min_index +1 # increase second min index by one to counteract possible offset due to removing the first min index number
                # get frame indexes of carbons
                if first_min_index < len(first_ditopic_bb_carbon_indexes):
                    first_nitrogen_nearest_ditopic_c_index = first_ditopic_bb_carbon_indexes[first_min_index] # obtain index of ditopic bb carbon closest to the first nitrogen
                elif first_min_index >= len(first_ditopic_bb_carbon_indexes) and first_min_index < len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes):
                    first_nitrogen_nearest_ditopic_c_index = second_ditopic_bb_carbon_indexes[first_min_index-len(first_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon closest to the first nitrogen
                else:
                    first_nitrogen_nearest_ditopic_c_index = third_ditopic_bb_carbon_indexes[first_min_index-len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon closest to the first nitrogen
                if second_min_index < len(first_ditopic_bb_carbon_indexes):
                    first_nitrogen_second_nearest_ditopic_c_index = first_ditopic_bb_carbon_indexes[second_min_index] # obtain index of ditopic bb carbon second closest to the first nitrogen
                elif second_min_index >= len(first_ditopic_bb_carbon_indexes) and second_min_index < len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes):
                    first_nitrogen_second_nearest_ditopic_c_index = second_ditopic_bb_carbon_indexes[second_min_index-len(first_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon second closest to the first nitrogen
                else:
                    first_nitrogen_second_nearest_ditopic_c_index = third_ditopic_bb_carbon_indexes[second_min_index-len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon second closest to the first nitrogen

                # repeating and finding the nearest two ditopic carbon indexes to the second nitrogen
                second_nitrogen_ditopic_carbons_distances = [[euclidean_distance(cage_frame_data[second_nitrogen_index],cage_frame_data[index])] for index in first_ditopic_bb_carbon_indexes]+[[euclidean_distance(cage_frame_data[second_nitrogen_index],cage_frame_data[index])] for index in second_ditopic_bb_carbon_indexes]+[[euclidean_distance(cage_frame_data[second_nitrogen_index],cage_frame_data[index])] for index in third_ditopic_bb_carbon_indexes] # evaluate distances between ditopic bb carbons and the second nitrogen
                first_min_index = second_nitrogen_ditopic_carbons_distances.index(min(second_nitrogen_ditopic_carbons_distances))  # get index of smallest distance
                reduced_list = second_nitrogen_ditopic_carbons_distances[0:first_min_index]+second_nitrogen_ditopic_carbons_distances[first_min_index+1:]
                second_min_index = reduced_list.index(min(reduced_list))  # get index of second smallest distance 
                if second_min_index >= first_min_index:
                    second_min_index = second_min_index +1 # increase second min index by one to counteract possible offset due to removing the first min index number
                # get frame indexes of carbons
                if first_min_index < len(first_ditopic_bb_carbon_indexes):
                    second_nitrogen_nearest_ditopic_c_index = first_ditopic_bb_carbon_indexes[first_min_index] # obtain index of ditopic bb carbon closest to the second nitrogen
                elif first_min_index >= len(first_ditopic_bb_carbon_indexes) and first_min_index < len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes):
                    second_nitrogen_nearest_ditopic_c_index = second_ditopic_bb_carbon_indexes[first_min_index-len(first_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon closest to the second nitrogen
                else:
                    second_nitrogen_nearest_ditopic_c_index = third_ditopic_bb_carbon_indexes[first_min_index-len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon closest to the second nitrogen
                if second_min_index < len(first_ditopic_bb_carbon_indexes):
                    second_nitrogen_second_nearest_ditopic_c_index = first_ditopic_bb_carbon_indexes[second_min_index] # obtain index of ditopic bb carbon second closest to the second nitrogen
                elif second_min_index >= len(first_ditopic_bb_carbon_indexes) and second_min_index < len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes):
                    second_nitrogen_second_nearest_ditopic_c_index = second_ditopic_bb_carbon_indexes[second_min_index-len(first_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon second closest to the second nitrogen
                else:
                    second_nitrogen_second_nearest_ditopic_c_index = third_ditopic_bb_carbon_indexes[second_min_index-len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon second closest to the second nitrogen
                
                # repeating and finding the nearest two ditopic carbon indexes to the third nitrogen
                third_nitrogen_ditopic_carbons_distances = [[euclidean_distance(cage_frame_data[third_nitrogen_index],cage_frame_data[index])] for index in first_ditopic_bb_carbon_indexes]+[[euclidean_distance(cage_frame_data[third_nitrogen_index],cage_frame_data[index])] for index in second_ditopic_bb_carbon_indexes]+[[euclidean_distance(cage_frame_data[third_nitrogen_index],cage_frame_data[index])] for index in third_ditopic_bb_carbon_indexes] # evaluate distances between ditopic bb carbons and the third nitrogen
                first_min_index = third_nitrogen_ditopic_carbons_distances.index(min(third_nitrogen_ditopic_carbons_distances))  # get index of smallest distance
                reduced_list = third_nitrogen_ditopic_carbons_distances[0:first_min_index]+third_nitrogen_ditopic_carbons_distances[first_min_index+1:]
                second_min_index = reduced_list.index(min(reduced_list))  # get index of second smallest distance 
                if second_min_index >= first_min_index:
                    second_min_index = second_min_index +1 # increase second min index by one to counteract possible offset due to removing the first min index number
                # get frame indexes of carbons
                if first_min_index < len(first_ditopic_bb_carbon_indexes):
                    third_nitrogen_nearest_ditopic_c_index = first_ditopic_bb_carbon_indexes[first_min_index] # obtain index of ditopic bb carbon closest to the third nitrogen
                elif first_min_index >= len(first_ditopic_bb_carbon_indexes) and first_min_index < len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes):
                    third_nitrogen_nearest_ditopic_c_index = second_ditopic_bb_carbon_indexes[first_min_index-len(first_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon closest to the third nitrogen
                else:
                    third_nitrogen_nearest_ditopic_c_index = third_ditopic_bb_carbon_indexes[first_min_index-len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon closest to the third nitrogen
                if second_min_index < len(first_ditopic_bb_carbon_indexes):
                    third_nitrogen_second_nearest_ditopic_c_index = first_ditopic_bb_carbon_indexes[second_min_index] # obtain index of ditopic bb carbon second closest to the third nitrogen
                elif second_min_index >= len(first_ditopic_bb_carbon_indexes) and second_min_index < len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes):
                    third_nitrogen_second_nearest_ditopic_c_index = second_ditopic_bb_carbon_indexes[second_min_index-len(first_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon second closest to the third nitrogen
                else:
                    third_nitrogen_second_nearest_ditopic_c_index = third_ditopic_bb_carbon_indexes[second_min_index-len(first_ditopic_bb_carbon_indexes+second_ditopic_bb_carbon_indexes)] # obtain index of ditopic bb carbon second closest to the third nitrogen
                

                # get coms of each atom related to the dihedrals
                first_nitrogen_com = cage_frame_data[first_nitrogen_index]
                first_nitrogen_nearest_ditopic_c_com = cage_frame_data[first_nitrogen_nearest_ditopic_c_index]
                first_nitrogen_second_nearest_ditopic_c_com = cage_frame_data[first_nitrogen_second_nearest_ditopic_c_index]
                first_nitrogen_nearest_tritopic_c_com = cage_frame_data[first_nitrogen_nearest_tritopic_c_index]
                first_nitrogen_second_nearest_tritopic_c_com = cage_frame_data[first_nitrogen_second_nearest_tritopic_c_index]
                second_nitrogen_com = cage_frame_data[second_nitrogen_index]
                second_nitrogen_nearest_ditopic_c_com = cage_frame_data[second_nitrogen_nearest_ditopic_c_index]
                second_nitrogen_second_nearest_ditopic_c_com = cage_frame_data[second_nitrogen_second_nearest_ditopic_c_index]
                second_nitrogen_nearest_tritopic_c_com = cage_frame_data[second_nitrogen_nearest_tritopic_c_index]
                second_nitrogen_second_nearest_tritopic_c_com = cage_frame_data[second_nitrogen_second_nearest_tritopic_c_index]
                third_nitrogen_com = cage_frame_data[third_nitrogen_index]
                third_nitrogen_nearest_ditopic_c_com = cage_frame_data[third_nitrogen_nearest_ditopic_c_index]
                third_nitrogen_second_nearest_ditopic_c_com = cage_frame_data[third_nitrogen_second_nearest_ditopic_c_index]
                third_nitrogen_nearest_tritopic_c_com = cage_frame_data[third_nitrogen_nearest_tritopic_c_index]
                third_nitrogen_second_nearest_tritopic_c_com = cage_frame_data[third_nitrogen_second_nearest_tritopic_c_index]


                # calculate the six dihedrals
                first_dihedral = stko.calculate_dihedral(first_nitrogen_second_nearest_tritopic_c_com,first_nitrogen_nearest_tritopic_c_com,first_nitrogen_com,first_nitrogen_nearest_ditopic_c_com)
                second_dihedral = stko.calculate_dihedral(first_nitrogen_nearest_tritopic_c_com,first_nitrogen_com,first_nitrogen_nearest_ditopic_c_com,first_nitrogen_second_nearest_ditopic_c_com)
                third_dihedral = stko.calculate_dihedral(second_nitrogen_second_nearest_tritopic_c_com,second_nitrogen_nearest_tritopic_c_com,second_nitrogen_com,second_nitrogen_nearest_ditopic_c_com)
                fourth_dihedral = stko.calculate_dihedral(second_nitrogen_nearest_tritopic_c_com,second_nitrogen_com,second_nitrogen_nearest_ditopic_c_com,second_nitrogen_second_nearest_ditopic_c_com)
                fifth_dihedral = stko.calculate_dihedral(third_nitrogen_second_nearest_tritopic_c_com,third_nitrogen_nearest_tritopic_c_com,third_nitrogen_com,third_nitrogen_nearest_ditopic_c_com)
                sixth_dihedral = stko.calculate_dihedral(third_nitrogen_nearest_tritopic_c_com,third_nitrogen_com,third_nitrogen_nearest_ditopic_c_com,third_nitrogen_second_nearest_ditopic_c_com)                
                
                # add absolute dihedral values to dihedral_data
                dihedral_data[tritopic_bb_index][0].extend([abs(first_dihedral)])
                dihedral_data[tritopic_bb_index][1].extend([abs(second_dihedral)])
                dihedral_data[tritopic_bb_index][2].extend([abs(third_dihedral)])
                dihedral_data[tritopic_bb_index][3].extend([abs(fourth_dihedral)])
                dihedral_data[tritopic_bb_index][4].extend([abs(fifth_dihedral)])
                dihedral_data[tritopic_bb_index][5].extend([abs(sixth_dihedral)])
        return dihedral_data



In [None]:
# run the key dihedral tracking; the result is indexed as dihedral_data[tritopic_bb_index][dihedral]
# (dihedral = 0-5) and each entry is a list of absolute dihedral values in the order they were
# computed (presumably one value per frame - confirm against the function body)
dihedral_data = track_key_dihedrals()

In [None]:
# plot dihedral angles vs time

# number of key dihedrals plotted per tritopic building block for each system type
dihedrals_per_system_type = {
    1: 4,  # CC3/7/8 cages
    2: 6,  # B4/B6/D4/D6 cages
}

# any other System_type value produces no plots
if System_type in dihedrals_per_system_type:
    no_dihedrals = dihedrals_per_system_type[System_type]

    for i in range(len(dihedral_data)):
        # the first two entries of every series are left out of the plots (time_data is sliced the same way)
        raw_curves = [dihedral_data[i][dihedral][2:] for dihedral in range(no_dihedrals)]
        # define smoothed curves
        smoothed_curves = [gaussian_filter1d(curve, sigma=5) for curve in raw_curves]  # increase sigma for more smoothing

        # one figure for the raw data and one for the gaussian smoothed curves; each figure is created
        # exactly once, so show() no longer renders an additional empty figure after every plot
        for curves, line_width, description in (
            (raw_curves, 0.6, 'Raw Data'),
            (smoothed_curves, 1, 'Smoothed Curve'),
        ):
            figure(figsize=[10,6],dpi=300)
            for dihedral_number, curve in enumerate(curves, start=1):
                plot(time_data[2:], curve, linewidth=line_width, label=f'Dihedral {dihedral_number}')
            title(f'Dihedral Angles vs Time {description} (Building Block {i})', fontsize=15)
            xlabel("Time (ns)", fontsize=15)
            ylabel("Angle (°)", fontsize=15)
            legend()
            show()

From here on is alternative code/ideas that could still be useful.

In [None]:
def track_frame_symmetry_bond_length(bb_com_data, adjacency_list, no_frames):
    '''
    Calculates, for each frame, the standard deviation of the edge lengths of the polyhedron whose
    vertices are the building block centres of mass and whose edges are the neighbour pairs in
    adjacency_list (e.g. the tetrahedron/cube given by the tritopic building blocks of a 4+6/8+12 cage).

    Inputs:
    bb_com_data (list): list[no_bbs][no_frames][coordinates] with building block centre of mass data
    adjacency_list (list): list containing connectivity information between the building blocks. Each
                           element is a tuple, which represents a nearest neighbour pair. See
                           infer_connectivity function for more detail.
    no_frames (int): the number of frames to be evaluated
    Returns:
    frame_stdevs (list): list containing the standard deviation (np.std) of the edge lengths at each frame
    '''

    frame_stdevs = []  # one standard deviation per frame, in frame order

    for frame in range(no_frames):
        # length of every edge (neighbouring pair separation) in this frame
        edge_lengths = []
        for first_bb, second_bb in adjacency_list:
            first_com = bb_com_data[first_bb][frame]
            second_com = bb_com_data[second_bb][frame]
            edge_lengths.append(euclidean_distance(first_com, second_com))

        # spread of the edge lengths within this frame
        frame_stdevs.append(np.std(edge_lengths))

    return frame_stdevs

In [None]:
# the tritopic plot in particular was expected to provide information on the symmetry of the
# cubic/tetrahedral shape that should be formed by the tritopic bbs, but this proved ineffective
# compared to the angle tracking method earlier
# to observe plots based on a polyhedron defined from all building blocks or only ditopic building
# blocks, uncomment the relevant lines

# per-frame standard deviation of the polyhedron edge lengths
#both_bb_length_symmetry = track_frame_symmetry_bond_length(bb_com_data, adjacency_list, no_frames) # using all building blocks
tritopic_bb_length_symmetry = track_frame_symmetry_bond_length(
    tritopic_bb_com_data,
    adjacency_list_tritopic,
    no_frames,
)  # using only tritopic building blocks
#ditopic_bb_length_symmetry = track_frame_symmetry_bond_length(ditopic_bb_com_data, adjacency_list_ditopic, no_frames) # using only ditopic building blocks

# plot the standard deviations against time
figure(figsize=[10,6], dpi=300)
#plot(time_data[2:],both_bb_length_symmetry[2:], linestyle='-',linewidth=0.6, label = 'both bbs')
plot(
    time_data[2:],
    tritopic_bb_length_symmetry[2:],
    linestyle='-',
    linewidth=0.6,
    label='tritopic bb',
)
#plot(time_data[2:],ditopic_bb_length_symmetry[2:], linestyle='-',linewidth=0.6, label = 'ditopic bb')
title('Tritopic Building Block Polyhedron: Edge Lengths Standard Deviation vs Time', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Standard Deviation", fontsize=15)
legend()
show()

In [None]:
# follows every individual 'bond' (polyhedron edge) between building blocks over all frames, recording
# its length in each frame and the standard deviation of those lengths
def track_individual_bond_length(bb_com_data, adjacency_list, no_frames):
    '''
    Calculates, for each polyhedron edge, its length in every frame and the standard deviation of
    those lengths across the frames. The resulting standard deviation mapping is also printed.

    Inputs:
    bb_com_data (list): list[no_bbs][no_frames][coordinates] with building block centre of mass data
    adjacency_list (list): list containing connectivity information between the building blocks. Each
                           element is a tuple, which represents a nearest neighbour pair. See
                           infer_connectivity function for more detail.
    no_frames (int): the number of frames to be evaluated
    Returns:
    std_devs (dict): dictionary mapping each adjacency list neighbour pair to its edge length
                     standard deviation (np.std)
    distances (dict): dictionary mapping each adjacency list neighbour pair to a list containing its
                      edge length for every frame
    '''

    # every (bb1, bb2) neighbour pair starts with its own empty list of edge lengths
    edge_lengths = {}
    for edge in adjacency_list:
        edge_lengths[edge] = []

    # walk the frames in order, recording the separation of each neighbour pair
    for frame in range(no_frames):
        for first_bb, second_bb in adjacency_list:
            first_com = bb_com_data[first_bb][frame]
            second_com = bb_com_data[second_bb][frame]
            edge_lengths[(first_bb, second_bb)].append(euclidean_distance(first_com, second_com))

    # standard deviation of each edge's lengths, keyed by the same neighbour pairs
    edge_stdevs = {}
    for edge, lengths in edge_lengths.items():
        edge_stdevs[edge] = np.std(lengths)

    print('neighbour pair: stdev mapping:', edge_stdevs)
    return edge_stdevs, edge_lengths

In [None]:
# each result is the (std_devs, distances) tuple returned by track_individual_bond_length
ind = track_individual_bond_length(
    bb_com_data,
    adjacency_list,
    no_frames,
)  # define polyhedron with both bbs
ind_tritopic = track_individual_bond_length(
    tritopic_bb_com_data,
    adjacency_list_tritopic,
    no_frames,
)  # define polyhedron with tritopic bbs
ind_ditopic = track_individual_bond_length(
    ditopic_bb_com_data,
    adjacency_list_ditopic,
    no_frames,
)  # define polyhedron with ditopic bbs

In [None]:
# plot edge length values for the polyhedron based on both bbs
# ind[1] maps each neighbour pair to its edge lengths; gather them in adjacency_list order
ind_values = [ind[1][pair] for pair in adjacency_list]

# one line per polyhedron edge
figure(figsize=[10,6], dpi=300)
for edge_lengths in ind_values:
    plot(time_data[2:], edge_lengths[2:], linewidth=0.6)

title('Both Building Blocks Polyhedron: Edge Lengths Vs Time', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Distance (Å)", fontsize=15)
show()

In [None]:
# plot edge length values for the polyhedron based on tritopic bbs
# ind_tritopic[1] maps each neighbour pair to its edge lengths; gather them in adjacency_list_tritopic order
ind_tritopic_values = [ind_tritopic[1][pair] for pair in adjacency_list_tritopic]

# one line per polyhedron edge
figure(figsize=[10,6], dpi=300)
for edge_lengths in ind_tritopic_values:
    plot(time_data[2:], edge_lengths[2:], linewidth=0.6)

title('Tritopic Building Blocks Polyhedron: Edge Lengths Vs Time', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Distance (Å)", fontsize=15)
show()

In [None]:
# plot edge length values for the polyhedron based on ditopic bbs
# ind_ditopic[1] maps each neighbour pair to its edge lengths; gather them in adjacency_list_ditopic order
ind_ditopic_values = [ind_ditopic[1][pair] for pair in adjacency_list_ditopic]

# one line per polyhedron edge
figure(figsize=[10,6], dpi=300)
for edge_lengths in ind_ditopic_values:
    plot(time_data[2:], edge_lengths[2:], linewidth=0.6)

title('Ditopic Building Blocks Polyhedron: Edge Lengths Vs Time', fontsize=15)
xlabel("Time (ns)", fontsize=15)
ylabel("Distance (Å)", fontsize=15)
show()