In [None]:
Create a python function that calculates the mean of the minimum distance between any two residues
pairs found in the same chain of a PDB. The script, when executed by command line, should output in
standard output the mean distance for each chain (with 4 decimal positions). The python script should use
a single argument corresponding to the PDB file path to use. This command line argument is optional. If
the PDB file path is not defined, read the PDB file from standard input.
The function must return a dictionary with chains as keys and mean minimum distances as values. It uses
a single argument which specifies the path of the PDB file If the argument pdb_file_path is None, read the
PDB file from standard input.

calculate_pdb_chain_mean_minimum_distances(pdb_file_path)

When the file is imported as a module, it should not execute the function. The function should only be
called when the script is executed by command line.

Sample output in command line:
A: 22.7400
B: 20.4224
N: 23.9393
F: 23.9730
J: 23.4187
Conditions:

● Read the pdb file parsing the pdb file using your code (no third-party
libraries)
● PDB File format specification:

PDB format is positional! (i.e. the information is located in specific positions in each line)

In [26]:

import sys #importing modules 
def calculate_pdb_chain_mean_minimum_distances(pdb_file_path, pdb = None:
    
    atoms_position = []
    residues = {}
     
    with open(pdb_file_path, 'r') as fd:
        file = fd.readlines()
        
        for line in file:
            
            if line.startswith('ATOM'):
                sequence = line[21].replace(" ", "")
                
                res_num = int(line[22:26])
                res_name = line[17:20].replace(" ", "")
                residues = '_'.join([str(res_num), res_name])
                
                #print(residues)
                position_x = float(line[30:38])
                position_y = float(line[38:46])
                position_z = float(line[46:54])
                #print(position_x, position_y, position_z)
                
                atoms_position.append((position_x, position_y, position_z))
                
                res_id = line[6:11].replace(" ", "")
        #print(atoms_position)
                
    

calculate_pdb_chain_mean_minimum_distances('2y9h.pdb')

In [11]:
import sys

def calculate_pdb_chain_mean_minimum_distances(pdb_file_path):
    atoms_positions = {}
    
    with open(pdb_file_path, 'r') as fd:
        file = fd.readlines()
        
        for line in file:
            if line.startswith('ATOM'):
                chain_id = line[21]
                position_x = float(line[30:38].strip())
                position_y = float(line[38:46].strip())
                position_z = float(line[46:54].strip())
                
                if chain_id not in atoms_positions:
                    atoms_positions[chain_id] = []
                
                atoms_positions[chain_id].append((position_x, position_y, position_z))
    
    # Calculate mean minimum distances for each chain
    mean_distances = {}
    
    for chain_id, positions in atoms_positions.items():
        min_distance = float('inf')
        
        for i in range(len(positions) - 1):
            for j in range(i + 1, len(positions)):
                distance = ((positions[i][0] - positions[j][0])**2 +
                            (positions[i][1] - positions[j][1])**2 +
                            (positions[i][2] - positions[j][2])**2) ** 0.5
                
                if distance < min_distance:
                    min_distance = distance
        
        mean_distances[chain_id] = min_distance

    # Print the result in the specified format
    for chain_id, mean_distance in mean_distances.items():
        print(f"{chain_id}: {mean_distance:.4f}")

    return mean_distances

if __name__ == "__main__":
    import sys
    
    # Check if a PDB file path is provided as a command line argument
    if len(sys.argv) > 1:
        pdb_file_path = sys.argv[1]
    else:
        # Read the PDB file from standard input if no file path is provided
        pdb_file_path = None

    # Call the function and print the mean distances
    calculate_pdb_chain_mean_minimum_distances('2y9h.pdb')
    
    #python pdb_parser.py 2y9h.pdb


A: 1.1745
B: 1.2158
C: 1.1526
D: 1.2060
E: 1.1718
F: 1.2107
G: 1.1713
H: 1.2117
I: 1.2024
J: 1.2078
K: 1.2051
L: 1.2172
M: 1.2128
N: 1.2041
O: 1.2221
P: 1.2178


In [12]:
import sys
import math
def calculate_pdb_chain_mean_minimum_distances(pdb_file_path):
    atoms_positions = {}

    with open(pdb_file_path, 'r') as fd:
        file = fd.readlines()

        for line in file:
            if line.startswith('ATOM'):
                chain_id = line[21]
                position_x = float(line[30:38].strip())
                position_y = float(line[38:46].strip())
                position_z = float(line[46:54].strip())

                if chain_id not in atoms_positions:
                    atoms_positions[chain_id] = []

                atoms_positions[chain_id].append((position_x, position_y, position_z))

    # Calculate mean minimum distances for each chain
    mean_distances = {}

    for chain_id, positions in atoms_positions.items():
        total_distance = 0
        pair_count = 0

        for i in range(len(positions) - 1):
            for j in range(i + 1, len(positions)):
                distance = ((positions[i][0] - positions[j][0])**2 +
                            (positions[i][1] - positions[j][1])**2 +
                            (positions[i][2] - positions[j][2])**2) ** 0.5

                total_distance += distance
                pair_count += 1

        if pair_count > 0:
            mean_distances[chain_id] = total_distance / pair_count
        else:
            mean_distances[chain_id] = 0

    # Print the result in the specified format
    for chain_id, mean_distance in mean_distances.items():
        print(f"{chain_id}: {mean_distance:.4f}")

    return mean_distances

if __name__ == "__main__":
    import sys

    # Check if a PDB file path is provided as a command line argument
    if len(sys.argv) > 1:
        pdb_file_path = sys.argv[1]
    else:
        # Read the PDB file from standard input if no file path is provided
        pdb_file_path = None

    # Call the function and print the mean distances
    calculate_pdb_chain_mean_minimum_distances('2y9h.pdb')


A: 23.6736
B: 15.4708
C: 23.6569
D: 15.1061
E: 23.4883
F: 15.3731
G: 22.8161
H: 15.0981
I: 22.4109
J: 15.5233
K: 22.2154
L: 15.5913
M: 22.3007
N: 15.1877
O: 21.6406
P: 15.2511


In [11]:
import sys
def calculate_pdb_chain_mean_minimum_distances(pdb_file_path):
    atoms_positions = {}

    with open(pdb_file_path, 'r') as fd:
        file = fd.readlines()

        for line in file:
            if line.startswith('ATOM'):
                chain_id = line[21]
                position_x = float(line[30:38].strip())
                position_y = float(line[38:46].strip())
                position_z = float(line[46:54].strip())

                if chain_id not in atoms_positions:
                    atoms_positions[chain_id] = []

                atoms_positions[chain_id].append((position_x, position_y, position_z))

    # Calculate mean minimum distances for each chain
    mean_distances = {}

    for chain_id, positions in atoms_positions.items():
        #print(f"Chain {chain_id} positions: {positions}")
        total_distance = 0
        pair_count = 0

        for i in range(len(positions) - 1):
            for j in range(i + 1, len(positions)):
                distance = ((positions[i][0] - positions[j][0])**2 +
                            (positions[i][1] - positions[j][1])**2 +
                            (positions[i][2] - positions[j][2])**2) ** 0.5

                total_distance += distance
                pair_count += 1

        if pair_count > 0:
            mean_distance = total_distance / pair_count
            mean_distances[chain_id] = mean_distance
        else:
            mean_distances[chain_id] = 0

    # Print the result in the specified format
    for chain_id, mean_distance in mean_distances.items():
        print(f"{chain_id}: {mean_distance:.4f}")

    return mean_distances

if __name__ == "__main__":
    import sys

    # Check if a PDB file path is provided as a command line argument
    if len(sys.argv) > 1:
        pdb_file_path = sys.argv[1]
    else:
        # Read the PDB file from standard input if no file path is provided
        pdb_file_path = None

    # Call the function and print the mean distances
    calculate_pdb_chain_mean_minimum_distances('2y9h.pdb')


Chain A positions: [(-19.432, -5.094, 35.75), (-19.331, -6.518, 35.475), (-20.065, -6.881, 34.198), (-20.426, -5.995, 33.419), (-20.278, -8.177, 33.973), (-21.03, -8.644, 32.807), (-22.11, -9.617, 33.254), (-22.123, -10.03, 34.415), (-20.104, -9.313, 31.806), (-23.005, -9.991, 32.339), (-24.05, -10.951, 32.659), (-24.243, -12.004, 31.577), (-23.74, -11.858, 30.46), (-25.352, -10.224, 32.898), (-25.208, -9.023, 33.799), (-26.824, -8.309, 34.093), (-27.756, -9.785, 34.502), (-24.974, -13.067, 31.904), (-25.171, -14.163, 30.955), (-26.605, -14.653, 30.855), (-27.298, -14.808, 31.858), (-24.238, -15.325, 31.278), (-22.801, -14.964, 31.102), (-22.014, -14.308, 31.991), (-21.985, -15.224, 29.949), (-20.753, -14.144, 31.468), (-20.709, -14.705, 30.219), (-22.213, -15.854, 28.719), (-19.661, -14.786, 29.305), (-21.168, -15.936, 27.811), (-19.909, -15.408, 28.111), (-27.048, -14.881, 29.627), (-28.381, -15.403, 29.398), (-28.239, -16.68, 28.638), (-27.465, -16.759, 27.684), (-29.229, -14.443, 2

In [12]:
import sys

def calculate_pdb_chain_mean_minimum_distances(pdb_file_path):
    residues_positions = {}

    with open(pdb_file_path, 'r') as fd:
        file = fd.readlines()

        for line in file:
            if line.startswith('ATOM'):
                chain_id = line[21]
                residue_num = int(line[22:26])
                residue_name = line[17:20].strip()
                position_x = float(line[30:38].strip())
                position_y = float(line[38:46].strip())
                position_z = float(line[46:54].strip())

                residue_key = f"{residue_num}_{residue_name}"

                if chain_id not in residues_positions:
                    residues_positions[chain_id] = {}

                if residue_key not in residues_positions[chain_id]:
                    residues_positions[chain_id][residue_key] = []

                residues_positions[chain_id][residue_key].append((position_x, position_y, position_z))

    # Calculate minimum distances for each pair of residues
    min_distances = {}

    for chain_id, residues in residues_positions.items():
        min_distances[chain_id] = {}

        for residue_key_i, positions_i in residues.items():
            for residue_key_j, positions_j in residues.items():
                if residue_key_i != residue_key_j:
                    min_distance = float('inf')

                    for position_i in positions_i:
                        for position_j in positions_j:
                            distance = sum((a - b) ** 2 for a, b in zip(position_i, position_j)) ** 0.5
                            min_distance = min(min_distance, distance)

                    min_distances[chain_id][(residue_key_i, residue_key_j)] = min_distance

    # Calculate mean distance for each chain
    mean_distances = {}

    for chain_id, distances in min_distances.items():
        mean_distance = sum(distances.values()) / len(distances)
        mean_distances[chain_id] = mean_distance

    # Print the result in the specified format
    for chain_id, mean_distance in mean_distances.items():
        print(f"{chain_id}: {mean_distance:.4f}")

    return mean_distances

if __name__ == "__main__":
    import sys

    # Check if a PDB file path is provided as a command line argument
    if len(sys.argv) > 1:
        pdb_file_path = sys.argv[1]
    else:
        # Read the PDB file from standard input if no file path is provided
        pdb_file_path = None

    # Call the function and print the mean distances
    calculate_pdb_chain_mean_minimum_distances('2y9h.pdb')


A: 20.4243
B: 11.1688
C: 20.3878
D: 10.9331
E: 20.2510
F: 11.1213
G: 19.4661
H: 10.8282
I: 19.0957
J: 11.1429
K: 18.8803
L: 11.0262
M: 18.9889
N: 10.1459
O: 19.3353
P: 10.0516
