In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [19]:
def process_pdb(pdb_content):
    """Parse the ATOM records of a PDB file into a DataFrame.

    Chains are numbered sequentially starting at 1: every ``TER`` record that
    follows at least one ATOM record closes the current chain and advances
    the number. The chain letter stored in the file is not used. Records
    other than ``ATOM`` and ``TER`` are ignored.

    Parameters
    ----------
    pdb_content : iterable of str
        Lines of a PDB file (e.g. the result of ``file.readlines()``).

    Returns
    -------
    pandas.DataFrame
        One row per ATOM record, in file order, with the columns listed in
        ``columns`` below. The columns are present even when no ATOM record
        was found, so downstream CSV files always carry a header.

    Raises
    ------
    ValueError
        If the residue number or a coordinate field is blank or non-numeric,
        or if an occupancy / temperature-factor field is non-numeric.
    """
    columns = [
        'Atom', 'Atom Number', 'Atom Name', 'Residue Name', 'Chain ID',
        'Residue Sequence Number', 'X', 'Y', 'Z',
        'Occupancy', 'Temp Factor', 'Atom Type',
    ]

    def _optional_float(field):
        # Occupancy and temperature factor may be blank, or missing entirely
        # on lines truncated after the coordinates; record NaN instead of
        # failing the whole file on float('').
        text = field.strip()
        return float(text) if text else float('nan')

    atom_data = []
    chain_id = 1
    chain_has_atoms = False  # guards against consecutive/leading TER records

    for line in pdb_content:
        if line.startswith("ATOM"):
            chain_has_atoms = True
            # Slices follow the fixed-column layout of the PDB ATOM record.
            atom_data.append({
                'Atom': 'ATOM',
                'Atom Number': line[6:11].strip(),
                'Atom Name': line[12:16].strip(),
                'Residue Name': line[17:20].strip(),
                'Chain ID': str(chain_id),
                'Residue Sequence Number': int(line[22:26].strip()),
                'X': float(line[30:38].strip()),
                'Y': float(line[38:46].strip()),
                'Z': float(line[46:54].strip()),
                'Occupancy': _optional_float(line[54:60]),
                'Temp Factor': _optional_float(line[60:66]),
                'Atom Type': line[76:].strip(),
            })
        elif line.startswith("TER") and chain_has_atoms:
            chain_id += 1
            chain_has_atoms = False

    return pd.DataFrame(atom_data, columns=columns)

In [22]:
def process_pdb_files(input_folder, output_folder):
    """Convert every ``.pdb`` file in a folder to a CSV of its ATOM records.

    Parameters
    ----------
    input_folder : str
        Directory scanned (non-recursively) for files ending in ``.pdb``.
    output_folder : str
        Directory receiving one ``<name>.csv`` per input file. It is created
        if it does not exist yet.

    Each written path is printed, followed by a final completion message.
    """
    # DataFrame.to_csv does not create missing directories.
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order (and the log) is reproducible.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith('.pdb'):
            continue

        with open(os.path.join(input_folder, filename), 'r') as file:
            df_pdb = process_pdb(file)

        # Swap only the trailing extension; str.replace would also rewrite
        # any other '.pdb' occurring inside the file name.
        stem = os.path.splitext(filename)[0]
        output_file_path = os.path.join(output_folder, stem + '.csv')
        df_pdb.to_csv(output_file_path, index=False)
        print(f"Processed and saved: {output_file_path}")
    print('All files processed.')

In [26]:
# Convert the PDB files under pdb_folder/A_DNA into CSV files on Drive.
# NOTE(review): the output folder name 'A -DNA ' contains an inner and a
# trailing space, unlike the input folder 'A_DNA' — confirm this matches the
# intended Drive folder before changing it.
input_folder = '/content/drive/MyDrive/weekend_project/pdb_folder/A_DNA'
output_folder = '/content/drive/MyDrive/weekend_project/csv_folder/A -DNA '
process_pdb_files(input_folder, output_folder)

Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_0.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_1.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_2.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_3.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_4.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_5.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_6.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_7.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_8.csv
Processed and saved: /content/drive/MyDrive/weekend_project/csv_folder/A -DNA /A_9.csv
All files processed.
