In [5]:
def get_residues(list_of_files,location,folder,interacting_chain):
    interacting_residues = []
    output_list = []
    count_sites = {}
    for file_name in list_of_files:
        
        if file_name.endswith(".cif"):
            
            cmd.reinitialize()
        
            cmd.load(f'{location}/{folder}/{file_name}')
            cmd.select('chainA', 'chain A')
            cmd.select('chainB', 'chain B')
            if interacting_chain == "A":
                cmd.select('close_residues', '(chainA within 5 of chainB)')
            else:
                cmd.select('close_residues', '(chainB within 5 of chainA)')
            output = []
            cmd.iterate('close_residues', 'output.append(f"{resi}\t{chain}\t{resn}")', space={'output': output})
            return(output)
            

In [6]:
from Bio import SeqIO
import os
import json
from pymol import cmd
import statistics
from joblib import Parallel, delayed

def process_folder(folder, location, interacting_chain, diapause_species, non_diapause_species, site_count_dictionary):
    species = "_".join(folder.split("_")[:-1])
    species = species[0].upper() + species[1:]
    
    print(f"Processing species: {species}")
    
    if species not in diapause_species and species not in non_diapause_species:
        print(species, "missing")
        return
    
    print(species)
    list_of_files_all = os.listdir(f"{location}/{folder}")
    all_interaction = {}
    
    if species not in site_count_dictionary:
        site_count_dictionary[species] = {}
    
    for file_name in list_of_files_all:
        if file_name.endswith(".cif"):
            print(f"Processing file: {file_name} for species {species}")
            list_of_files = [file_name]
            file_number = file_name.split("_")[-1][0]
            
            interacting_residues = set(get_residues(list_of_files, location, folder, interacting_chain))
            print(interacting_residues)
            assert False
            print(f"Number of interacting residues: {len(interacting_residues)}")
            
            iteraction_dictionary = get_interacting_residues_on_other_chain(
                list_of_files, location, folder, interacting_chain, interacting_residues, interacting_residues
            )
            
            for key, value in iteraction_dictionary.items():
                for target_site in value:
                    if species == "Nymphalis_c_album":
                        species = "Nymphalis_c-album"
                    site_pair = f"{Timeless_site_dict[species][int(key)]}_{cry2_site_dict[species][int(target_site)]}"
                    if species == "Nymphalis_c-album":
                        species = "Nymphalis_c_album"
                    site_count_dictionary[species].setdefault(site_pair, 0)
                    site_count_dictionary[species][site_pair] += 1
    
    return site_count_dictionary

location = "/mnt/d/from_desktop/alphafold_work/19.tim-cry2-all"

diapause_species = ["Danaus_plexippus","Nymphalis_c_album","Nymphalis_polychloros","Nymphalis_urticae","Nymphalis_io","Lysandra_coridon","Plebejus_argus","Aporia_crataegi","Argynnis_bischoffii_washingtonia","Aricia_artaxerxes","Aricia_agestis","Boloria_euphrosyne","Brenthis_hecate","Brenthis_daphne","Brenthis_ino","Coenonympha_glycerion","Colias_nastes","Cyaniris_semiargus","Erebia_aethiops","Erebia_ligea","Euphydryas_editha","Fabriciana_adippe","Hipparchia_semele","Lasiommata_megera","Lysandra_bellargus","Maniola_hyperantus","Maniola_jurtina","Melanargia_galathea","Melitaea_cinxia","Mellicta_athalia","Oeneis_ivallda","Parnassius_glacialis","Phengaris_arion","Polyommatus_icarus","Colias_eurytheme","Pararge_aegeria","Boloria_selene","Calycopis_cecrops","Anthocharis_cardamines","Battus_philenor","Celastrina_argiolus","Glaucopsyche_alexis","Leptidea_juvernica","Leptidea_sinapis","Leptidea_reali","Papilio_xuthus","Papilio_machaon","Papilio_glaucus","Pieris_napi","Pieris_mannii","Pieris_rapae","Pieris_brassicae","Hypolimnas_misippus","Pieris_melete"]
non_diapause_species = ["Bicyclus_anynana","Colias_croceus","Danaus_chrysippus","Dircenna_loreta","Dryadula_phaetusa","Dryas_iulia_moderata","Eueides_isabella","Eurema_hecabe","Heliconius_charithonia","Heliconius_nattereri","Heliconius_sara","Leptophobia_aripa","Mechanitis_messenoides","Mechanitis_mazaeus","Melinaea_menophilus","Melinaea_marsaeus_rileyi","Napeogenes_sylphis","Napeogenes_inachia","Ornithoptera_alexandrae","Ornithoptera_priamus","Papilio_demoleus","Papilio_protenor","Papilio_polytes","Papilio_memnon","Papilio_dardanus_tibullus","Papilio_elwesi","Philaethria_dido","Phoebis_sennae","Teinopalpus_imperialis","Troides_oblongomaculatus","Troides_aeacus","Vanessa_cardui","Vanessa_atalanta","Vanessa_tameamea","Zerene_cesonia"]

site_count_dictionary = {}
interacting_chain_list = ["A"]

for interacting_chain in interacting_chain_list:
    list_of_interaction_folders = [f for f in os.listdir(location) if f != "desktop.ini"]
    list_of_interaction_folders = ["anthocharis_cardamines_timelessxcry2","aporia_crataegi_timelessxcry2","argynnis_bischoffii_washingtonia_timelessxcry2","aricia_agestis_timelessxcry2"]
    # results = Parallel(n_jobs=4)(
    #     delayed(process_folder)(folder, location, interacting_chain, diapause_species, non_diapause_species, site_count_dictionary)
    #     for folder in list_of_interaction_folders
    # )
    for folder in list_of_interaction_folders:
        print(folder)
        print(process_folder(folder, location, interacting_chain, diapause_species, non_diapause_species, site_count_dictionary))
    for result in results:
        if result:
            for species, site_data in result.items():
                if species not in site_count_dictionary:
                    site_count_dictionary[species] = {}
                site_count_dictionary[species].update(site_data)

print(site_count_dictionary)

anthocharis_cardamines_timelessxcry2
Processing species: Anthocharis_cardamines
Anthocharis_cardamines
Processing file: fold_anthocharis_cardamines_timelessxcry2_model_0.cif for species Anthocharis_cardamines
{'44\tA\tTHR', '173\tA\tPRO', '508\tA\tLYS', '556\tA\tPRO', '570\tA\tGLU', '49\tA\tLEU', '1186\tA\tGLU', '1096\tA\tPHE', '1178\tA\tVAL', '11\tA\tHIS', '1220\tA\tPHE', '687\tA\tASP', '87\tA\tVAL', '1104\tA\tARG', '88\tA\tASN', '699\tA\tGLU', '9\tA\tGLN', '1095\tA\tGLU', '1047\tA\tVAL', '918\tA\tGLU', '1172\tA\tASP', '920\tA\tMET', '171\tA\tHIS', '245\tA\tMET', '2\tA\tGLU', '637\tA\tLEU', '1053\tA\tTYR', '4\tA\tVAL', '683\tA\tGLU', '738\tA\tASN', '1057\tA\tLYS', '1089\tA\tVAL', '1185\tA\tPRO', '492\tA\tLEU', '236\tA\tVAL', '1090\tA\tVAL', '695\tA\tHIS', '47\tA\tLYS', '238\tA\tTHR', '1216\tA\tPHE', '1102\tA\tHIS', '494\tA\tILE', '167\tA\tARG', '504\tA\tTHR', '12\tA\tSER', '114\tA\tGLU', '1211\tA\tARG', '1170\tA\tALA', '229\tA\tLEU', '55\tA\tSER', '106\tA\tTHR', '54\tA\tARG', '727\tA\

AssertionError: 