In [1]:
# %load_ext autotime
# !pip install autotime
%load_ext autoreload
%autoreload 2

import sys
from pathlib import Path
import warnings
import pandas as pd
import numpy as np
import time
import esm
import biotite.structure.io as bsio
from proteinttt.models.esmfold import ESMFoldTTT, DEFAULT_ESMFOLD_TTT_CFG
from proteinttt.utils.structure import calculate_tm_score, lddt_score
import torch
import argparse
import os


# Notebook-wide pandas display limits: show wide summary tables in full and
# avoid truncating long cells such as protein sequences.
# Every option is addressed by its full "display.*" key; the bare
# "max_colwidth" shorthand is resolved by pattern matching on option names and
# can stop resolving if pandas ever adds another option matching that pattern.
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_colwidth", 1000)

  __import__("pkg_resources").declare_namespace(__name__)


In [2]:
# Root directory of the BFVD experiment data; the per-run files below live under it.
base_path = Path("/scratch/project/open-35-8/pimenol1/ProteinTTT/ProteinTTT/data/bfvd/")
# Tab-separated summary table read by the "Choosing data" cells.
SUMMARY_PATH = base_path.joinpath('proteinttt_results.tsv')
# Directory with the BFVD MSAs (outside base_path).
MSA_PATH = Path("/scratch/project/open-35-8/antonb/bfvd/bfvd_msa")
# Destination of the sampled test subset written further down.
SUBSET_PATH = base_path.joinpath('proteinttt_msa_testset.tsv')

# Choosing data

In [14]:
# Load the tab-separated summary table (read_table defaults to a tab separator)
# and preview its first two rows.
df = pd.read_table(SUMMARY_PATH)
df.head(2)

Unnamed: 0,id,nmsa,pLDDT_AlphaFold,version,sequence,length,pLDDT_before,pLDDT_after,tm_score_before,lddt_before,tm_score_after,lddt_after,time,difference_plddt
0,A7IUX3,3,40.184184,BASE,MYKMSLLVLTQEFQEEDMPWFGTAFLHPHVPNRGPCPSFHLDNPKNLLWVCEWYTCIYLAWDQDNQYTLGMMHTEIRRIHLPSHDRCLGFPGDTSMCCQLGLS,103,28.709551,36.696726,0.28594,0.331706,0.33734,0.35068,36.788191,7.987175
1,A0A6M9TZY3,7,40.185052,BASE+LOGAN+12CY,MLLDKPDQHVITEFDAFMENDKIILNGIFNDTAKKALNLSLPLGFFSISNYIYFNLPLLEFPSFVNNYTFKDPLHKERIHYSFELSNVYNNLQTDYFNNVIRHPLPKTKPVITGIKVLPKITRSIEGPSSRINNSDKDFNHREVHLERKKPYDKKPNMRETDQQNMPPPPLPKMKKIKTTSAESAAAPSSSQPANINDIDAHLQKYHQLFTKLIQEKEFKKYQKFVDDFKVFVTPIYHVIQANTLKYKPLIETYNRYVLECFVRHWKIKTTDNLKSSLHAISNTEYDNFIASYHNFNEMYITLNDMSNIKEDPDYTIIKEFVHEIVRLIGINNNI,335,42.312089,58.246706,0.4324,0.533388,0.39168,0.544333,185.468657,15.934617


In [28]:
# Draw 250 proteins whose AlphaFold pLDDT is above 90.
# random_state pins the draw: without it every re-run of this cell (and every
# Restart & Run All) selects a different subset, so the test set written later
# could not be regenerated.
df1 = df.query('pLDDT_AlphaFold > 90').sample(250, random_state=42)

In [30]:
# Split the high-confidence sample by the pLDDT obtained before TTT.
# The second stratum uses >= so that a row with pLDDT_before == 50 belongs to
# exactly one stratum instead of being dropped from both.
df2 = df1.query('pLDDT_before < 50')
df3 = df1.query('pLDDT_before >= 50')

# Stratified subset: 40 rows from the low stratum and 60 from the high one.
# random_state makes the selection reproducible across re-runs.
df_r = pd.concat([
    df2.sample(40, random_state=42),
    df3.sample(60, random_state=42),
])

In [37]:
# Persist the sampled subset to SUBSET_PATH as a tab-separated file, without the index column.
df_r.to_csv(SUBSET_PATH, sep="\t", index=False)

In [7]:
# Summary statistics of the 'pLDDT_0.0004_4' column.
# NOTE(review): the summary frame previewed by `df.head(2)` above has no
# 'pLDDT_0.0004_4' column, so `df` was presumably bound to a different table
# when this cell ran — confirm which file it should be loaded from.
df['pLDDT_0.0004_4'].describe()

count    97.000000
mean     74.738417
std      14.479082
min      32.623285
25%      68.554261
50%      77.803222
75%      85.509580
max      93.753057
Name: pLDDT_0.0004_4, dtype: float64

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import glob

def _draw_mean_plddt(ax, mean_by_step, title, label_size, title_size, legend_loc):
    """Draw one mean-pLDDT-vs-step curve on ``ax`` with the shared green styling."""
    ax.plot(mean_by_step.index, mean_by_step.values,
            'o-', color='green', linewidth=2, markersize=5, label='Mean pLDDT')
    ax.set_xlabel('Step', fontsize=label_size)
    ax.set_ylabel('Mean pLDDT', fontsize=label_size, color='green')
    ax.tick_params(axis='y', labelcolor='green')
    ax.set_title(title, fontsize=title_size)
    ax.grid(True, alpha=0.3)
    ax.legend(loc=legend_loc)


def _rows_matching(frame, column, value):
    """Return a boolean numpy mask of the rows of ``frame`` where ``column`` equals ``value``.

    Numeric values are compared with a tight relative tolerance so that a
    learning rate parsed from text still matches the float given by the caller.
    """
    series = frame[column]
    is_number = isinstance(value, (int, float, np.integer, np.floating)) and not isinstance(value, bool)
    if is_number and pd.api.types.is_numeric_dtype(series):
        return np.isclose(series.to_numpy(dtype=float), float(value), rtol=1e-9, atol=0.0)
    return (series == value).to_numpy(dtype=bool, na_value=False)


def plot_mean_plddt_vs_step(logs_dir, hyperparams=None, output_path=None):
    """
    Plot the mean pLDDT over all proteins at each step.

    Every ``*_log.tsv`` file in ``logs_dir`` is read; rows are pooled and
    averaged per ``step``. Only the columns needed for plotting are loaded, so
    the large ``pdb`` column is never kept in memory.

    Args:
        logs_dir: Path to directory containing log files (e.g., 'logs_msa_0.0004_4')
        hyperparams: Optional dict mapping two parameter names to lists of values,
                    e.g., {'lr': [4e-5, 4e-4, 4e-3], 'ags': [4, 8, 16, 32]}.
                    The first key spans the grid columns and the second the rows
                    (further keys are ignored). When the log files contain columns
                    with these names, each panel shows only the matching rows;
                    otherwise every panel shows the mean over all logs and a note
                    is printed.
        output_path: Optional path to save the figure

    Returns:
        A Series of mean pLDDT indexed by step (computed over all pooled rows),
        or None when no usable log file was found.

    Raises:
        ValueError: if ``hyperparams`` is given with fewer than two parameter names.
    """
    log_files = glob.glob(str(Path(logs_dir) / '*_log.tsv'))

    if not log_files:
        print(f"No log files found in {logs_dir}")
        return

    grid_params = list(hyperparams)[:2] if hyperparams else []
    required = ('step', 'plddt')
    wanted = set(required) | set(grid_params)

    # Collect the plotting columns from every log file
    frames = []
    for log_file in log_files:
        try:
            # Read with quoting to handle the multiline pdb column.
            # quoting=1 is csv.QUOTE_ALL (csv.QUOTE_MINIMAL would be 0).
            # A callable usecols keeps only the wanted columns and, unlike a
            # list, does not fail when an optional column is absent.
            frame = pd.read_csv(log_file, sep='\t', quoting=1,
                                usecols=lambda name: name in wanted)
        except Exception as e:
            print(f"Error reading {log_file}: {e}")
            continue
        absent = [name for name in required if name not in frame.columns]
        if absent:
            # Treat a log without the plotted columns like an unreadable one
            # instead of failing later in the groupby.
            print(f"Skipping {log_file}: missing column(s) {absent}")
            continue
        frames.append(frame)

    if not frames:
        print("No valid log files could be read")
        return

    combined_df = pd.concat(frames, ignore_index=True)

    # Mean plddt for each step across all proteins (this is the return value)
    mean_plddt = combined_df.groupby('step')['plddt'].mean()

    if hyperparams:
        if len(grid_params) < 2:
            raise ValueError(
                "hyperparams must map two parameter names to lists of values "
                "(first key: grid columns, second key: grid rows)"
            )
        param1_name, param2_name = grid_params
        param1_values = hyperparams[param1_name]
        param2_values = hyperparams[param2_name]

        nrows, ncols = len(param2_values), len(param1_values)
        # squeeze=False always yields a 2-D array, so no special case is
        # needed for single-row, single-column or 1x1 grids.
        fig, axes = plt.subplots(nrows, ncols, figsize=(15, 12), squeeze=False)
        axes = axes.ravel()

        can_filter = (param1_name in combined_df.columns
                      and param2_name in combined_df.columns)
        if not can_filter:
            print(f"Log files have no '{param1_name}'/'{param2_name}' columns; "
                  "every panel shows the mean over all logs")

        panel = 0
        for row_value in param2_values:
            for col_value in param1_values:
                if can_filter:
                    mask = (_rows_matching(combined_df, param1_name, col_value)
                            & _rows_matching(combined_df, param2_name, row_value))
                    subset_df = combined_df[mask]
                else:
                    subset_df = combined_df
                mean_plddt_subset = subset_df.groupby('step')['plddt'].mean()
                _draw_mean_plddt(
                    axes[panel], mean_plddt_subset,
                    f'{param1_name}: {col_value}, {param2_name}: {row_value}',
                    label_size=10, title_size=9, legend_loc='upper left',
                )
                panel += 1

        plt.tight_layout()
    else:
        # Simple single plot
        fig, ax = plt.subplots(figsize=(10, 6))
        _draw_mean_plddt(ax, mean_plddt, 'Mean pLDDT vs Step (All Proteins)',
                         label_size=12, title_size=14, legend_loc='best')
        plt.tight_layout()

    if output_path:
        plt.savefig(output_path, dpi=300, bbox_inches='tight')

    plt.show()

    return mean_plddt


# Example usage (in a notebook __name__ is "__main__", so this runs with the cell):
if __name__ == "__main__":
    from pathlib import Path

    base_path = Path("/scratch/project/open-35-8/pimenol1/ProteinTTT/ProteinTTT/data/bfvd/")

    # Single plot pooling every log of one run directory
    plot_mean_plddt_vs_step(base_path.joinpath('logs_msa_0.0004_4'))

    # Grid definition for the hyperparameter-organized variant (call left disabled below)
    hyperparams = {
        'lr': [4e-5, 4e-4, 4e-3],
        'ags': [4, 8, 16, 32],
    }
    # plot_mean_plddt_vs_step(base_path, hyperparams=hyperparams)

Unnamed: 0,step,accumulated_step,loss,score_seq_time,eval_step_time,plddt,pdb
0,0,0,,0.0,0.896465,56.824909,PARENT N/A\nATOM 1 N MET A 1 6.278 11.512 -0.976 1.00 48.36 N \nATOM 2 CA MET A 1 7.444 10.711 -1.337 1.00 48.81 C \nATOM 3 C MET A 1 7.021 9.354 -1.891 1.00 49.58 C \nATOM 4 CB MET A 1 8.361 10.519 -0.128 1.00 44.87 C \nATOM 5 O MET A 1 6.069 8.746 -1.396 1.00 48.07 O \nATOM 6 CG MET A 1 9.826 10.341 -0.492 1.00 43.01 C \nATOM 7 SD MET A 1 10.908 10.221 0.985 1.00 50.26 S \nATOM 8 CE MET A 1 11.333 8.458 0.928 1.00 40.17 C \nATOM 9 N SER A 2 7.295 9.083 -3.179 1.00 52.88 N \nATOM 10 CA SER A 2 6.908 7.842 -3.843 1.00 52.61 C \nATOM 11 C SER A 2 8.014 6.797 -3.748 1.00 53.69 C \nATOM 12 CB SER A 2 6.566 8.103 -5.310 1.00 50.52 C \n...
1,1,2,2.378906,0.0,0.901289,88.542816,PARENT N/A\nATOM 1 N MET A 1 13.045 -4.260 3.781 1.00 84.54 N \nATOM 2 CA MET A 1 12.433 -5.395 4.466 1.00 86.60 C \nATOM 3 C MET A 1 11.079 -5.734 3.854 1.00 89.92 C \nATOM 4 CB MET A 1 12.273 -5.100 5.959 1.00 82.19 C \nATOM 5 O MET A 1 10.311 -4.838 3.498 1.00 89.25 O \nATOM 6 CG MET A 1 11.711 -6.265 6.757 1.00 71.12 C \nATOM 7 SD MET A 1 11.628 -5.910 8.555 1.00 66.55 S \nATOM 8 CE MET A 1 10.520 -4.473 8.547 1.00 61.84 C \nATOM 9 N SER A 2 10.803 -7.009 3.620 1.00 90.84 N \nATOM 10 CA SER A 2 9.534 -7.495 3.086 1.00 92.15 C \nATOM 11 C SER A 2 8.510 -7.701 4.197 1.00 91.80 C \nATOM 12 CB SER A 2 9.740 -8.804 2.322 1.00 88.99 C \n...
2,2,4,2.716797,0.0,0.897574,57.763851,PARENT N/A\nATOM 1 N MET A 1 3.945 12.541 -0.458 1.00 51.57 N \nATOM 2 CA MET A 1 5.191 12.006 -1.000 1.00 52.10 C \nATOM 3 C MET A 1 4.984 10.602 -1.558 1.00 52.77 C \nATOM 4 CB MET A 1 6.281 11.987 0.074 1.00 48.03 C \nATOM 5 O MET A 1 4.199 9.823 -1.013 1.00 50.98 O \nATOM 6 CG MET A 1 7.693 11.964 -0.487 1.00 46.20 C \nATOM 7 SD MET A 1 8.972 12.103 0.822 1.00 52.05 S \nATOM 8 CE MET A 1 10.079 10.752 0.333 1.00 43.44 C \nATOM 9 N SER A 2 5.350 10.361 -2.850 1.00 55.59 N \nATOM 10 CA SER A 2 5.116 9.089 -3.527 1.00 55.23 C \nATOM 11 C SER A 2 6.397 8.267 -3.616 1.00 56.43 C \nATOM 12 CB SER A 2 4.552 9.323 -4.929 1.00 53.14 C \n...
3,3,6,2.359375,0.0,0.897136,66.626373,PARENT N/A\nATOM 1 N MET A 1 7.899 12.151 0.100 1.00 65.74 N \nATOM 2 CA MET A 1 9.035 11.295 -0.231 1.00 67.81 C \nATOM 3 C MET A 1 8.651 10.267 -1.289 1.00 68.67 C \nATOM 4 CB MET A 1 9.557 10.587 1.021 1.00 60.37 C \nATOM 5 O MET A 1 7.528 9.758 -1.289 1.00 64.69 O \nATOM 6 CG MET A 1 10.830 9.789 0.787 1.00 54.82 C \nATOM 7 SD MET A 1 11.504 9.072 2.336 1.00 56.52 S \nATOM 8 CE MET A 1 10.509 7.558 2.447 1.00 46.90 C \nATOM 9 N SER A 2 9.419 10.094 -2.315 1.00 70.62 N \nATOM 10 CA SER A 2 9.133 9.169 -3.407 1.00 71.66 C \nATOM 11 C SER A 2 9.982 7.907 -3.298 1.00 71.98 C \nATOM 12 CB SER A 2 9.377 9.842 -4.758 1.00 67.29 C \n...
4,4,8,2.732422,0.0,0.893203,69.801064,PARENT N/A\nATOM 1 N MET A 1 9.687 10.562 -0.684 1.00 68.60 N \nATOM 2 CA MET A 1 10.689 9.546 -0.994 1.00 71.40 C \nATOM 3 C MET A 1 10.148 8.537 -2.000 1.00 72.28 C \nATOM 4 CB MET A 1 11.137 8.827 0.280 1.00 63.38 C \nATOM 5 O MET A 1 8.962 8.204 -1.975 1.00 68.14 O \nATOM 6 CG MET A 1 12.262 7.829 0.057 1.00 56.89 C \nATOM 7 SD MET A 1 12.842 7.062 1.620 1.00 59.00 S \nATOM 8 CE MET A 1 11.463 5.930 1.952 1.00 48.67 C \nATOM 9 N SER A 2 10.897 8.175 -3.002 1.00 73.83 N \nATOM 10 CA SER A 2 10.470 7.236 -4.035 1.00 75.76 C \nATOM 11 C SER A 2 11.140 5.878 -3.858 1.00 75.52 C \nATOM 12 CB SER A 2 10.781 7.790 -5.426 1.00 70.45 C \n...
5,5,10,2.464844,0.0,0.892166,69.855995,PARENT N/A\nATOM 1 N MET A 1 8.444 11.145 0.630 1.00 68.34 N \nATOM 2 CA MET A 1 9.570 10.266 0.329 1.00 71.15 C \nATOM 3 C MET A 1 9.214 9.288 -0.786 1.00 72.15 C \nATOM 4 CB MET A 1 10.004 9.499 1.579 1.00 63.03 C \nATOM 5 O MET A 1 8.073 8.830 -0.875 1.00 68.04 O \nATOM 6 CG MET A 1 11.244 8.644 1.373 1.00 56.57 C \nATOM 7 SD MET A 1 11.797 7.824 2.919 1.00 59.05 S \nATOM 8 CE MET A 1 10.544 6.517 3.052 1.00 48.43 C \nATOM 9 N SER A 2 10.062 9.098 -1.750 1.00 73.78 N \nATOM 10 CA SER A 2 9.817 8.203 -2.877 1.00 75.66 C \nATOM 11 C SER A 2 10.616 6.911 -2.740 1.00 75.46 C \nATOM 12 CB SER A 2 10.168 8.892 -4.196 1.00 70.38 C \n...
6,6,12,2.613281,0.0,0.89388,59.769146,PARENT N/A\nATOM 1 N MET A 1 8.124 7.143 -1.212 1.00 52.03 N \nATOM 2 CA MET A 1 9.047 6.013 -1.281 1.00 52.55 C \nATOM 3 C MET A 1 8.287 4.692 -1.336 1.00 53.68 C \nATOM 4 CB MET A 1 9.997 6.020 -0.082 1.00 47.86 C \nATOM 5 O MET A 1 7.258 4.535 -0.676 1.00 51.53 O \nATOM 6 CG MET A 1 11.303 5.284 -0.330 1.00 45.39 C \nATOM 7 SD MET A 1 12.480 5.449 1.068 1.00 51.17 S \nATOM 8 CE MET A 1 13.168 3.771 1.107 1.00 42.03 C \nATOM 9 N SER A 2 8.447 3.924 -2.396 1.00 53.85 N \nATOM 10 CA SER A 2 7.714 2.683 -2.625 1.00 53.55 C \nATOM 11 C SER A 2 8.582 1.465 -2.323 1.00 54.91 C \nATOM 12 CB SER A 2 7.211 2.615 -4.067 1.00 51.37 C \n...
7,7,14,2.601562,0.0,0.892834,70.663506,PARENT N/A\nATOM 1 N MET A 1 7.070 12.497 1.752 1.00 68.04 N \nATOM 2 CA MET A 1 8.311 11.766 1.513 1.00 70.58 C \nATOM 3 C MET A 1 8.157 10.805 0.339 1.00 71.66 C \nATOM 4 CB MET A 1 8.733 10.997 2.766 1.00 62.55 C \nATOM 5 O MET A 1 7.089 10.222 0.143 1.00 67.34 O \nATOM 6 CG MET A 1 10.085 10.314 2.639 1.00 56.32 C \nATOM 7 SD MET A 1 10.599 9.478 4.190 1.00 58.66 S \nATOM 8 CE MET A 1 9.760 7.882 3.987 1.00 48.06 C \nATOM 9 N SER A 2 9.096 10.759 -0.555 1.00 73.64 N \nATOM 10 CA SER A 2 9.048 9.898 -1.732 1.00 75.34 C \nATOM 11 C SER A 2 10.015 8.727 -1.600 1.00 75.31 C \nATOM 12 CB SER A 2 9.373 10.698 -2.994 1.00 70.15 C \n...
8,8,16,2.275391,0.0,0.894274,70.522575,PARENT N/A\nATOM 1 N MET A 1 2.875 12.093 6.706 1.00 67.82 N \nATOM 2 CA MET A 1 4.266 11.650 6.757 1.00 69.98 C \nATOM 3 C MET A 1 4.720 11.126 5.399 1.00 71.07 C \nATOM 4 CB MET A 1 4.448 10.569 7.823 1.00 62.47 C \nATOM 5 O MET A 1 3.931 10.529 4.664 1.00 66.87 O \nATOM 6 CG MET A 1 5.896 10.165 8.046 1.00 56.55 C \nATOM 7 SD MET A 1 6.084 8.934 9.394 1.00 58.59 S \nATOM 8 CE MET A 1 6.189 7.407 8.420 1.00 48.56 C \nATOM 9 N SER A 2 5.881 11.490 4.948 1.00 72.99 N \nATOM 10 CA SER A 2 6.407 11.082 3.649 1.00 74.23 C \nATOM 11 C SER A 2 7.528 10.060 3.802 1.00 74.42 C \nATOM 12 CB SER A 2 6.916 12.297 2.872 1.00 69.49 C \n...
9,9,18,2.5,0.0,0.900132,69.842278,PARENT N/A\nATOM 1 N MET A 1 6.022 12.263 4.278 1.00 68.82 N \nATOM 2 CA MET A 1 7.302 11.599 4.050 1.00 71.15 C \nATOM 3 C MET A 1 7.285 10.822 2.738 1.00 72.26 C \nATOM 4 CB MET A 1 7.634 10.658 5.210 1.00 63.47 C \nATOM 5 O MET A 1 6.256 10.263 2.355 1.00 68.02 O \nATOM 6 CG MET A 1 9.017 10.033 5.115 1.00 57.15 C \nATOM 7 SD MET A 1 9.423 9.001 6.577 1.00 58.92 S \nATOM 8 CE MET A 1 8.637 7.439 6.093 1.00 48.55 C \nATOM 9 N SER A 2 8.299 10.926 1.938 1.00 73.82 N \nATOM 10 CA SER A 2 8.389 10.252 0.647 1.00 75.27 C \nATOM 11 C SER A 2 9.349 9.069 0.707 1.00 75.39 C \nATOM 12 CB SER A 2 8.841 11.231 -0.438 1.00 70.47 C \n...


In [6]:
# Per-step log of protein A0A1S5R1Q9 from the 'logs_df' run; displayed as the cell output.
log = pd.read_csv(base_path.joinpath('logs_df', 'A0A1S5R1Q9_log.tsv'), sep="\t")
log

Unnamed: 0,step,accumulated_step,loss,score_seq_time,eval_step_time,plddt,pdb
0,0,0,,0.0,0.831112,58.079447,PARENT N/A\nATOM 1 N MET A 1 4.124 11.013 0.023 1.00 51.28 N \nATOM 2 CA MET A 1 5.332 10.461 -0.583 1.00 52.09 C \nATOM 3 C MET A 1 5.087 9.043 -1.089 1.00 52.83 C \nATOM 4 CB MET A 1 6.487 10.465 0.420 1.00 47.32 C \nATOM 5 O MET A 1 4.320 8.289 -0.489 1.00 50.89 O \nATOM 6 CG MET A 1 7.861 10.524 -0.228 1.00 45.04 C \nATOM 7 SD MET A 1 9.214 10.664 1.004 1.00 51.77 S \nATOM 8 CE MET A 1 10.158 9.164 0.614 1.00 41.51 C \nATOM 9 N SER A 2 5.302 8.769 -2.364 1.00 55.37 N \nATOM 10 CA SER A 2 5.020 7.483 -2.994 1.00 55.17 C \nATOM 11 C SER A 2 6.301 6.687 -3.223 1.00 56.51 C \nATOM 12 CB SER A 2 4.291 7.684 -4.323 1.00 52.95 C \n...
1,1,4,2.412109,0.0,0.830936,77.051393,PARENT N/A\nATOM 1 N MET A 1 8.152 -3.251 -3.175 1.00 72.41 N \nATOM 2 CA MET A 1 7.914 -4.572 -3.751 1.00 75.78 C \nATOM 3 C MET A 1 6.649 -4.574 -4.603 1.00 77.62 C \nATOM 4 CB MET A 1 7.806 -5.628 -2.650 1.00 68.62 C \nATOM 5 O MET A 1 5.689 -3.863 -4.298 1.00 73.95 O \nATOM 6 CG MET A 1 7.660 -7.047 -3.175 1.00 61.75 C \nATOM 7 SD MET A 1 7.663 -8.297 -1.831 1.00 63.89 S \nATOM 8 CE MET A 1 6.149 -7.824 -0.950 1.00 52.91 C \nATOM 9 N SER A 2 6.651 -5.109 -5.715 1.00 76.70 N \nATOM 10 CA SER A 2 5.529 -5.147 -6.647 1.00 79.32 C \nATOM 11 C SER A 2 4.917 -6.541 -6.719 1.00 79.44 C \nATOM 12 CB SER A 2 5.976 -4.705 -8.042 1.00 73.66 C \n...
2,2,8,2.025391,0.0,0.823701,72.327595,PARENT N/A\nATOM 1 N MET A 1 10.159 -7.829 2.264 1.00 70.77 N \nATOM 2 CA MET A 1 9.372 -9.059 2.280 1.00 73.58 C \nATOM 3 C MET A 1 8.330 -9.053 1.167 1.00 74.82 C \nATOM 4 CB MET A 1 8.688 -9.244 3.636 1.00 65.89 C \nATOM 5 O MET A 1 7.770 -8.004 0.841 1.00 70.65 O \nATOM 6 CG MET A 1 7.920 -10.549 3.763 1.00 58.85 C \nATOM 7 SD MET A 1 7.179 -10.770 5.426 1.00 60.50 S \nATOM 8 CE MET A 1 5.611 -9.885 5.198 1.00 49.74 C \nATOM 9 N SER A 2 8.182 -10.116 0.470 1.00 76.31 N \nATOM 10 CA SER A 2 7.220 -10.232 -0.620 1.00 78.49 C \nATOM 11 C SER A 2 6.012 -11.066 -0.205 1.00 78.30 C \nATOM 12 CB SER A 2 7.879 -10.853 -1.853 1.00 73.14 C \n...
3,3,12,1.980469,0.0,0.82527,78.295115,PARENT N/A\nATOM 1 N MET A 1 4.305 3.155 -6.099 1.00 68.76 N \nATOM 2 CA MET A 1 4.448 2.272 -7.253 1.00 71.29 C \nATOM 3 C MET A 1 3.088 1.764 -7.721 1.00 72.79 C \nATOM 4 CB MET A 1 5.360 1.091 -6.917 1.00 63.95 C \nATOM 5 O MET A 1 2.208 1.491 -6.904 1.00 68.18 O \nATOM 6 CG MET A 1 5.785 0.280 -8.131 1.00 58.10 C \nATOM 7 SD MET A 1 7.009 -1.021 -7.713 1.00 60.04 S \nATOM 8 CE MET A 1 8.540 -0.201 -8.240 1.00 51.50 C \nATOM 9 N SER A 2 2.733 1.994 -8.826 1.00 73.56 N \nATOM 10 CA SER A 2 1.471 1.555 -9.413 1.00 74.87 C \nATOM 11 C SER A 2 1.638 0.237 -10.161 1.00 75.30 C \nATOM 12 CB SER A 2 0.920 2.622 -10.360 1.00 70.11 C \n...
4,4,16,2.054688,0.0,0.82688,65.11729,PARENT N/A\nATOM 1 N MET A 1 8.064 5.712 -0.384 1.00 55.76 N \nATOM 2 CA MET A 1 8.532 4.360 -0.677 1.00 55.84 C \nATOM 3 C MET A 1 7.400 3.349 -0.527 1.00 58.06 C \nATOM 4 CB MET A 1 9.695 3.982 0.241 1.00 50.58 C \nATOM 5 O MET A 1 6.619 3.421 0.423 1.00 54.93 O \nATOM 6 CG MET A 1 10.618 2.924 -0.342 1.00 47.53 C \nATOM 7 SD MET A 1 12.077 2.600 0.722 1.00 52.07 S \nATOM 8 CE MET A 1 12.864 1.259 -0.214 1.00 44.81 C \nATOM 9 N SER A 2 6.980 2.658 -1.580 1.00 62.11 N \nATOM 10 CA SER A 2 5.863 1.721 -1.637 1.00 62.09 C \nATOM 11 C SER A 2 6.353 0.286 -1.803 1.00 63.38 C \nATOM 12 CB SER A 2 4.917 2.081 -2.783 1.00 59.38 C \n...
5,5,20,1.514648,0.0,0.831136,70.934065,PARENT N/A\nATOM 1 N MET A 1 6.648 -5.700 -8.319 1.00 59.48 N \nATOM 2 CA MET A 1 5.660 -6.677 -8.766 1.00 60.12 C \nATOM 3 C MET A 1 4.368 -5.988 -9.192 1.00 60.87 C \nATOM 4 CB MET A 1 5.370 -7.694 -7.661 1.00 54.88 C \nATOM 5 O MET A 1 3.934 -5.025 -8.558 1.00 57.44 O \nATOM 6 CG MET A 1 4.998 -9.073 -8.180 1.00 52.01 C \nATOM 7 SD MET A 1 4.780 -10.297 -6.829 1.00 50.90 S \nATOM 8 CE MET A 1 4.342 -11.767 -7.798 1.00 45.05 C \nATOM 9 N SER A 2 3.944 -6.105 -10.433 1.00 64.45 N \nATOM 10 CA SER A 2 2.745 -5.457 -10.953 1.00 64.31 C \nATOM 11 C SER A 2 1.562 -6.420 -10.975 1.00 65.09 C \nATOM 12 CB SER A 2 2.996 -4.914 -12.361 1.00 61.30 C \n...
6,6,24,1.302734,0.0,0.826986,75.834294,PARENT N/A\nATOM 1 N MET A 1 6.462 -5.973 -4.613 1.00 65.80 N \nATOM 2 CA MET A 1 5.820 -6.946 -5.491 1.00 67.67 C \nATOM 3 C MET A 1 4.824 -6.263 -6.423 1.00 68.71 C \nATOM 4 CB MET A 1 5.113 -8.027 -4.672 1.00 61.14 C \nATOM 5 O MET A 1 4.141 -5.319 -6.024 1.00 64.24 O \nATOM 6 CG MET A 1 4.796 -9.286 -5.462 1.00 56.56 C \nATOM 7 SD MET A 1 4.121 -10.627 -4.407 1.00 58.11 S \nATOM 8 CE MET A 1 5.649 -11.221 -3.628 1.00 51.81 C \nATOM 9 N SER A 2 4.934 -6.342 -7.643 1.00 68.03 N \nATOM 10 CA SER A 2 4.093 -5.688 -8.640 1.00 68.49 C \nATOM 11 C SER A 2 3.000 -6.625 -9.142 1.00 69.15 C \nATOM 12 CB SER A 2 4.938 -5.199 -9.817 1.00 64.63 C \n...
7,7,28,1.433594,0.0,0.829498,77.828454,PARENT N/A\nATOM 1 N MET A 1 -8.631 0.945 2.346 1.00 70.80 N \nATOM 2 CA MET A 1 -9.427 0.885 1.124 1.00 74.50 C \nATOM 3 C MET A 1 -9.098 -0.369 0.322 1.00 75.77 C \nATOM 4 CB MET A 1 -9.194 2.130 0.267 1.00 66.27 C \nATOM 5 O MET A 1 -7.962 -0.845 0.345 1.00 70.95 O \nATOM 6 CG MET A 1 -10.127 2.236 -0.928 1.00 59.57 C \nATOM 7 SD MET A 1 -9.881 3.788 -1.876 1.00 62.89 S \nATOM 8 CE MET A 1 -10.560 4.992 -0.701 1.00 52.11 C \nATOM 9 N SER A 2 -10.009 -1.073 -0.083 1.00 72.46 N \nATOM 10 CA SER A 2 -9.836 -2.292 -0.867 1.00 74.83 C \nATOM 11 C SER A 2 -10.212 -2.068 -2.328 1.00 74.96 C \nATOM 12 CB SER A 2 -10.678 -3.428 -0.284 1.00 68.67 C \n...
8,8,32,0.595703,0.0,0.826776,83.771889,PARENT N/A\nATOM 1 N MET A 1 -4.711 -8.160 2.606 1.00 69.55 N \nATOM 2 CA MET A 1 -5.529 -8.500 1.445 1.00 71.56 C \nATOM 3 C MET A 1 -4.671 -8.616 0.190 1.00 74.09 C \nATOM 4 CB MET A 1 -6.625 -7.454 1.234 1.00 65.50 C \nATOM 5 O MET A 1 -3.748 -7.824 -0.012 1.00 69.65 O \nATOM 6 CG MET A 1 -7.808 -7.960 0.425 1.00 60.83 C \nATOM 7 SD MET A 1 -9.146 -6.713 0.280 1.00 63.33 S \nATOM 8 CE MET A 1 -10.216 -7.229 1.651 1.00 57.77 C \nATOM 9 N SER A 2 -4.602 -9.678 -0.392 1.00 72.13 N \nATOM 10 CA SER A 2 -3.844 -9.919 -1.615 1.00 72.89 C \nATOM 11 C SER A 2 -4.717 -9.734 -2.852 1.00 73.74 C \nATOM 12 CB SER A 2 -3.248 -11.328 -1.608 1.00 68.15 C \n...
9,9,36,0.210815,0.0,0.829422,76.083111,PARENT N/A\nATOM 1 N MET A 1 6.313 -5.974 -3.098 1.00 67.85 N \nATOM 2 CA MET A 1 5.616 -6.957 -3.922 1.00 70.99 C \nATOM 3 C MET A 1 4.668 -6.273 -4.901 1.00 71.75 C \nATOM 4 CB MET A 1 4.840 -7.942 -3.045 1.00 63.15 C \nATOM 5 O MET A 1 4.075 -5.242 -4.580 1.00 66.94 O \nATOM 6 CG MET A 1 4.400 -9.199 -3.778 1.00 57.37 C \nATOM 7 SD MET A 1 3.569 -10.403 -2.670 1.00 59.48 S \nATOM 8 CE MET A 1 5.013 -11.132 -1.848 1.00 51.81 C \nATOM 9 N SER A 2 4.774 -6.539 -6.108 1.00 71.74 N \nATOM 10 CA SER A 2 3.925 -5.963 -7.146 1.00 73.06 C \nATOM 11 C SER A 2 2.834 -6.940 -7.573 1.00 73.49 C \nATOM 12 CB SER A 2 4.762 -5.559 -8.360 1.00 68.23 C \n...


In [None]:
# Display the frame currently bound to `df` (the whole frame, not a head() preview).
df

Unnamed: 0,id,nmsa,pLDDT_AlphaFold,version,sequence,length,pLDDT_before,pLDDT_after,tm_score_before,lddt_before,tm_score_after,lddt_after,time,difference_plddt
0,A0A0H4SQ70,10124,91.406506,BASE,MQSIDFGRALDFTLRWEGGYVNHPSDPGGATNRGITQVTYNQWRTQKGLPTREVRLIEEDEVRSIYWQFYWAPVEGRTAPSWVQFRVCLFDTFVQFGVFGGTFLWQKVCGVPADGQWGPVTSRATENLVSTKGPLWSGMALVGERVRYRAQRVSQNRSQLAFLQGWLNRDSDLLLYLLNLR,181,85.136031,87.117266,0.98978,0.990052,0.98613,0.981236,67.120121,1.981235
1,Q80B01,5665,91.960021,BASE,ENRAFDENVTERVMHVLVAAGADVNAASVVDRTPLHVCLTGMSTHPGTIAALLRFGADVNAVDLCGMSPLAVLVRSRAATAELVRMLLDAGADAHAVDSRLDSLLHQHFQSARPRPEVVRELIRHGCSPRARNRIGNTPLHEAAKHSSCKHSLVGPLLAAGASVDARNNTGRTPLHLAAASNPRACRRLIALGADVVARSYAGVTPLAQLIADNNSALVTAALDTQPEPRAVAESLRATTPVGETACSRLCVAYVVARAPSEVLGEPERALHAAFVAECLAEVAAIHAVRCGTPPVSLLEILVSARTPRSLLSRRARRLAESRTTVYRAPLRARIAAMRHRSRLVERALRTLRGCVLPREVLERVLRCLSTQDLRASGLAE,381,90.829035,90.912645,0.88055,0.896696,0.88207,0.897574,0.639445,0.083609
2,A0A1S5R1Q9,57,94.034389,BASE+LOGAN,MSLDIPKGMNIHSAATYARDYAAKQRANYVTIVFNDINLTVYATSNPDDIALIYYLESEVRRLKR,65,58.079447,83.771889,0.50373,0.623064,0.67245,0.756843,34.663036,25.692443
3,A0A2I4Q1U7,17,91.027945,BASE+LOGAN,MNYCDIAHELRMEREKQEKRIIKKMAVLLAHYKADKQPTHDEFVDFCNMYLNVSKATGYRWLKALNDGEL,70,78.155634,87.247534,0.60368,0.743309,0.94927,0.969441,35.922151,9.091901
4,A0A218MN90,538,90.865686,BASE,METFVGFVLYLYTSAGTLLEFTPKDSLSDCLRIKRTIERTDPPQRGKERWVCKQGKLKLKVIDGKKYPVEVLDY,74,81.071421,81.071421,0.95585,0.950043,0.95585,0.950043,34.210736,0.0
5,A0A3S7URH3,1016,90.055809,BASE,MPHEIWGQRFVSTREPAWHRIGFHFAEPILPSEAVAIAGFEEPELAPLFLEDGTPVDYYAVVMGDEVFGVVSRYWRLFRLEEIVPTLDELSRTFPLSAAGQLKKGRIVFFAFEQRTEILGEDYIRYLVVLHSYEPGRSWKVLYTPTRVVCMNTLVASEKDREWEYRVYHNAPRGILEAQIVMAQYRVLQETVDKKLEAFARIGDFDAQLDVLFEHVYPYPEPPSELDKQRYGKEVEERYERAMERVKQVRNLALQSYQRFNDEFPKFGNTAYAALQAVTEVADWRGNLKSKESPLVGPRAKEKQAAWQFLSTLI,314,77.239656,77.239656,0.48237,0.768121,0.48237,0.768121,160.64179,0.0
6,A0A5B9NC75,582,91.237163,BASE,MVSSDFFEADDLFIMDSYAFGDRGILMSQDKDSWLSPMARFDIPTGTVWPALDNPFGWIKWDDTQAMPVRAHGLKFFWWQMLAGDDADNVKGITLLDGKPCGKRTAFDAIYPITSEQDAAEFVVAAYARNNQDVLAEAECLFLRRSQSDSAYQYLMSLLTTPSLRDWVQSLHEYHKQHIQWIQEHPDNGENDS,193,87.290006,90.58224,0.97195,0.940903,0.98274,0.969921,69.603494,3.292234
7,Q5GQY6,718,90.340547,BASE+LOGAN+12CY,MKINFKSHQITDREVDFTQDDQVRLFEIMKQEFINHITYSRFDSIIHLTKFEQQLYNFCKDYNVDIEYSKDRVAFFTALIKEMKYQ,86,57.228919,60.270521,0.39317,0.64958,0.39527,0.648882,35.569681,3.041602
8,K8DUZ9,123,90.277649,BASE,MNAFEKRAQLKDIKPGAILYEVFSINGVKAEMGPKKIITGLPFQHLSIGLFVDAITVYDDWEGRQHMSLMDHNVIGRNNYNFHALFLSKKDAQEYVDQINNDQLPPELRETSRKMHREWIVRRAEDALYDM,131,67.014851,73.684647,0.78825,0.73338,0.8896,0.899537,44.113523,6.669796
9,A0A0A0RPE0,30,90.647829,BASE+LOGAN+12CY,MLILIMMVVAVVGPLVALIVTEVCDAKWDRKYNEVMEQRYAERRVRKWEQRYNAHSDTLAS,61,74.841414,74.841414,0.87703,1.0,0.87703,1.0,34.537524,0.0


In [11]:
import ast

# Load the per-step log of protein A0A1S5R1Q9 from the 'logs_df' run.
log = pd.read_csv(base_path / 'logs_df' / 'A0A1S5R1Q9_log.tsv', sep="\t")
# NOTE(review): an earlier comment described the 'pdb' column as the string representation of a list, but the value is written out as-is below (no parsing) — confirm it is plain PDB text for this run.


# Write the first row's 'pdb' value to ./t_df.pdb (relative to the working directory); write_text returns the number of characters written.
Path('./t_df.pdb').write_text(log.iloc[0].pdb)

42486

In [16]:
from proteinttt.utils.structure import calculate_tm_score, lddt_score

# lDDT between the structure dumped to t_df.pdb and the stored structure
# for A0A1S5R1Q9 under predicted_structures/.
lddt_score(
    Path('/scratch/project/open-35-8/pimenol1/ProteinTTT/ProteinTTT/notebooks/t_df.pdb'),
    Path('/scratch/project/open-35-8/pimenol1/ProteinTTT/ProteinTTT/data/bfvd/predicted_structures/A0A1S5R1Q9.pdb'),
)

0.9319476176082502

In [17]:
# Per-step log of protein A0A0H4SQ70 from the 'logs_msa' run; preview the first ten rows.
log = pd.read_csv(base_path.joinpath('logs_msa', 'A0A0H4SQ70_log.tsv'), sep="\t")
log.head(10)

Unnamed: 0,step,accumulated_step,loss,eval_step_time,plddt,pdb
0,0,0,,66.267741,49.100296,['PARENT N/A\nATOM 1 N TYR A 521 -12.885 -18.125 12.595 1.00 54.20 N \nATOM 2 CA TYR A 521 -11.848 -17.834 11.612 1.00 48.39 C \nATOM 3 C TYR A 521 -10.808 -16.877 12.183 1.00 52.31 C \nATOM 4 CB TYR A 521 -12.463 -17.238 10.342 1.00 46.73 C \nATOM 5 O TYR A 521 -9.741 -16.688 11.595 1.00 50.11 O \nATOM 6 CG TYR A 521 -11.487 -17.109 9.198 1.00 43.25 C \nATOM 7 CD1 TYR A 521 -10.830 -15.905 8.949 1.00 38.72 C \nATOM 8 CD2 TYR A 521 -11.219 -18.188 8.363 1.00 45.23 C \nATOM 9 CE1 TYR A 521 -9.931 -15.780 7.895 1.00 51.69 C \nATOM 10 CE2 TYR A 521 -10.322 -18.075 7.306 1.00 53.00 C \nATOM 11 OH TYR A 521 -8.794 -16.751 6.036 1.00 63.32 O \nATOM 12 CZ TYR A 521 -9.683 -16.869 7.081 1.00 35.15 C ...
1,1,1,1.710938,64.769179,43.330227,['PARENT N/A\nATOM 1 N TYR A 521 8.269 -8.280 4.499 1.00 49.75 N \nATOM 2 CA TYR A 521 7.403 -7.114 4.635 1.00 49.37 C \nATOM 3 C TYR A 521 6.092 -7.486 5.317 1.00 49.05 C \nATOM 4 CB TYR A 521 7.119 -6.492 3.264 1.00 44.05 C \nATOM 5 O TYR A 521 5.208 -8.081 4.696 1.00 46.02 O \nATOM 6 CG TYR A 521 6.592 -5.080 3.336 1.00 41.38 C \nATOM 7 CD1 TYR A 521 5.222 -4.826 3.339 1.00 40.29 C \nATOM 8 CD2 TYR A 521 7.462 -3.997 3.399 1.00 44.29 C \nATOM 9 CE1 TYR A 521 4.732 -3.526 3.403 1.00 54.64 C \nATOM 10 CE2 TYR A 521 6.983 -2.692 3.464 1.00 50.04 C \nATOM 11 OH TYR A 521 5.138 -1.179 3.528 1.00 58.87 O \nATOM 12 CZ TYR A 521 5.618 -2.468 3.465 1.00 55.88 C ...
2,2,2,1.480469,64.768631,57.404964,['PARENT N/A\nATOM 1 N TYR A 521 9.223 7.274 -18.997 1.00 63.40 N \nATOM 2 CA TYR A 521 8.070 6.840 -18.216 1.00 62.27 C \nATOM 3 C TYR A 521 8.237 5.397 -17.753 1.00 61.62 C \nATOM 4 CB TYR A 521 6.782 6.979 -19.033 1.00 56.85 C \nATOM 5 O TYR A 521 8.093 4.464 -18.546 1.00 57.25 O \nATOM 6 CG TYR A 521 5.524 6.810 -18.217 1.00 53.65 C \nATOM 7 CD1 TYR A 521 4.880 5.576 -18.142 1.00 50.67 C \nATOM 8 CD2 TYR A 521 4.975 7.881 -17.521 1.00 52.27 C \nATOM 9 CE1 TYR A 521 3.719 5.416 -17.393 1.00 58.90 C \nATOM 10 CE2 TYR A 521 3.815 7.732 -16.769 1.00 55.21 C \nATOM 11 OH TYR A 521 2.046 6.344 -15.968 1.00 65.76 O \nATOM 12 CZ TYR A 521 3.195 6.497 -16.711 1.00 53.57 C ...
3,3,3,1.748047,64.912733,47.754387,['PARENT N/A\nATOM 1 N TYR A 521 -0.202 -13.613 1.488 1.00 62.93 N \nATOM 2 CA TYR A 521 -0.898 -12.397 1.894 1.00 62.07 C \nATOM 3 C TYR A 521 -0.147 -11.156 1.425 1.00 63.77 C \nATOM 4 CB TYR A 521 -1.071 -12.359 3.415 1.00 54.88 C \nATOM 5 O TYR A 521 0.887 -10.798 1.993 1.00 57.21 O \nATOM 6 CG TYR A 521 -2.081 -11.341 3.886 1.00 52.41 C \nATOM 7 CD1 TYR A 521 -3.443 -11.633 3.894 1.00 54.49 C \nATOM 8 CD2 TYR A 521 -1.676 -10.084 4.323 1.00 58.96 C \nATOM 9 CE1 TYR A 521 -4.377 -10.697 4.325 1.00 66.40 C \nATOM 10 CE2 TYR A 521 -2.601 -9.140 4.756 1.00 63.30 C \nATOM 11 OH TYR A 521 -4.868 -8.525 5.182 1.00 67.78 O \nATOM 12 CZ TYR A 521 -3.947 -9.456 4.754 1.00 69.11 C ...
4,4,4,1.652344,64.854294,52.95586,['PARENT N/A\nATOM 1 N TYR A 521 1.727 -17.393 23.501 1.00 47.78 N \nATOM 2 CA TYR A 521 2.962 -16.982 22.842 1.00 46.77 C \nATOM 3 C TYR A 521 2.720 -16.684 21.367 1.00 46.27 C \nATOM 4 CB TYR A 521 4.034 -18.066 22.986 1.00 44.43 C \nATOM 5 O TYR A 521 2.548 -17.602 20.562 1.00 44.73 O \nATOM 6 CG TYR A 521 5.432 -17.580 22.691 1.00 41.91 C \nATOM 7 CD1 TYR A 521 5.975 -17.693 21.413 1.00 38.82 C \nATOM 8 CD2 TYR A 521 6.214 -17.008 23.689 1.00 40.24 C \nATOM 9 CE1 TYR A 521 7.263 -17.248 21.136 1.00 42.26 C \nATOM 10 CE2 TYR A 521 7.504 -16.559 23.424 1.00 44.53 C \nATOM 11 OH TYR A 521 9.295 -16.241 21.879 1.00 43.16 O \nATOM 12 CZ TYR A 521 8.019 -16.683 22.147 1.00 39.58 C ...
5,5,5,1.800781,64.848545,64.168854,['PARENT N/A\nATOM 1 N TYR A 521 -16.536 0.929 13.565 1.00 66.82 N \nATOM 2 CA TYR A 521 -17.047 0.318 14.787 1.00 65.09 C \nATOM 3 C TYR A 521 -17.727 -1.013 14.489 1.00 65.53 C \nATOM 4 CB TYR A 521 -18.030 1.261 15.488 1.00 59.19 C \nATOM 5 O TYR A 521 -18.921 -1.053 14.185 1.00 60.93 O \nATOM 6 CG TYR A 521 -18.174 0.997 16.968 1.00 57.47 C \nATOM 7 CD1 TYR A 521 -19.202 0.193 17.455 1.00 56.81 C \nATOM 8 CD2 TYR A 521 -17.283 1.550 17.881 1.00 62.03 C \nATOM 9 CE1 TYR A 521 -19.340 -0.052 18.817 1.00 74.75 C \nATOM 10 CE2 TYR A 521 -17.411 1.313 19.245 1.00 67.44 C \nATOM 11 OH TYR A 521 -18.572 0.272 21.053 1.00 71.16 O \nATOM 12 CZ TYR A 521 -18.440 0.511 19.703 1.00 79.64 C ...
6,6,6,1.961914,64.731618,46.030392,['PARENT N/A\nATOM 1 N TYR A 521 -16.269 -19.637 13.863 1.00 47.05 N \nATOM 2 CA TYR A 521 -17.259 -20.069 14.844 1.00 47.13 C \nATOM 3 C TYR A 521 -16.727 -21.226 15.682 1.00 46.93 C \nATOM 4 CB TYR A 521 -18.560 -20.482 14.150 1.00 41.84 C \nATOM 5 O TYR A 521 -16.700 -22.371 15.225 1.00 44.01 O \nATOM 6 CG TYR A 521 -19.767 -20.451 15.055 1.00 41.07 C \nATOM 7 CD1 TYR A 521 -20.195 -21.599 15.717 1.00 39.19 C \nATOM 8 CD2 TYR A 521 -20.483 -19.274 15.248 1.00 43.03 C \nATOM 9 CE1 TYR A 521 -21.309 -21.575 16.551 1.00 44.27 C \nATOM 10 CE2 TYR A 521 -21.598 -19.239 16.079 1.00 47.84 C \nATOM 11 OH TYR A 521 -23.105 -20.363 17.549 1.00 44.98 O \nATOM 12 CZ TYR A 521 -22.002 -20.392 16.725 1.00 41.23 C ...
7,7,7,1.330078,64.730856,49.906944,['PARENT N/A\nATOM 1 N TYR A 521 -5.148 -15.786 12.166 1.00 57.19 N \nATOM 2 CA TYR A 521 -5.542 -14.382 12.137 1.00 59.02 C \nATOM 3 C TYR A 521 -6.871 -14.203 11.414 1.00 57.63 C \nATOM 4 CB TYR A 521 -4.461 -13.534 11.460 1.00 53.82 C \nATOM 5 O TYR A 521 -6.966 -13.431 10.457 1.00 52.99 O \nATOM 6 CG TYR A 521 -4.506 -12.075 11.843 1.00 51.84 C \nATOM 7 CD1 TYR A 521 -5.208 -11.153 11.070 1.00 48.65 C \nATOM 8 CD2 TYR A 521 -3.846 -11.615 12.978 1.00 49.81 C \nATOM 9 CE1 TYR A 521 -5.251 -9.807 11.418 1.00 50.69 C \nATOM 10 CE2 TYR A 521 -3.882 -10.271 13.335 1.00 50.26 C \nATOM 11 OH TYR A 521 -4.625 -8.045 12.900 1.00 54.39 O \nATOM 12 CZ TYR A 521 -4.586 -9.376 12.551 1.00 54.42 C ...
8,8,8,1.604492,64.725734,55.303181,['PARENT N/A\nATOM 1 N TYR A 521 -1.475 0.166 12.190 1.00 72.96 N \nATOM 2 CA TYR A 521 -2.091 1.483 12.311 1.00 76.95 C \nATOM 3 C TYR A 521 -3.606 1.367 12.431 1.00 76.30 C \nATOM 4 CB TYR A 521 -1.728 2.358 11.107 1.00 70.67 C \nATOM 5 O TYR A 521 -4.298 2.370 12.622 1.00 70.41 O \nATOM 6 CG TYR A 521 -2.127 3.805 11.267 1.00 68.30 C \nATOM 7 CD1 TYR A 521 -3.340 4.273 10.766 1.00 66.64 C \nATOM 8 CD2 TYR A 521 -1.293 4.707 11.918 1.00 69.19 C \nATOM 9 CE1 TYR A 521 -3.711 5.606 10.909 1.00 74.36 C \nATOM 10 CE2 TYR A 521 -1.653 6.042 12.067 1.00 66.23 C \nATOM 11 OH TYR A 521 -3.225 7.802 11.705 1.00 68.98 O \nATOM 12 CZ TYR A 521 -2.863 6.482 11.561 1.00 81.85 C ...
9,9,9,1.40625,64.724621,50.192623,['PARENT N/A\nATOM 1 N TYR A 521 -0.411 -32.356 17.402 1.00 50.01 N \nATOM 2 CA TYR A 521 -0.524 -31.355 18.457 1.00 46.85 C \nATOM 3 C TYR A 521 -1.396 -30.188 18.010 1.00 49.72 C \nATOM 4 CB TYR A 521 0.861 -30.846 18.866 1.00 45.72 C \nATOM 5 O TYR A 521 -0.987 -29.387 17.166 1.00 47.19 O \nATOM 6 CG TYR A 521 0.901 -30.238 20.247 1.00 41.70 C \nATOM 7 CD1 TYR A 521 0.760 -28.864 20.429 1.00 38.12 C \nATOM 8 CD2 TYR A 521 1.081 -31.035 21.372 1.00 40.20 C \nATOM 9 CE1 TYR A 521 0.798 -28.299 21.700 1.00 47.87 C \nATOM 10 CE2 TYR A 521 1.121 -30.481 22.648 1.00 47.09 C \nATOM 11 OH TYR A 521 1.016 -28.561 24.061 1.00 61.13 O \nATOM 12 CZ TYR A 521 0.978 -29.115 22.801 1.00 38.19 C ...


In [40]:
# Per-step log of protein A0A0H4SQ70 from the 'logs_df' run; displayed as the cell output.
log = pd.read_csv(base_path.joinpath('logs_df', 'A0A0H4SQ70_log.tsv'), sep="\t")
log

Unnamed: 0,step,accumulated_step,loss,score_seq_time,eval_step_time,plddt,pdb
0,0,0,,0.0,1.979763,85.136031,PARENT N/A\nATOM 1 N MET A 1 -6.334 -13.737 14.853 1.00 30.43 N \nATOM 2 CA MET A 1 -5.347 -14.366 13.981 1.00 40.25 C \nATOM 3 C MET A 1 -5.152 -13.556 12.704 1.00 31.69 C \nATOM 4 CB MET A 1 -5.768 -15.795 13.634 1.00 30.67 C \nATOM 5 O MET A 1 -6.104 -13.335 11.953 1.00 31.98 O \nATOM 6 CG MET A 1 -4.754 -16.851 14.044 1.00 30.73 C \nATOM 7 SD MET A 1 -5.402 -18.557 13.861 1.00 18.27 S \nATOM 8 CE MET A 1 -4.304 -19.179 12.557 1.00 18.19 C \nATOM 9 N GLN A 2 -4.185 -12.569 12.812 1.00 53.99 N \nATOM 10 CA GLN A 2 -3.840 -11.817 11.610 1.00 55.16 C \nATOM 11 C GLN A 2 -3.514 -12.753 10.450 1.00 55.60 C \nATOM 12 CB GLN A 2 -2.659 -10.884 11.881 1.00 46.79 C \n...
1,1,4,1.396484,0.0,1.963782,84.821798,PARENT N/A\nATOM 1 N MET A 1 -6.324 -14.365 14.345 1.00 31.27 N \nATOM 2 CA MET A 1 -5.386 -14.990 13.416 1.00 40.29 C \nATOM 3 C MET A 1 -5.192 -14.126 12.175 1.00 32.28 C \nATOM 4 CB MET A 1 -5.875 -16.382 13.013 1.00 30.81 C \nATOM 5 O MET A 1 -6.154 -13.830 11.463 1.00 32.54 O \nATOM 6 CG MET A 1 -4.909 -17.499 13.374 1.00 30.87 C \nATOM 7 SD MET A 1 -5.617 -19.165 13.075 1.00 18.64 S \nATOM 8 CE MET A 1 -4.472 -19.777 11.807 1.00 20.03 C \nATOM 9 N GLN A 2 -4.170 -13.184 12.296 1.00 54.12 N \nATOM 10 CA GLN A 2 -3.833 -12.368 11.134 1.00 55.47 C \nATOM 11 C GLN A 2 -3.538 -13.240 9.917 1.00 55.73 C \nATOM 12 CB GLN A 2 -2.634 -11.468 11.439 1.00 46.81 C \n...
2,2,8,1.220703,0.0,1.963898,85.219783,PARENT N/A\nATOM 1 N MET A 1 -7.434 -14.632 13.660 1.00 31.07 N \nATOM 2 CA MET A 1 -6.526 -15.275 12.714 1.00 40.43 C \nATOM 3 C MET A 1 -6.271 -14.377 11.508 1.00 32.44 C \nATOM 4 CB MET A 1 -7.091 -16.620 12.255 1.00 31.00 C \nATOM 5 O MET A 1 -7.206 -14.005 10.797 1.00 32.61 O \nATOM 6 CG MET A 1 -6.252 -17.815 12.679 1.00 31.03 C \nATOM 7 SD MET A 1 -7.061 -19.415 12.291 1.00 18.34 S \nATOM 8 CE MET A 1 -5.883 -20.086 11.085 1.00 18.67 C \nATOM 9 N GLN A 2 -5.231 -13.457 11.706 1.00 54.60 N \nATOM 10 CA GLN A 2 -4.829 -12.606 10.591 1.00 55.89 C \nATOM 11 C GLN A 2 -4.523 -13.438 9.348 1.00 56.10 C \nATOM 12 CB GLN A 2 -3.610 -11.763 10.970 1.00 47.16 C \n...
3,3,12,1.214844,0.0,1.965569,85.962069,PARENT N/A\nATOM 1 N MET A 1 -9.782 -11.386 10.232 1.00 51.29 N \nATOM 2 CA MET A 1 -8.379 -11.026 10.046 1.00 51.52 C \nATOM 3 C MET A 1 -7.554 -12.243 9.642 1.00 53.52 C \nATOM 4 CB MET A 1 -7.808 -10.407 11.323 1.00 44.64 C \nATOM 5 O MET A 1 -6.790 -12.776 10.449 1.00 50.50 O \nATOM 6 CG MET A 1 -8.360 -9.025 11.634 1.00 41.71 C \nATOM 7 SD MET A 1 -7.713 -8.348 13.212 1.00 46.27 S \nATOM 8 CE MET A 1 -9.213 -7.560 13.861 1.00 36.66 C \nATOM 9 N GLN A 2 -8.061 -13.139 8.778 1.00 62.85 N \nATOM 10 CA GLN A 2 -7.582 -14.484 8.478 1.00 65.31 C \nATOM 11 C GLN A 2 -7.247 -14.631 6.997 1.00 64.09 C \nATOM 12 CB GLN A 2 -8.621 -15.528 8.888 1.00 51.83 C \n...
4,4,16,0.666016,0.0,1.964066,86.218772,PARENT N/A\nATOM 1 N MET A 1 -10.276 -12.599 9.095 1.00 50.67 N \nATOM 2 CA MET A 1 -8.922 -12.095 8.887 1.00 51.52 C \nATOM 3 C MET A 1 -7.978 -13.221 8.480 1.00 52.82 C \nATOM 4 CB MET A 1 -8.403 -11.409 10.153 1.00 44.66 C \nATOM 5 O MET A 1 -7.457 -13.940 9.335 1.00 50.02 O \nATOM 6 CG MET A 1 -9.052 -10.063 10.431 1.00 42.26 C \nATOM 7 SD MET A 1 -8.471 -9.314 12.002 1.00 45.40 S \nATOM 8 CE MET A 1 -9.946 -8.366 12.469 1.00 37.49 C \nATOM 9 N GLN A 2 -8.307 -14.075 7.503 1.00 62.04 N \nATOM 10 CA GLN A 2 -7.733 -15.371 7.158 1.00 64.13 C \nATOM 11 C GLN A 2 -7.245 -15.390 5.712 1.00 63.07 C \nATOM 12 CB GLN A 2 -8.755 -16.487 7.381 1.00 51.24 C \n...
5,5,20,0.564453,0.0,1.96479,86.328141,PARENT N/A\nATOM 1 N MET A 1 -10.722 -11.004 10.500 1.00 52.56 N \nATOM 2 CA MET A 1 -9.354 -10.683 10.102 1.00 53.59 C \nATOM 3 C MET A 1 -8.597 -11.942 9.694 1.00 54.92 C \nATOM 4 CB MET A 1 -8.616 -9.974 11.238 1.00 46.18 C \nATOM 5 O MET A 1 -8.024 -12.630 10.541 1.00 51.37 O \nATOM 6 CG MET A 1 -9.124 -8.569 11.517 1.00 43.48 C \nATOM 7 SD MET A 1 -8.270 -7.780 12.937 1.00 46.61 S \nATOM 8 CE MET A 1 -9.700 -7.182 13.879 1.00 39.76 C \nATOM 9 N GLN A 2 -9.125 -12.753 8.723 1.00 63.71 N \nATOM 10 CA GLN A 2 -8.755 -14.137 8.450 1.00 65.99 C \nATOM 11 C GLN A 2 -8.304 -14.310 7.002 1.00 65.20 C \nATOM 12 CB GLN A 2 -9.924 -15.076 8.751 1.00 53.00 C \n...
6,6,24,0.369873,0.0,1.962968,86.517463,PARENT N/A\nATOM 1 N MET A 1 -10.659 -14.350 6.946 1.00 41.71 N \nATOM 2 CA MET A 1 -10.747 -14.686 5.527 1.00 45.58 C \nATOM 3 C MET A 1 -9.390 -14.536 4.848 1.00 43.73 C \nATOM 4 CB MET A 1 -11.782 -13.802 4.829 1.00 40.06 C \nATOM 5 O MET A 1 -8.750 -13.489 4.954 1.00 43.27 O \nATOM 6 CG MET A 1 -13.069 -14.529 4.473 1.00 40.38 C \nATOM 7 SD MET A 1 -14.344 -13.405 3.783 1.00 22.40 S \nATOM 8 CE MET A 1 -14.384 -13.983 2.064 1.00 34.86 C \nATOM 9 N GLN A 2 -8.678 -15.495 4.870 1.00 52.77 N \nATOM 10 CA GLN A 2 -7.332 -15.936 4.519 1.00 54.30 C \nATOM 11 C GLN A 2 -7.089 -15.819 3.017 1.00 53.65 C \nATOM 12 CB GLN A 2 -7.103 -17.378 4.977 1.00 44.40 C \n...
7,7,28,0.220459,0.0,1.96493,86.860556,PARENT N/A\nATOM 1 N MET A 1 -11.257 -11.143 10.202 1.00 53.61 N \nATOM 2 CA MET A 1 -9.910 -10.865 9.714 1.00 55.42 C \nATOM 3 C MET A 1 -9.233 -12.141 9.226 1.00 56.20 C \nATOM 4 CB MET A 1 -9.065 -10.210 10.808 1.00 47.57 C \nATOM 5 O MET A 1 -8.717 -12.921 10.029 1.00 52.34 O \nATOM 6 CG MET A 1 -9.496 -8.793 11.150 1.00 45.30 C \nATOM 7 SD MET A 1 -8.520 -8.075 12.528 1.00 47.31 S \nATOM 8 CE MET A 1 -9.829 -7.187 13.418 1.00 42.44 C \nATOM 9 N GLN A 2 -9.741 -12.829 8.120 1.00 63.90 N \nATOM 10 CA GLN A 2 -9.516 -14.231 7.785 1.00 65.90 C \nATOM 11 C GLN A 2 -8.871 -14.372 6.409 1.00 65.49 C \nATOM 12 CB GLN A 2 -10.830 -15.013 7.831 1.00 53.48 C \n...
8,8,32,0.119141,0.0,1.965095,86.353019,PARENT N/A\nATOM 1 N MET A 1 -11.858 -17.439 5.509 1.00 35.54 N \nATOM 2 CA MET A 1 -10.634 -17.864 4.836 1.00 40.51 C \nATOM 3 C MET A 1 -9.889 -16.667 4.254 1.00 36.67 C \nATOM 4 CB MET A 1 -10.950 -18.871 3.728 1.00 32.86 C \nATOM 5 O MET A 1 -10.421 -15.953 3.402 1.00 35.93 O \nATOM 6 CG MET A 1 -10.291 -20.226 3.925 1.00 32.29 C \nATOM 7 SD MET A 1 -10.856 -21.468 2.698 1.00 20.14 S \nATOM 8 CE MET A 1 -9.302 -21.772 1.812 1.00 25.92 C \nATOM 9 N GLN A 2 -9.071 -16.028 5.161 1.00 54.49 N \nATOM 10 CA GLN A 2 -8.270 -14.900 4.697 1.00 55.80 C \nATOM 11 C GLN A 2 -7.528 -15.245 3.409 1.00 55.53 C \nATOM 12 CB GLN A 2 -7.276 -14.467 5.776 1.00 48.31 C \n...
9,9,36,0.122437,0.0,1.965097,86.658853,PARENT N/A\nATOM 1 N MET A 1 -13.210 -14.999 8.514 1.00 34.66 N \nATOM 2 CA MET A 1 -12.060 -15.653 7.895 1.00 39.84 C \nATOM 3 C MET A 1 -11.234 -14.654 7.092 1.00 35.72 C \nATOM 4 CB MET A 1 -12.515 -16.801 6.992 1.00 32.33 C \nATOM 5 O MET A 1 -11.740 -14.034 6.154 1.00 35.17 O \nATOM 6 CG MET A 1 -11.966 -18.157 7.402 1.00 31.91 C \nATOM 7 SD MET A 1 -12.672 -19.530 6.410 1.00 19.70 S \nATOM 8 CE MET A 1 -11.264 -19.899 5.327 1.00 24.78 C \nATOM 9 N GLN A 2 -10.324 -13.969 7.873 1.00 53.11 N \nATOM 10 CA GLN A 2 -9.472 -13.026 7.156 1.00 54.59 C \nATOM 11 C GLN A 2 -8.804 -13.692 5.956 1.00 54.10 C \nATOM 12 CB GLN A 2 -8.411 -12.442 8.090 1.00 46.60 C \n...


In [3]:
from Bio import SeqIO


def check_a3m_lengths(filepath):
    """Report whether all sequences in an alignment file share one length.

    Each record is read with ``SeqIO.parse(..., format="fasta")``. Lowercase
    letters are ignored when measuring a record (the file is expected to be
    A3M, where lowercase presumably marks insertions -- confirm against the
    format spec). Every record's remaining length is compared with that of
    the first record, and the result is printed. Nothing is returned.

    Args:
        filepath: Location of the alignment file, handed to ``SeqIO.parse``.
    """
    try:
        # Collect (id, length) pairs in a list instead of a dict keyed by id,
        # so records that share a header are each counted and each checked.
        # Counting non-lowercase characters in one pass avoids rebuilding the
        # sequence once per lowercase character.
        lengths = [
            (record.id, sum(1 for char in record.seq if not char.islower()))
            for record in SeqIO.parse(filepath, format="fasta")
        ]
    except Exception as e:
        # Best-effort check: report read/parse problems instead of raising.
        print(f"An error occurred: {e}")
        return

    # An empty file has no reference record to compare against.
    if not lengths:
        print(f"No sequences found in '{filepath}'")
        return

    first_record_id, reference_length = lengths[0]
    print(f"Reference sequence: '{first_record_id}' (Length: {reference_length})")

    is_consistent = True
    for seq_id, length in lengths:
        if length != reference_length:
            print(f"MISMATCH: '{seq_id}' has length {length}")
            is_consistent = False

    if is_consistent:
        print(f"All {len(lengths)} sequences have the same length: {reference_length}")

# Build the alignment path from the MSA_PATH directory configured at the top of
# the notebook rather than repeating the absolute directory here; str() keeps
# the value a plain string, identical to the previously hard-coded one.
file_to_check = str(MSA_PATH / "A0A1M7XUY2.a3m")
check_a3m_lengths(file_to_check)

Reference sequence: 'UniRef100_A0A1M7XUY2' (Length: 437)
All 2527 sequences have the same length: 437
