In [8]:
import numpy as np
import pandas as pd
from PDB import PDB
import os
import copy
from json import loads

In [9]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
# Never truncate columns when a DataFrame is rendered; row truncation stays at the pandas default
pd.options.display.max_columns = None
# pd.options.display.max_rows = None
# String labels and a short alias for pandas multi-index slicing
mean, std = 'mean', 'std'
idx_slice = pd.IndexSlice

In [15]:
# Protocol names. read_scores() looks these up as score-entry keys: when a later entry for the same design
# contains any of them, all of them are removed from the stored entry before the two are merged.
protocols = ['design_profile', 'no_constraint']

In [11]:
def read_scores(file, key='decoy'):
    """Take a json formatted score.sc file and incorporate into dictionary of dictionaries with 'key' value as outer key

    Every non-blank line of the file is parsed as one json object. Lines that share the same 'key' value are merged
    into a single dictionary in file order, so later lines override earlier ones. Blank or whitespace-only lines
    (such as a trailing empty line) are skipped.

    Args:
        file (str): Location on disk of scorefile
    Keyword Args:
        key='decoy' (str): Name of the json key to use as outer dictionary identifier
    Returns:
        score_dict (dict): {design_name: {all_score_metric_keys: all_score_metric_values}, ...}
    Raises:
        json.JSONDecodeError: If a non-blank line is not valid json
        KeyError: If an entry has no 'key' field
    """
    score_dict = {}
    with open(file, 'r') as f:
        for line in f:  # stream the file rather than materializing every line with readlines()
            line = line.strip()
            if not line:
                continue
            entry = loads(line)
            design = entry[key]  # entry['decoy'].split('_')[-1]
            if design not in score_dict:
                score_dict[design] = entry
                continue
            # to ensure old trajectories don't have lingering protocol info TODO clean Rosetta protocol generation
            # `protocols` is the module level list of protocol names
            if any(protocol in entry for protocol in protocols):
                for rm_protocol in protocols:
                    score_dict[design].pop(rm_protocol, None)
            score_dict[design].update(entry)

    return score_dict

In [12]:
# Blank per-residue record. dirty_dirty_residue_processing() deep-copies it for each residue before filling in
# the 'energy' and 'sasa' measurements; the remaining fields keep these defaults.
residue_template = {
    'energy': {'complex': 0, 'unbound': 0, 'fsp': 0, 'cst': 0},
    'sasa': {
        'polar': {'complex': 0, 'unbound': 0},
        'hydrophobic': {'complex': 0, 'unbound': 0},
        'total': {'complex': 0, 'unbound': 0},
    },
    'type': None,
    'hbond': 0,
    'core': 0,
    'interior': 0,
    'rim': 0,
    'support': 0,
    # 'hot_spot': 0,
}

In [13]:
def dirty_dirty_residue_processing(score_dict, offset=None, hbonds=None):  # mutations,
    """Collect the 'per_res_*' entries of every design into nested per-residue dictionaries

    Only keys that start with 'per_res_' are used. The key is split on '_': the last token is the residue number,
    the second to last is the pose state and the third token is the measurement type, Ex.
        per_res_energy_complex_15, per_res_energy_1_unbound_15,
        per_res_sasa_hydrophobic_complex_15, per_res_sasa_hydrophobic_1_unbound_15
    Each residue starts from a deep copy of the module level residue_template and every value is rounded to 3
    decimals. The further per-residue processing (residue type, hbond flag, energy delta, buried surface area and
    core/rim/support/interior classification) that used to follow is currently disabled, so those template fields
    are returned with their default values.

    Args:
        score_dict (dict): {'0001': {'buns': 2.0, 'per_res_energy_complex_15': -3.26, ...}, ...}
    Keyword Args:
        offset=None (dict): Amount added to the residue number of 'unbound' measurements, keyed by the token that
            precedes the pose state in the metric name (Ex. '1' in per_res_energy_1_unbound_15)
        hbonds=None (dict): Currently unused
    Returns:
        total_residue_dict (dict): {'0001': {15: {'energy': {'complex': -3.26, 'unbound': 0, 'fsp': 0, 'cst': 0},
            'sasa': {'polar': {'complex': 0, 'unbound': 0}, 'hydrophobic': {...}, 'total': {...}},
            'type': None, 'hbond': 0, 'core': 0, 'interior': 0, 'rim': 0, 'support': 0}, ...}, ...}
    """
    total_residue_dict = {}
    for design, scores in score_dict.items():
        per_residue = {}
        for metric, measurement in scores.items():
            if not metric.startswith('per_res_'):
                continue
            fields = metric.split('_')
            residue_number = int(fields[-1])
            measurement_type = fields[2]  # energy or sasa
            state = fields[-2]  # unbound or complex
            if state == 'unbound' and offset:
                residue_number += offset[fields[-3]]  # shift by the offset of the unbound entity
            if residue_number not in per_residue:
                per_residue[residue_number] = copy.deepcopy(residue_template)
            rounded = round(measurement, 3)
            record = per_residue[residue_number][measurement_type]
            if measurement_type == 'sasa':
                record = record[fields[3]]  # sasa is further divided by polarity
            record[state] = rounded
        total_residue_dict[design] = per_residue

    return total_residue_dict

In [123]:
# All four score files sit in the same directory, so the directory is spelled out once
scores_dir = '/home/kylemeador/designs/2gtr_2dj6/DEGEN_1_2/ROT_19_29/tx_72/scores'
old_score_dict = read_scores(os.path.join(scores_dir, 'design_scores_without_sym_residues.sc'))
sym_new_score_dict = read_scores(os.path.join(scores_dir, 'design_scores_with_only_design_sym_residues.sc'))
correct_asu_sym_new_score_dict = read_scores(os.path.join(scores_dir, 'design_scores_correct_asu.sc'))
correct_asu_old_score_dict = read_scores(os.path.join(scores_dir, 'design_scores_correct_asu_no_sym.sc'))

In [124]:
# Extract the per-residue measurements from each of the four score dictionaries
(old_residues,
 sym_new_residues,
 correct_asu_sym_new_residues,
 correct_asu_old_residues) = [dirty_dirty_residue_processing(scores)
                              for scores in (old_score_dict, sym_new_score_dict,
                                             correct_asu_sym_new_score_dict, correct_asu_old_score_dict)]

In [113]:
# For every design, combine the 'complex' measurements of sym_new_residues (stored as 'energy_sym'/'sasa_sym')
# with those of old_residues (stored as 'energy'/'sasa'). Residues holding both measurements go to same_d,
# the rest go to different_d.
sasa_types = ('polar', 'hydrophobic', 'total')
new_d, same_d, different_d = {}, {}, {}
for design in old_residues:
    merged = new_d[design] = {}
    for residue, residue_info in sym_new_residues[design].items():
        merged[residue] = {'energy_sym': residue_info['energy']['complex'],
                           'sasa_sym': {sasa_type: residue_info['sasa'][sasa_type]['complex']
                                        for sasa_type in sasa_types}}
    for residue, residue_info in old_residues[design].items():
        merged[residue]['energy'] = residue_info['energy']['complex']
        merged[residue]['sasa'] = {sasa_type: residue_info['sasa'][sasa_type]['complex']
                                   for sasa_type in sasa_types}
    same_d[design], different_d[design] = {}, {}
    for residue, metrics in merged.items():
        in_both = 'energy_sym' in metrics and 'energy' in metrics
        (same_d if in_both else different_d)[design][residue] = metrics

In [137]:
# Same merge as for the first pair of score files, applied to the correct_asu pair: 'energy_sym'/'sasa_sym' come
# from correct_asu_sym_new_residues and 'energy'/'sasa' from correct_asu_old_residues. Residues holding both
# measurements go to clean_same_d, the rest go to clean_different_d.
sasa_types = ('polar', 'hydrophobic', 'total')
clean_correct_asu_sym_d, clean_same_d, clean_different_d = {}, {}, {}
for design in correct_asu_sym_new_residues:
    merged = clean_correct_asu_sym_d[design] = {}
    for residue, residue_info in correct_asu_sym_new_residues[design].items():
        merged[residue] = {'energy_sym': residue_info['energy']['complex'],
                           'sasa_sym': {sasa_type: residue_info['sasa'][sasa_type]['complex']
                                        for sasa_type in sasa_types}}
    for residue, residue_info in correct_asu_old_residues[design].items():
        merged[residue]['energy'] = residue_info['energy']['complex']
        merged[residue]['sasa'] = {sasa_type: residue_info['sasa'][sasa_type]['complex']
                                   for sasa_type in sasa_types}
    clean_same_d[design], clean_different_d[design] = {}, {}
    for residue, metrics in merged.items():
        in_both = 'energy_sym' in metrics and 'energy' in metrics
        (clean_same_d if in_both else clean_different_d)[design][residue] = metrics

In [114]:
# OLD VERSION
# NOTE(review): this cell rebinds clean_correct_asu_sym_d, clean_same_d and clean_different_d, which the cell
# above also builds. The records built here hold only 'energy_sym' and 'sasa_sym' (no 'energy' or 'sasa'), and
# the same/different split is taken from the keys of same_d['clean_asu_for_refine_design_0001'] instead of from
# the records themselves. Running it after the cell above therefore discards the 'energy' values that later
# cells read from clean_same_d. Consider deleting this cell.
clean_correct_asu_sym_d = {design: 
                           {residue: {'energy_sym': residue_info['energy']['complex'], 
                                      'sasa_sym': {'polar': residue_info['sasa']['polar']['complex'],
                                                   'hydrophobic': residue_info['sasa']['hydrophobic']['complex'],
                                                   'total': residue_info['sasa']['total']['complex']}
                                     }
                            for residue, residue_info in residue_data.items()}
                           for design, residue_data in correct_asu_sym_new_residues.items()}
# Residues whose key is present in same_d for design 0001
clean_same_d = {design: {residue: data for residue, data in residue_info.items() if residue in same_d['clean_asu_for_refine_design_0001']} 
                for design, residue_info in clean_correct_asu_sym_d.items()}  
# Residues whose key is absent from same_d for design 0001
clean_different_d = {design: {residue: data for residue, data in residue_info.items() if residue not in same_d['clean_asu_for_refine_design_0001']} 
                for design, residue_info in clean_correct_asu_sym_d.items()}

In [87]:
# Map every pose residue number to a '<chain letter><residue number within its ASU>' label.
# NOTE(review): assumes one-indexed pose numbering with asu_length residues per ASU and the second chain of each
# ASU starting at residue second_chain - confirm against the input model.
new_residue_d = {}
asu_length = 353
second_chain = 249
# Cover the residues of every design instead of relying on whichever `design` an earlier cell left bound
for residue_data in sym_new_residues.values():
    for residue in residue_data:
        if residue in new_residue_d:
            continue
        # Shift to zero-indexed before dividing so the last residue of an ASU (a multiple of asu_length) stays in
        # its own ASU rather than becoming residue 0 of the next one
        asu_idx, asu_residue = divmod(residue - 1, asu_length)
        asu_residue += 1
        chain_idx = asu_idx * 2  # two chains per ASU
        if asu_residue >= second_chain:
            chain_idx += 1
        chain = PDB.available_letters[chain_idx]
        new_residue_d[residue] = '%s%d' % (chain, asu_residue)

In [115]:
# Re-key every residue record in place from its pose residue number to its chain label from new_residue_d
modify_dictionaries = [new_d, same_d, different_d]
for dict_ in modify_dictionaries:
    for design, residue_info in dict_.items():
        for residue_number in list(residue_info):  # snapshot the keys, the dictionary is re-keyed inside the loop
            if isinstance(residue_number, str):
                continue  # already re-keyed by an earlier run of this cell, which keeps the cell re-runnable
            residue_info[new_residue_d[residue_number]] = residue_info.pop(residue_number)

In [138]:
# Re-key every residue record in place from its pose residue number to its chain label from new_residue_d
modify_dictionaries = [clean_correct_asu_sym_d, clean_same_d, clean_different_d]
for dict_ in modify_dictionaries:
    for design, residue_info in dict_.items():
        for residue_number in list(residue_info):  # snapshot the keys, the dictionary is re-keyed inside the loop
            if isinstance(residue_number, str):
                continue  # already re-keyed by an earlier run of this cell, which keeps the cell re-runnable
            residue_info[new_residue_d[residue_number]] = residue_info.pop(residue_number)

In [117]:
# Number of residues present in both score sets, for one design of each pair of score files
for residue_sets, design_name in ((clean_same_d, 'clean_asu_for_refine_design_correct_asu_0001'),
                                  (same_d, 'clean_asu_for_refine_design_0001')):
    print(len(residue_sets[design_name]))

76
76


In [171]:
# Per-design energy totals over the residues in same_d ('energy_sym' and 'energy') ...
sum_energy_sym, sum_energy = {}, {}
for design, residue_info in same_d.items():
    sym_total = total = 0
    for data in residue_info.values():
        sym_total += data['energy_sym']
        total += data['energy']
    sum_energy_sym[design], sum_energy[design] = sym_total, total

# ... and over the residues in different_d ('energy_sym' only)
sum_energy_sym_diff = {}
for design, residue_info in different_d.items():
    sym_total = 0
    for data in residue_info.values():
        sym_total += data['energy_sym']
    sum_energy_sym_diff[design] = sym_total

In [132]:
# Dump the merged correct_asu per-residue dictionary for a visual check
print(clean_correct_asu_sym_d)

{}


In [172]:
# Per-design energy totals for the correct_asu score files, over the residues in clean_same_d ...
clean_sum_energy_sym_same, clean_sum_energy_all = {}, {}
for design, residue_info in clean_same_d.items():
    sym_total = total = 0
    for data in residue_info.values():
        sym_total += data['energy_sym']
        total += data['energy']
    clean_sum_energy_sym_same[design], clean_sum_energy_all[design] = sym_total, total

# ... over the residues in clean_different_d ...
clean_sum_energy_sym_different = {}
for design, residue_info in clean_different_d.items():
    sym_total = 0
    for data in residue_info.values():
        sym_total += data['energy_sym']
    clean_sum_energy_sym_different[design] = sym_total

# ... and over every residue
clean_sum_energy_sym_all = {}
for design, residue_info in clean_correct_asu_sym_d.items():
    sym_total = 0
    for data in residue_info.values():
        sym_total += data['energy_sym']
    clean_sum_energy_sym_all[design] = sym_total

# Report the totals of the last design iterated above next to those of the first pair of score files
dirty_design = 'clean_asu_for_refine_design_0001'
print(design,
      clean_sum_energy_sym_all[design],
      clean_sum_energy_sym_different[design],
      clean_sum_energy_sym_same[design],
      'Not Symmetric',
      clean_sum_energy_all[design],
      sum_energy_sym[dirty_design],
      sum_energy_sym_diff[dirty_design],
      sum_energy[dirty_design],
      sep='\n')

clean_asu_for_refine_design_correct_asu_0001
-270.34800000000007
-2.35
-267.99800000000005
Not Symmetric
-267.99800000000005
-218.44300000000007
-51.90199999999998
-205.208


In [119]:
# List the residues of clean_different_d[design] whose 'energy_sym' is non-zero.
# NOTE(review): `design` is not assigned in this cell; it is whatever an earlier cell's loop left bound
print([(residue, data) for residue, data in clean_different_d[design].items() if abs(data['energy_sym']) > 0])

[('L312', {'energy_sym': -0.116, 'sasa_sym': {'polar': 17.686, 'hydrophobic': 18.961, 'total': 36.647}}), ('L314', {'energy_sym': -1.081, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 18.444, 'total': 18.444}}), ('M44', {'energy_sym': -0.305, 'sasa_sym': {'polar': 35.273, 'hydrophobic': 25.071, 'total': 60.344}}), ('N281', {'energy_sym': -0.04, 'sasa_sym': {'polar': 35.285, 'hydrophobic': 16.046, 'total': 51.331}}), ('N282', {'energy_sym': -0.512, 'sasa_sym': {'polar': 6.046, 'hydrophobic': 1.537, 'total': 7.583}}), ('N283', {'energy_sym': -0.366, 'sasa_sym': {'polar': 0.794, 'hydrophobic': 3.076, 'total': 3.871}}), ('N285', {'energy_sym': 0.076, 'sasa_sym': {'polar': 0.608, 'hydrophobic': 0.0, 'total': 0.608}}), ('N288', {'energy_sym': 0.005, 'sasa_sym': {'polar': 23.035, 'hydrophobic': 73.733, 'total': 96.768}}), ('N291', {'energy_sym': -0.013, 'sasa_sym': {'polar': 28.521, 'hydrophobic': 16.079, 'total': 44.6}}), ('E234', {'energy_sym': 0.003, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 0

In [86]:
# Per-design energy totals of the residues in same_d: 'energy_sym' first, then 'energy'
print(sum_energy_sym, sum_energy, sep='\n')

{'clean_asu_for_refine': -101.84899999999998, 'clean_asu_for_refine_design_0001': -218.44300000000007, 'clean_asu_for_refine_design_0002': -213.414, 'clean_asu_for_refine_design_0003': -237.09699999999992, 'clean_asu_for_refine_design_0004': -224.85000000000005, 'clean_asu_for_refine_design_0005': -199.25999999999996, 'clean_asu_for_refine_design_0006': -210.45799999999997}
{'clean_asu_for_refine': -78.47000000000001, 'clean_asu_for_refine_design_0001': -205.208, 'clean_asu_for_refine_design_0002': -201.921, 'clean_asu_for_refine_design_0003': -227.22100000000003, 'clean_asu_for_refine_design_0004': -212.59599999999992, 'clean_asu_for_refine_design_0005': -184.35900000000004, 'clean_asu_for_refine_design_0006': -199.05400000000006}


In [54]:
# Residues of design 0001 that are only in different_d and carry a non-zero 'energy_sym'
impacted_sym_residues = {residue: data
                         for residue, data in different_d['clean_asu_for_refine_design_0001'].items()
                         if abs(data['energy_sym']) > 0}

In [55]:
# Residue labels first, then their 'energy_sym' values in the same order
print(list(impacted_sym_residues))
print([data['energy_sym'] for data in impacted_sym_residues.values()])

['J281', 'J282', 'J283', 'J285', 'J288', 'J291', 'R280', 'R281', 'R282', 'R283', 'R285', 'R286', 'R288', 'R289', 'R290', 'R291', 'R292', 'R293', 'R294', 'R295', 'R296', 'R297', 'R299', 'R300', 'R302', 'R309', 'R321', 'R322', 'R324', 'R325', 'R326', 'R327', 'R328', 'R329', 'R330', 'R331', 'R332', 'R333', 'T312', 'T314', 'W29', 'W30', 'W32', 'W33', 'W34', 'W36', 'W37', 'W40', 'W41', 'W43', 'W44', 'W45', 'W71', 'W75', 'W77', 'W78', 'W79', 'W81', 'W82', 'W83', 'W84', 'W85', 'W86', 'W88', 'W89', 'W90', 'W92', 'W93', 'W96', 'W224', 'W227', 'W228', 'W231', 'W232', 'W235', 'W236', 'W239', 'D312', 'D314', 'E44', 'E234', 'E236']
[-0.04, -0.512, -0.366, 0.076, 0.005, -0.013, -0.3, -0.825, -0.033, -0.607, -1.877, -1.209, -1.728, -0.715, -0.281, -0.517, -1.658, -0.718, -0.02, -0.519, -1.026, -0.604, -1.844, -0.684, -0.026, -0.958, -0.435, -0.017, -0.558, -1.029, -0.023, -0.049, -2.923, -1.43, -0.47, -0.579, -0.446, -0.065, -0.068, -0.823, -0.517, -0.002, -0.006, -0.269, 0.002, -0.339, -1.722, -1.22

In [149]:
# Residues to compare, and for each of them the 'energy_sym' and 'energy' values of the same residue number on
# chain B, taken from same_d (first pair of score files) and clean_same_d (correct_asu pair)
target_sym_residues = ['R280', 'R281', 'R282', 'R283', 'R285', 'R286', 'R288', 'R289', 'R290', 'R291', 'R292', 'R293', 'R294', 'R295', 'R296', 'R297', 'R299', 'R300', 'R302', 'R309', 'D312', 'D314']
asu_residues = [residue.replace('R', 'B').replace('D', 'B') for residue in target_sym_residues]

dirty_same = same_d['clean_asu_for_refine_design_0001']
asu_check = [dirty_same[residue]['energy_sym'] for residue in asu_residues]
non_sym_asu = [dirty_same[residue]['energy'] for residue in asu_residues]

clean_same = clean_same_d['clean_asu_for_refine_design_correct_asu_0001']
clean_sym_asu = [clean_same[residue]['energy_sym'] for residue in asu_residues]
clean_asu = [clean_same[residue]['energy'] for residue in asu_residues]

In [156]:
# Show the residue 314 record in clean_different_d for each chain letter in PDB.available_letters[3:24:2]
print([(letter, clean_different_d['clean_asu_for_refine_design_correct_asu_0001']['%s314' % letter]) for letter in PDB.available_letters[3:24:2]])

[('D', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 21.776, 'total': 21.776}}), ('F', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 20.239, 'total': 20.239}}), ('H', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 19.214, 'total': 19.214}}), ('J', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 12.296, 'total': 12.296}}), ('L', {'energy_sym': -1.081, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 18.444, 'total': 18.444}}), ('N', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 16.14, 'total': 16.14}}), ('P', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 13.066, 'total': 13.066}}), ('R', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 10.247, 'total': 10.247}}), ('T', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.675, 'hydrophobic': 15.37, 'total': 16.045}}), ('V', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 13.833, 'total': 13.833}}), ('X', {'energy_sym': 0.0, 'sasa_sym':

In [159]:
# 'energy_sym' of each target residue in clean_different_d after mapping chain D onto chain L, or 0 when the
# mapped residue is absent. The membership test uses the same mapped label as the lookup, so a residue present
# under only one of the two labels can neither raise a KeyError nor be reported as 0 by mistake.
clean_different = clean_different_d['clean_asu_for_refine_design_correct_asu_0001']
clean_asu_different = [clean_different[mate]['energy_sym'] if mate in clean_different else 0
                       for mate in (residue.replace('D', 'L') for residue in target_sym_residues)]

In [179]:
# Print one row per target residue comparing its energy across the score sets.
# Values are paired with their row by residue label rather than by position: different_d is ordered by its own
# insertion order, which is not guaranteed to follow target_sym_residues, and a target missing from different_d
# would otherwise shift every later row.
dirty_different = different_d['clean_asu_for_refine_design_0001']
checked_residues = [(idx, residue) for idx, residue in enumerate(target_sym_residues)
                    if residue in dirty_different]
sym_energy_residue_check = [dirty_different[residue]['energy_sym'] for _, residue in checked_residues]
print('    NO clean dirty  SYM clean_asu dirty_asu MATES clean_asu_sym dirty_sym_sym')
for (idx, residue), value in zip(checked_residues, sym_energy_residue_check):
    print('%s| %.3f %.3f %s    %.3f    %.3f %s     %.3f        %.3f'
          % (residue.replace('R', 'B').replace('D', 'B'), clean_asu[idx], non_sym_asu[idx],
             ' | ', clean_sym_asu[idx], asu_check[idx],
             '  |      ', clean_asu_different[idx], value))

    NO clean dirty  SYM clean_asu dirty_asu MATES clean_asu_sym dirty_sym_sym
B280| -1.297 -0.982  |     -1.297    -0.997   |           0.000        -0.300
B281| -2.534 -1.662  |     -2.534    -1.710   |           0.000        -0.825
B282| -2.366 -2.260  |     -2.366    -2.333   |           0.000        -0.033
B283| -1.959 -1.028  |     -1.959    -1.351   |           0.000        -0.607
B285| -1.196 1.322  |     -1.196    0.682   |           0.000        -1.877
B286| -4.071 -2.565  |     -4.071    -2.862   |           0.000        -1.209
B288| -1.707 0.092  |     -1.707    0.021   |           0.000        -1.728
B289| -2.960 -1.904  |     -2.960    -2.246   |           0.000        -0.715
B290| -4.345 -3.910  |     -4.345    -4.064   |           0.000        -0.281
B291| -5.778 -5.151  |     -5.778    -5.261   |           0.000        -0.517
B292| -5.224 -3.354  |     -5.224    -3.566   |           0.000        -1.658
B293| -5.023 -4.196  |     -5.023    -4.305   |           0.000     

For the original input, where the ASU chains are separated by a random amount, the chains match up as follows:
- B matches with J, T, and W
- A matches with C (same), E (same), and R

In [67]:
# NOTE(review): `design` is not assigned in this cell; it is whatever an earlier cell's loop left bound.
# The divisor 12 is hard-coded - presumably the number of ASU copies in the symmetric model, TODO confirm
print(len(sym_new_residues[design]) / 12)  # all instances of residue measurement
print(len(same_d[design]))  # all the same (ASU)

76.0
76


Next, the same comparison is repeated for the SASA measurements.

In [188]:
# Residues to compare, and for each of them the 'sasa' and 'sasa_sym' records of the same residue number on
# chain B, taken from clean_same_d (correct_asu pair) and same_d (first pair of score files)
target_sym_residues = ['R280', 'R281', 'R282', 'R283', 'R285', 'R286', 'R288', 'R289', 'R290', 'R291', 'R292', 'R293', 'R294', 'R295', 'R296', 'R297', 'R299', 'R300', 'R302', 'R309', 'D312', 'D314']
asu_residues = [residue.replace('R', 'B').replace('D', 'B') for residue in target_sym_residues]

clean_same = clean_same_d['clean_asu_for_refine_design_correct_asu_0001']
dirty_same = same_d['clean_asu_for_refine_design_0001']
clean_asu_sasa = [clean_same[residue]['sasa'] for residue in asu_residues]
asu_sasa = [dirty_same[residue]['sasa'] for residue in asu_residues]
clean_sym_asu_sasa = [clean_same[residue]['sasa_sym'] for residue in asu_residues]
asu_sym_sasa = [dirty_same[residue]['sasa_sym'] for residue in asu_residues]

In [156]:
# Show the residue 314 record in clean_different_d for each chain letter in PDB.available_letters[3:24:2]
# NOTE(review): this cell repeats the identical print made before the energy table above
print([(letter, clean_different_d['clean_asu_for_refine_design_correct_asu_0001']['%s314' % letter]) for letter in PDB.available_letters[3:24:2]])

[('D', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 21.776, 'total': 21.776}}), ('F', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 20.239, 'total': 20.239}}), ('H', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 19.214, 'total': 19.214}}), ('J', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 12.296, 'total': 12.296}}), ('L', {'energy_sym': -1.081, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 18.444, 'total': 18.444}}), ('N', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 16.14, 'total': 16.14}}), ('P', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 13.066, 'total': 13.066}}), ('R', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 10.247, 'total': 10.247}}), ('T', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.675, 'hydrophobic': 15.37, 'total': 16.045}}), ('V', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 13.833, 'total': 13.833}}), ('X', {'energy_sym': 0.0, 'sasa_sym':

In [189]:
# 'sasa_sym' record of each target residue in clean_different_d, once with chain D mapped onto chain L and once
# with chain R mapped onto chain D. The membership test uses the same mapped label as the lookup, and a missing
# residue falls back to an all-zero record so the table cell can still index the result by sasa type
# (a bare 0 could not be subscripted).
clean_different = clean_different_d['clean_asu_for_refine_design_correct_asu_0001']
clean_asu_different_sasa = [clean_different[mate]['sasa_sym'] if mate in clean_different
                            else {'polar': 0, 'hydrophobic': 0, 'total': 0}
                            for mate in (residue.replace('D', 'L') for residue in target_sym_residues)]
clean_asu_different_sasa2 = [clean_different[mate]['sasa_sym'] if mate in clean_different
                             else {'polar': 0, 'hydrophobic': 0, 'total': 0}
                             for mate in (residue.replace('R', 'D') for residue in target_sym_residues)]

In [191]:
# Print one row per target residue comparing its SASA across the score sets.
# Values are paired with their row by residue label rather than by position: different_d is ordered by its own
# insertion order, which is not guaranteed to follow target_sym_residues, and a target missing from different_d
# would otherwise shift every later row.
dirty_different = different_d['clean_asu_for_refine_design_0001']
checked_residues = [(idx, residue) for idx, residue in enumerate(target_sym_residues)
                    if residue in dirty_different]
sym_energy_residue_check = [dirty_different[residue]['sasa_sym'] for _, residue in checked_residues]
for sasa_type in ['total']:  # , 'polar', 'hydrophobic']:
    print(sasa_type)
    print('    NO clean dirty  SYM clean_asu dirty_asu MATES clean_asu_sym dirty_sym_sym |  clean_different')
    for (idx, residue), value in zip(checked_residues, sym_energy_residue_check):
        print('%s| %.3f %.3f %s    %.3f    %.3f %s     %.3f        %.3f |   %.3f'
              % (residue.replace('R', 'B').replace('D', 'B'),
                 clean_asu_sasa[idx][sasa_type], asu_sasa[idx][sasa_type],
                 ' | ', clean_sym_asu_sasa[idx][sasa_type], asu_sym_sasa[idx][sasa_type],
                 '  |      ', clean_asu_different_sasa[idx][sasa_type], value[sasa_type], clean_asu_different_sasa2[idx][sasa_type]))

total
    NO clean dirty  SYM clean_asu dirty_asu MATES clean_asu_sym dirty_sym_sym |  clean_different
B280| 71.485 67.891  |     71.485    67.891   |           73.787        71.485 |   75.069
B281| 51.951 49.681  |     51.951    49.681   |           59.264        51.951 |   55.093
B282| 6.396 8.543  |     6.396    8.543   |           12.809        6.396 |   13.744
B283| 4.871 7.459  |     4.871    7.459   |           1.025        4.871 |   3.613
B285| 1.216 1.824  |     1.216    1.824   |           0.000        1.216 |   1.216
B286| 22.295 19.474  |     22.295    20.498   |           15.632        22.295 |   22.040
B288| 115.117 101.556  |     115.117    101.556   |           93.232        115.117 |   110.025
B289| 0.000 0.000  |     0.000    0.000   |           0.000        0.000 |   0.000
B290| 0.000 0.000  |     0.000    0.000   |           0.000        0.000 |   1.025
B291| 53.219 55.422  |     53.219    54.652   |           43.679        53.219 |   56.222
B292| 0.000 0.000  |    

In [193]:
# Read the analysis CSV: the first column is the index and the first three rows form the column multi-index
symmetric_test_analysis_file = '/home/kylemeador/designs/metrics_testing_analysis.csv'
column_header_rows = [0, 1, 2]
sym_test_df = pd.read_csv(symmetric_test_analysis_file, index_col=0, header=column_header_rows)

In [195]:
# Never truncate columns when a DataFrame is rendered; row truncation stays at the pandas default
pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)
mean = 'mean'
std = 'std'
# The configuration cell at the top of the notebook names this alias idx_slice, and `idx` is also used as a loop
# counter by the table-printing cells, so bind both names here: idx_slice for consistency, idx for existing uses
idx_slice = idx = pd.IndexSlice

In [196]:
# Preview only the first rows: the frame is very wide and display.max_columns is unlimited, so rendering the
# whole frame floods the notebook output
sym_test_df.head()

Unnamed: 0_level_0,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,energy_distance,mean,mean,mean,mean,mean,mean,mean,mean,seq_distance,similarity,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,std,std,std,std,std,std,std,std,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,mean,mean,mean,mean,mean,mean,mean,mean,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,std,std,std,std,std,std,std,std,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,seq_design,seq_design,seq_design,seq_design,seq_design,seq_design,seq_design,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std
Unnamed: 0_level_2,no_constraint,buns_total,contact_count,interface_area_total,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,no_constraint,no_constraint,core,divergence_design_per_res,divergence_evolution_per_res,divergence_fragment_per_res,divergence_interface_per_res,observations,observed_design,observed_evolution,observed_fragment,rim,support,buns_total,contact_count,interface_area_total,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,buns_total,contact_count,interface_area_total,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,buns_total,contact_count,interface_area_total,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,core,divergence_design_per_res,divergence_evolution_per_res,divergence_fragment_per_res,divergence_interface_per_res,observations,observed_design,observed_evolution,observed_fragment,rim,support,buns_total,contact_count,interface_area_total,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,interface_b_factor_per_res,multiple_fragment_ratio,nanohedra_score,nanohedra_score_center,nanohedra_score_center_per_res_center,nanohedra_score_per_res,number_fragment_residues_center,number_fragment_residues_total,number_of_fragments,observations,percent_fragment,percent_fragment_coil,percent_fragment_helix,percent_fragment_strand,percent_residues_fragment_center,percent_residues_fragment_total,symmetry,total_interface_residues,total_non_fragment_interface_residues,buns_per_ang,buns_total,contact_count,core,cst_weight,favor_residue_energy,fsp_energy,int_area_hydrophobic,int_area_polar,int_area_res_summary_hydrophobic_1_unbound,int_area_res_summary_hydrophobic_2_unbound,int_area_res_summary_polar_1_unbound,int_area_res_summary_polar_2_unbound,int_area_res_summary_total_1_unbound,int_
area_res_summary_total_2_unbound,int_area_total,int_composition_similarity,int_energy_context_1_unbound,int_energy_context_2_unbound,int_energy_res_summary_1_unbound,int_energy_res_summary_2_unbound,int_energy_res_summary_complex,int_sc,int_sc_median_dist,interaction_energy_complex,interface_area_hydrophobic,interface_area_polar,interface_area_total,interface_connectivity_1,interface_connectivity_2,interface_energy,interface_energy_1_bound,interface_energy_1_unbound,interface_energy_2_bound,interface_energy_2_unbound,interface_energy_complex,interface_energy_density,interface_energy_no_intra_residue_score,interface_energy_unbound,interface_separation,number_hbonds,number_of_mutations,observed_design,observed_evolution,observed_fragment,percent_interface_area_hydrophobic,percent_interface_area_polar,ref,rim,rmsd,shape_complementarity,solvation_energy,support,total_interface_residues,divergence_design_per_res,divergence_evolution_per_res,divergence_fragment_per_res,divergence_interface_per_res,protocol_energy_distance_sum,protocol_seq_distance_sum,protocol_similarity_sum,buns_per_ang,buns_total,contact_count,core,cst_weight,favor_residue_energy,fsp_energy,int_area_hydrophobic,int_area_polar,int_area_res_summary_hydrophobic_1_unbound,int_area_res_summary_hydrophobic_2_unbound,int_area_res_summary_polar_1_unbound,int_area_res_summary_polar_2_unbound,int_area_res_summary_total_1_unbound,int_area_res_summary_total_2_unbound,int_area_total,int_composition_similarity,int_energy_context_1_unbound,int_energy_context_2_unbound,int_energy_res_summary_1_unbound,int_energy_res_summary_2_unbound,int_energy_res_summary_complex,int_sc,int_sc_median_dist,interaction_energy_complex,interface_area_hydrophobic,interface_area_polar,interface_area_total,interface_connectivity_1,interface_connectivity_2,interface_energy,interface_energy_1_bound,interface_energy_1_unbound,interface_energy_2_bound,interface_energy_2_unbound,interface_energy_complex,interface_energy_density,interface_energy_n
o_intra_residue_score,interface_energy_unbound,interface_separation,number_hbonds,number_of_mutations,observed_design,observed_evolution,observed_fragment,percent_interface_area_hydrophobic,percent_interface_area_polar,ref,rim,rmsd,shape_complementarity,solvation_energy,support,total_interface_residues
Projects-metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,8.249736,4.333333,4.890231,2440.94455,-61.491881,6.0,0.796724,0.673831,1.421085e-14,3.314069,0.382125,30.0,0.633,0.653,0.0,0.738,3.0,0.961,0.961,0.0,12.0,20.666667,2.081666,0.152021,15.117976,3.877835,1.0,0.010785,0.019235,6.512232e-14,0.519,0.407,0.11,0.897,0.0,0.93,0.194,0.0,6.0,4.976939,2416.05216,-71.491092,6.0,0.821101,0.685743,2.306895e-12,33.333333,0.672,0.69,0.0,0.756,3.0,0.917,0.912,0.0,10.0,19.666667,3.0,0.150287,150.719892,4.925882,0.0,0.006827,0.034246,4.192957e-12,20.0,5.066667,49.662177,34.489386,1.149646,1.034629,30,48,76,6,0.491228,0.0,1.0,0.0,0.394737,0.631579,point_group,76,46,0.002128,5.166667,4.933585,31.666667,,0.0,,,,,,,,,,,0.723245,,,,,,,,-81.018555,1964.311072,464.187283,2428.498355,19.03018,16.375527,-66.491486,-110.14132,-118.476726,-69.572418,-75.764081,-260.732293,-0.027389,-81.018555,-194.240807,0.553609,6.0,57.666667,0.938667,0.9365,0.0,0.808912,0.191088,177.19153,11.0,0.105125,0.679787,1.160553e-12,20.166667,57.0,0.558,0.581,0.0,0.631,8.249736,3.314069,0.382125,0.00104,2.483277,0.143297,3.076795,,0.0,,,,,,,,,,,0.073491,,,,,,,,9.285293,84.453341,43.103902,96.767272,0.086268,0.11284,6.761363,6.24176,5.747078,8.83641,8.350908,14.60665,0.002622,9.285293,11.659431,0.038054,0.632456,1.032796,0.027274,0.03052,0.0,0.015603,0.015603,4.049839,1.67332,0.01062,0.025684,2.934446e-12,1.722401,0.0
Projects-metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,8.349393,11.333333,4.908106,2256.948615,-61.227145,6.0,0.796809,0.673751,-2.842171e-14,3.314069,0.42075,29.0,0.649,0.669,0.0,0.736,3.0,0.961,0.961,0.0,11.0,22.333333,1.527525,0.173933,35.483948,5.965895,1.0,0.009716,0.01927,1.136868e-13,0.097,0.407,0.0,0.894,0.0,0.894,0.294,0.78,10.333333,5.003572,2250.124605,-71.82962,6.0,0.825836,0.685736,4.007461e-12,31.333333,0.686,0.705,0.0,0.754,3.0,0.917,0.912,0.0,9.666667,22.0,3.511885,0.147965,129.419383,5.41189,0.0,0.004444,0.034212,6.609942e-12,20.0,5.066667,49.662177,34.489386,1.149646,1.034629,30,48,76,6,0.509091,0.0,1.0,0.0,0.394737,0.631579,point_group,76,46,0.00481,10.833333,4.955839,30.166667,,0.0,,,,,,,,,,,0.671159,,,,,,,,-81.018555,1828.24664,425.28997,2253.53661,19.032163,16.388186,-66.528382,-110.14132,-118.522799,-69.572418,-75.681112,-260.732293,-0.029521,-81.018555,-194.203911,0.553655,6.0,57.666667,0.938667,0.9365,0.0,0.811322,0.188678,177.19153,10.333333,0.105398,0.679743,1.98952e-12,22.166667,55.0,0.569,0.593,0.0,0.629,8.349393,3.314069,0.42075,0.001128,2.483277,0.153599,2.562551,,0.0,,,,,,,,,,,0.046386,,,,,,,,9.285293,77.645225,43.21595,84.955076,0.083205,0.11509,7.725019,6.24176,6.578008,8.83641,8.326557,14.60665,0.00318,9.285293,12.301918,0.037997,0.632456,1.032796,0.027274,0.03052,0.0,0.017275,0.017275,4.049839,1.21106,0.01051,0.025687,4.729504e-12,2.926887,0.0
Projects-metrics_testing_Designs-outF_10,7.485495,2.0,4.886904,907.24957,-27.682385,0.0,0.865286,0.71175,3.552714e-15,2.136145,0.3055,16.5,0.66,0.66,0.0,0.813,2.0,0.646,0.646,0.0,2.5,3.0,1.414214,0.263421,33.404419,0.703147,0.0,0.005014,0.043239,5.024296e-15,0.0,0.353,0.0,0.697,0.0,0.697,0.0,0.697,1.0,4.624204,894.607208,-25.335622,0.0,0.827805,0.720906,-1.421085e-14,14.0,0.661,0.661,0.0,0.816,1.0,0.667,0.667,0.0,5.0,3.0,,,,,,,,,91.26,0.0,0.0,0.0,0.0,0.0,0,0,0,3,0.0,0.0,0.0,0.0,0.0,0.0,point_group,17,17,0.001862,1.666667,4.799337,15.666667,4.518925,0.0,0.0,1686.696737,652.565999,478.176716,627.967727,173.679588,141.703347,651.856305,769.671074,2339.262695,0.391188,-8.439053,-12.62768,-12.417245,-17.197604,-45.595119,0.851727,0.397138,-32.810647,770.263146,132.772303,903.035449,19.321428,16.566666,-26.900131,-8.931391,-12.923711,-16.069775,-17.98797,-57.811813,-0.029786,-32.810647,-30.911682,0.521653,0.0,18.666667,0.653,0.653,0.0,0.852793,0.147207,179.982533,3.333333,0.029547,0.714802,-2.368476e-15,3.0,22.0,0.589,0.589,0.0,0.722,7.485495,2.136145,0.3055,0.001328,1.154701,0.240206,1.527525,0.212776,0.0,0.0,33.274906,19.394488,21.3879,11.453155,25.060033,24.101317,12.847322,33.517654,23.867135,0.123662,1.742752,0.318579,1.692651,0.278857,0.539086,0.000808,0.001199,1.354112,34.946525,18.424316,24.722541,0.128769,0.028868,1.443251,0.909509,1.156007,0.135836,0.240939,0.899102,0.001293,1.354112,1.064498,0.036404,0.0,0.57735,0.024249,0.024249,0.0,0.021928,0.021928,1.851701,1.527525,0.000565,0.031028,1.085372e-14,1.0,0.0
Projects-metrics_testing_Designs-outF_10-odd-asu,8.001939,2.0,4.886904,895.803111,-27.682385,0.0,0.866821,0.71066,-7.105427e-15,2.136145,0.286375,15.5,0.651,0.651,0.0,0.811,2.0,0.646,0.646,0.0,2.5,4.0,1.414214,0.263421,14.177673,0.703147,0.0,0.010987,0.043377,1.004859e-14,0.0,0.332,0.0,0.653,0.0,0.653,0.0,0.653,1.0,4.624204,889.204002,-24.480528,0.0,0.83109,0.719896,-3.197442e-14,14.0,0.652,0.652,0.0,0.813,1.0,0.667,0.667,0.0,5.0,3.0,,,,,,,,,91.21,0.0,0.0,0.0,0.0,0.0,0,0,0,3,0.0,0.0,0.0,0.0,0.0,0.0,point_group,17,17,0.001872,1.666667,4.799337,15.0,4.518925,0.0,0.0,1686.696737,652.565999,478.176716,627.967727,173.679588,141.703347,651.856305,769.671074,2339.262695,0.432879,-8.439053,-12.62768,-12.417245,-17.197604,-45.595119,0.851727,0.397138,-32.810647,764.055633,129.547775,893.603408,19.369048,16.566666,-26.6151,-8.931391,-13.208743,-16.069775,-17.98797,-57.811813,-0.029777,-32.810647,-31.196713,0.52307,0.0,18.666667,0.653,0.653,0.0,0.854911,0.145089,178.264423,3.333333,0.029312,0.713739,-1.539509e-14,3.666667,21.0,0.583,0.583,0.0,0.723,8.001939,2.136145,0.286375,0.001312,1.154701,0.240206,1.0,0.212776,0.0,0.0,33.274906,19.394488,21.3879,11.453155,25.060033,24.101317,12.847322,33.517654,23.867135,0.097872,1.742752,0.318579,1.692651,0.278857,0.539086,0.000808,0.001199,1.354112,26.747128,18.744774,10.724704,0.209267,0.028868,1.914289,0.909509,1.647313,0.135836,0.240939,0.899102,0.001956,1.354112,1.541114,0.036217,0.0,0.57735,0.024249,0.024249,0.0,0.022044,0.022044,1.151298,1.527525,0.000569,0.031132,1.602007e-14,0.57735,0.0


TO SOLVE
Buns_total is very incorrect somehow. Higher values with good ASU input. I think this is the last straggler that rosetta_scripts needs tweaking
STDEV is larger with the correct-asu interface_area_total, interface_energy. Is the PoseDeletionMover working correctly?

Checking out Trajectories.csv / Residues.csv to understand
Energy of residues may be slightly off with symmetry
Interface area is messed up slightly. Think error is more than orientational - Chain B values are the only ones affected!

SOLVED
interface_energy may be off (different packing in metrics could be the cause. yes POSE metrics would argue so. Bound is the exact same, while unbound (packed) have changed)
Solvation energy is missing. interaction_energy_complex = -81.018555, interface_energy_no_intra_residue_score = -81.018555, interface_energy = -66.49 (this is old delta) - NOT actually calculating
shape_complementarity is slightly different with the correct versus odd-asu. Probably due to molecular dots issues


RENAME int_composition_similarity to use the full word "interface" (interface_composition_similarity). RENAME ref to rosetta_reference_energy.
Names of all PERCENT_ columns with 0,1 bound as percent?

!!Compute the delta of each interface upon binding. 
For total, this is the interaction_energy_complex - interface_energy
For each individual bound - unbound = positive value (transition energy to reach the complex)!!

residue_type classification is different depending on the values of SASA (I believe) which causes issues in all of these!
!!Pose mean total_interface_residues is different (for 2gtr 57 with odd-asu, 55 with correct). Dock total_interface_residues is 76!!
The interior residues seem to be the difference between these two...
nanohedra metric "percent_fragment" is different in 2gtr. Higher in correct despite no difference in other metrics? 
divergence (design/evolution) is slightly higher with correct ASU configuration (fewer residues)
divergence of fragment information is missing (observed_fragment too)
observed_design/evolution are the same. Do they use non-interior residues?

maybe remove:
interface_energy_no_intra_residue_score, interface_energy_unbound
remove:
cst_weight, fsp_energy, int_area_res_summary_hydrophobic_1/2_unbound, int_area_res_summary_polar_1/2_unbound, int_area_res_summary_total_1/2_unbound, int_area_total/polar/hydrophobic, 
int_energy_context_1/2_unbound, int_energy_res_summary_1/2_unbound, int_energy_res_summary_complex, int_sc, int_sc_median_dist, 

In [199]:
# V2 analysis output for the symmetric metrics-testing poses.
symmetric_test_analysis_file = '/home/kylemeador/designs/metrics_testing_analysisV2.csv'
# The first three rows of the CSV are read as a three-level column header and
# the first column is used as the row index.
sym_test_df2 = pd.read_csv(
    symmetric_test_analysis_file,
    header=[0, 1, 2],
    index_col=0,
)

In [200]:
# Render the V2 analysis frame as the cell output.
sym_test_df2

Unnamed: 0_level_0,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,no_constraint,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,energy_distance,mean,mean,mean,mean,mean,mean,mean,mean,seq_distance,similarity,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,std,std,std,std,std,std,std,std,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,design_profile,mean,mean,mean,mean,mean,mean,mean,mean,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,stats,std,std,std,std,std,std,std,std,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,seq_design,seq_design,seq_design,seq_design,seq_design,seq_design,seq_design,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std,std
Unnamed: 0_level_2,no_constraint,contact_count,interface_area_total,interface_buried_hbonds,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,no_constraint,no_constraint,core,divergence_design_per_res,divergence_evolution_per_res,divergence_fragment_per_res,divergence_interface_per_res,observations,observed_design,observed_evolution,observed_fragment,rim,support,contact_count,interface_area_total,interface_buried_hbonds,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,contact_count,interface_area_total,interface_buried_hbonds,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,contact_count,interface_area_total,interface_buried_hbonds,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,core,divergence_design_per_res,divergence_evolution_per_res,divergence_fragment_per_res,divergence_interface_per_res,observations,observed_design,observed_evolution,observed_fragment,rim,support,contact_count,interface_area_total,interface_buried_hbonds,interface_energy,number_hbonds,percent_interface_area_hydrophobic,shape_complementarity,solvation_energy,interface_b_factor_per_res,multiple_fragment_ratio,nanohedra_score,nanohedra_score_center,nanohedra_score_center_per_res_center,nanohedra_score_per_res,number_fragment_residues_center,number_fragment_residues_total,number_of_fragments,observations,percent_fragment,percent_fragment_coil,percent_fragment_helix,percent_fragment_strand,percent_residues_fragment_center,percent_residues_fragment_total,symmetry,total_interface_residues,total_non_fragment_interface_residues,contact_count,core,favor_residue_energy,int_composition_similarity,interaction_energy_complex,interface_area_hydrophobic,interface_area_polar,interface_area_total,interface_buried_hbonds,interface_connectivity_1,interface_connectivity_2,interface_energy,inte
rface_energy_complex,interface_energy_density,interface_energy_unbound,interface_separation,number_hbonds,number_of_mutations,observed_design,observed_evolution,observed_fragment,percent_interface_area_hydrophobic,percent_interface_area_polar,rim,rmsd,rosetta_reference_energy,shape_complementarity,solvation_energy,solvation_energy_bound,solvation_energy_complex,solvation_energy_unbound,support,total_interface_residues,divergence_design_per_res,divergence_evolution_per_res,divergence_fragment_per_res,divergence_interface_per_res,protocol_energy_distance_sum,protocol_seq_distance_sum,protocol_similarity_sum,contact_count,core,favor_residue_energy,int_composition_similarity,interaction_energy_complex,interface_area_hydrophobic,interface_area_polar,interface_area_total,interface_buried_hbonds,interface_connectivity_1,interface_connectivity_2,interface_energy,interface_energy_complex,interface_energy_density,interface_energy_unbound,interface_separation,number_hbonds,number_of_mutations,observed_design,observed_evolution,observed_fragment,percent_interface_area_hydrophobic,percent_interface_area_polar,rim,rmsd,rosetta_reference_energy,shape_complementarity,solvation_energy,solvation_energy_bound,solvation_energy_complex,solvation_energy_unbound,support,total_interface_residues
Projects-metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,8.578222,4.890231,2440.94455,4.333333,-31.767482,6.0,0.796724,0.673831,-5.024095,3.314069,0.46,30.0,0.633,0.653,0.0,0.738,3.0,0.961,0.961,0.0,12.0,20.666667,0.152021,15.117976,1.527525,7.04461,1.0,0.010785,0.019235,4.562921,0.371,0.112,0.269,0.912,0.0,0.937,0.167,0.912,4.976939,2416.05216,5.666667,-52.51054,6.0,0.821101,0.685743,2.845872,33.333333,0.672,0.69,0.0,0.756,3.0,0.917,0.912,0.0,10.0,19.666667,0.150287,150.719892,3.05505,7.177631,0.0,0.006827,0.034246,0.774404,20.0,5.066667,49.662177,34.489386,1.149646,1.034629,30,48,76,6,0.491228,0.0,1.0,0.0,0.394737,0.631579,point_group,76,46,4.933585,31.666667,0.0,0.723245,-81.018555,1964.311072,464.187283,2428.498355,5.0,18.928212,16.405643,-42.139011,-260.732293,-0.017348,-218.593283,0.553609,6.0,57.666667,0.938667,0.9365,0.0,0.808912,0.191088,11.0,0.105125,177.19153,0.679787,-1.089111,239.167733,239.640186,238.551075,20.166667,57.0,0.558,0.581,0.0,0.631,8.578222,3.314069,0.46,0.143297,3.076795,0.0,0.073491,9.285293,84.453341,43.103902,96.767272,2.280351,0.0569,0.060124,13.020764,14.60665,0.00519,10.130617,0.038054,0.632456,1.032796,0.027274,0.03052,0.0,0.015603,0.015603,1.67332,0.01062,4.049839,0.025684,5.21046,4.09449,5.466562,4.112741,1.722401,0.0
Projects-metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,8.540535,4.908106,2256.948615,6.0,-31.12001,6.0,0.796809,0.673751,-4.450584,3.314069,0.428625,29.0,0.649,0.669,0.0,0.736,3.0,0.961,0.961,0.0,11.0,22.333333,0.173933,35.483948,1.0,7.875665,1.0,0.009716,0.01927,4.172794,0.403,0.0,0.0,0.906,0.0,0.906,0.308,0.906,5.003572,2250.124605,6.0,-52.807712,6.0,0.825836,0.685736,2.027567,31.333333,0.686,0.705,0.0,0.754,3.0,0.917,0.912,0.0,9.666667,22.0,0.147965,129.419383,2.645751,7.339717,0.0,0.004444,0.034212,1.525202,20.0,5.066667,49.662177,34.489386,1.149646,1.034629,30,48,76,6,0.509091,0.0,1.0,0.0,0.394737,0.631579,point_group,76,46,4.955839,30.166667,0.0,0.671159,-81.018555,1828.24664,425.28997,2253.53661,6.0,18.910138,16.40981,-41.963861,-260.732293,-0.018592,-218.768432,0.553655,6.0,57.666667,0.938667,0.9365,0.0,0.811322,0.188678,10.333333,0.105398,177.19153,0.679743,-1.211508,239.167733,239.640186,238.428678,22.166667,55.0,0.569,0.593,0.0,0.629,8.540535,3.314069,0.428625,0.153599,2.562551,0.0,0.046386,9.285293,77.645225,43.21595,84.955076,1.788854,0.072012,0.060494,13.69182,14.60665,0.005828,10.466777,0.037997,0.632456,1.032796,0.027274,0.03052,0.0,0.017275,0.017275,1.21106,0.01051,4.049839,0.025687,4.52607,4.09449,5.466562,3.755503,2.926887,0.0
Projects-metrics_testing_Designs-outF_10,10.413234,3.024735,1015.105597,0.0,8.398264,2.0,0.839608,0.520344,2.598974,2.136145,0.215,33.5,0.648,0.648,0.0,0.812,2.0,0.718,0.718,0.0,9.0,11.5,0.0,0.97179,0.0,1.156238,0.0,0.000624,2.9e-05,0.128203,0.0,0.349,0.0,0.349,0.0,0.0,0.673,0.349,3.024735,1016.470161,0.0,9.394837,2.0,0.839492,0.520524,2.769464,31.0,0.649,0.649,0.0,0.813,1.0,0.727,0.727,0.0,11.0,12.0,,,,,,,,,88.57,0.0,0.0,0.0,0.0,0.0,0,0,0,3,0.0,0.0,0.0,0.0,0.0,0.0,point_group,17,17,3.024735,32.666667,0.0,0.560037,-27.073187,852.633642,162.926809,1015.560451,0.0,18.603174,17.163399,8.730455,-65.362188,0.008596,-74.092644,0.945632,2.0,18.666667,0.721,0.721,0.0,0.839569,0.160431,9.666667,0.029547,182.48971,0.520404,2.655804,104.548991,104.087999,106.743803,11.666667,54.0,0.62,0.62,0.0,0.775,10.413234,2.136145,0.215,0.0,1.527525,0.0,0.064587,0.006687,1.183885,0.389743,1.045403,0.0,0.013746,0.029952,0.999747,0.013762,0.000976,1.013434,0.000466,0.0,0.57735,0.010392,0.010392,0.0,0.000446,0.000446,1.527525,0.000565,1.623053,0.000106,0.133817,0.006996,0.00503,0.133565,0.57735,0.0
Projects-metrics_testing_Designs-outF_10-odd-asu,10.633061,3.010601,1056.620184,0.0,9.313586,2.0,0.844205,0.520344,2.822206,2.136145,0.125,33.5,0.645,0.645,0.0,0.811,2.0,0.718,0.718,0.0,9.0,11.5,0.0,2.443284,0.0,0.10127,0.0,0.000238,2.9e-05,0.106277,0.0,0.0,0.0,0.0,0.0,0.336,0.664,0.0,3.010601,1057.325511,0.0,9.383032,2.0,0.843886,0.520524,2.767051,32.0,0.645,0.645,0.0,0.811,1.0,0.727,0.727,0.0,11.0,11.0,,,,,,,,,88.51,0.0,0.0,0.0,0.0,0.0,0,0,0,3,0.0,0.0,0.0,0.0,0.0,0.0,point_group,17,17,3.010601,33.0,0.0,0.586406,-27.073187,892.090006,164.765287,1056.855293,0.0,18.579365,17.183006,9.336734,-65.362188,0.008834,-74.698923,0.945632,2.0,18.666667,0.721,0.721,0.0,0.844099,0.155901,9.666667,0.029312,182.48971,0.520404,2.803821,104.548991,104.087999,106.89182,11.333333,53.0,0.618,0.618,0.0,0.776,10.633061,2.136145,0.125,0.0,1.732051,0.0,0.061589,0.006687,1.289525,0.515925,1.775006,0.0,0.027493,0.011322,0.082069,0.013762,6.4e-05,0.095576,0.000466,0.0,0.57735,0.010392,0.010392,0.0,0.000249,0.000249,1.527525,0.000569,1.623053,0.000106,0.081618,0.006996,0.00503,0.084122,0.57735,0.0


Why the increase in interface_area_total for outF_10? A new interface was defined and it was not limited to the residues identified by Roger...

Interface energy went down significantly despite remaining the same between the two input styles. Did repacking with backbone flexibility really decrease the energy that drastically? Why is this the case?

## Examining the selection of BUNS using various flags such as symmetry and ignore surface residues
My gut instinct is that using the BUNS delta (subtracting unbound (not repacked) from complex) with ignore_surface residues is going to be the best outcome.

In [11]:
# V3 analysis output, which adds the BUNS test columns.
symmetric_test_analysis_file = '/home/kylemeador/designs/metrics_testing_analysisV3_BUNS.csv'
# The first three rows of the CSV are read as a three-level column header and
# the first column is used as the row index.
sym_test_df3 = pd.read_csv(
    symmetric_test_analysis_file,
    header=[0, 1, 2],
    index_col=0,
)

In [14]:
# Show the ('pose', 'mean') values of every BUNS-related column side by side.
sym_test_df3.loc[
    :,
    idx_slice[
        'pose',
        'mean',
        [
            'buns_nano', 'buns_nano_hpol', 'buns_asu', 'buns_asu_hpol',
            'buns_total', 'interface_buried_hbonds',
            # Per-interface unbound counts; the buns_1/2_unbound_sym and
            # buns_1/2_unbound_sym_ignore columns are intentionally left out.
            'buns_1_unbound', 'buns_1_unbound_ignore',
            'buns_2_unbound', 'buns_2_unbound_ignore',
            # Complexed counts together with their _ignore / _sym variants.
            'buns_complex', 'buns_complex_ignore', 'buns_complex_sym', 'buns_complex_sym_ignore'
        ]
    ]
]

Unnamed: 0_level_0,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_2,buns_nano,buns_nano_hpol,buns_asu,buns_asu_hpol,buns_total,interface_buried_hbonds,buns_1_unbound,buns_1_unbound_ignore,buns_2_unbound,buns_2_unbound_ignore,buns_complex,buns_complex_ignore,buns_complex_sym,buns_complex_sym_ignore
Projects-metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,4.333333,0.833333,0.0,0.0,5.166667,5.0,2.0,1.166667,1.166667,0.666667,8.166667,7.166667,8.833333,7.166667
Projects-metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,5.333333,0.333333,4.833333,0.333333,10.833333,6.0,2.0,1.166667,2.833333,2.333333,10.833333,9.833333,10.833333,9.833333
Projects-metrics_testing_Designs-outF_10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0
Projects-metrics_testing_Designs-outF_10-odd-asu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Selection of queried residues is a one descriptor process. Can't specify which residues are on which side of the interface (i.e. residue_selector_1/2).
All analysis was done without the crystal variant which would be particularly helpful as this is where the original BUNS system was benchmarked.

NOTES:
Symmetry adds no benefit to the calculation of the individual interfaces, potentially some to the complexed interface (especially because the benefactor is the oddly oriented ASU). The complexed stat is where I think symmetry would matter, however when surface_residues are ignored, the difference is completely removed. This may have to do with how surface residues are calculated.
Interface 2 has different outcomes depending on the input for 2gtr. Both measurements seem to be tracking similarly due to missing 0.5 for both _ignore variants, but the difference needs resolving. It's particularly unsettling that the difference has nothing to do with symmetry, yet the difference between the two inputs is purely symmetrical. How could this affect the measurement of one side of the interface? Intriguingly, the difference between the two inputs is also present in the complex, so it seems that the calculation is picking up something consistently.

I think the correct answer for this custom deltaG measurement is 6.333 buns in the interface.
It seems the original calculation is working fine in the case where the input is correct, and where it isn't, the symmetry environment (_nano) is capturing the ASU completely, however this indicates that the _nano environment is neglected. Perhaps this is actually double counting and there may be buns in intra-oligomer contacts. I really need to run metrics on both of these with the tracer enabled to capture the real BUNS and get ground truth.

How is the threshold for ignore surface residues calculated? Might it be poor taste to use the SASA cone that Andrew Leaver-Fay developed?

In [5]:
# V4 analysis output (crystal test set) with the BUNS test columns.
symmetric_crystal_analysis_file = '/home/kylemeador/designs/metrics_testing_analysisV4_BUNS.csv'
# NOTE(review): this rebinds sym_test_df3, so the V3 frame loaded earlier is no
# longer reachable under that name once this cell has run.
sym_test_df3 = pd.read_csv(
    symmetric_crystal_analysis_file,
    header=[0, 1, 2],
    index_col=0,
)

In [6]:
# Show the ('pose', 'mean') values of every BUNS-related column for the frame
# currently bound to sym_test_df3.
sym_test_df3.loc[
    :,
    idx_slice[
        'pose',
        'mean',
        [
            'buns_nano', 'buns_nano_hpol', 'buns_asu', 'buns_asu_hpol',
            'buns_total', 'interface_buried_hbonds',
            # Per-interface unbound counts; the buns_1/2_unbound_sym and
            # buns_1/2_unbound_sym_ignore columns are intentionally left out.
            'buns_1_unbound', 'buns_1_unbound_ignore',
            'buns_2_unbound', 'buns_2_unbound_ignore',
            # Complexed counts together with their _ignore / _sym variants.
            'buns_complex', 'buns_complex_ignore', 'buns_complex_sym', 'buns_complex_sym_ignore'
        ]
    ]
]

Unnamed: 0_level_0,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_2,buns_nano,buns_nano_hpol,buns_asu,buns_asu_hpol,buns_total,interface_buried_hbonds,buns_1_unbound,buns_1_unbound_ignore,buns_2_unbound,buns_2_unbound_ignore,buns_complex,buns_complex_ignore,buns_complex_sym,buns_complex_sym_ignore
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123,2.875,1.5,0.875,0.375,5.625,3.875,3.0,0.0,0.75,0.75,7.625,4.75,7.875,4.75
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-non-contact,8.0,3.25,0.0,0.0,11.25,9.5,3.5,1.25,0.75,0.75,13.75,11.75,14.0,11.75
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-odd-asu,3.0,1.5,0.0,0.0,4.5,1.5,2.75,0.0,0.75,0.75,5.0,3.25,5.25,3.25
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,4.333333,0.833333,0.0,0.0,5.166667,5.0,2.0,1.166667,1.166667,0.666667,8.166667,7.166667,8.833333,7.166667
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,5.5,0.333333,5.0,0.333333,11.166667,6.166667,2.0,1.166667,5.833333,2.666667,14.0,13.0,14.0,13.0


## Results
Well the use of the crystal didn't clarify anything. It seems that this is flawed based on what the symmetry of the ASU is as well...

I need to figure out what the truth set is. As these are pose averages, the designs themselves will have the accurate counts per design. I am going to pull the trajectories up and count one of the structures output in the crystalline environment.

Individual Trajectories

The results for the crystal are highly dependent on the choice of oligomer. In the three cases, there are two instances where the entity1 1o5j and two instances where the entity1 4grd are in the exact same position. When they are in the same position, they have the same number calculated. Symmetry doesn't seem to help anything as the symmetric residue selector returns the exact same numbers for each of the ASU types. This is the same for the complex. From the documentation:
"""The filter is now Symmetry aware. The default for Symmetric case is that symmetry is auto-detected and will only count totals for the ASU."""
Every time I read this page, I think I get dumber. It's written so poorly and the terms are vague, especially for symmetry.
"""protocols.simple_filters.BuriedUnsatHbondFilter: (0)  DETECTED THAT POSE IS SYMMETRIC:  
protocols.simple_filters.BuriedUnsatHbondFilter: (0)     if symmetric pose and only_interface=true (default for symmetric case), then will only look at symmetric interface residues 
protocols.simple_filters.BuriedUnsatHbondFilter: (0)     if symmetric pose and only_interface=false (set explicitly in XML), then will add up all unsats in ASU 
protocols.simple_filters.BuriedUnsatHbondFilter: (0)  LOOKING FOR UNSATS ONLY AT RESIDUES DEFINED BY YOUR residue_selector:"""
I'm still not sure whether this means that only_interface will overlap with residue_selector or if the residue_selector alone will be used.

OKAY. It seems that residue_selector trumps all other options. This is great news as this is how I am getting away with every calculation in Rosetta!

There are large differences with the non-contacting ASU and others. Most of the h-bonds identified in non-contact are valid...
protocols.rosetta_scripts.ParsedProtocol: (0) =======================BEGIN MOVER SwitchMover - symmetry_switch=======================
spacegroup: (0) Overriding input crystal parameters [ 125.958,125.958,125.958 , 90,90,90 ] with [ 125.958,125.958,125.958 , 90,90,90 ]
protocols.cryst.refinable_lattice: (0) Initializing 755 jumps.
core.conformation.symmetry.util: (0) symmetrize_fold_tree(): setting anchor to 83
protocols.moves.MoverContainer: (0) Switch symmetry_switch changes to mover make_lattice

protocols.simple_filters.ShapeComplementarityFilter: (0) Using residues for molecule surface (rosetta numbering):
protocols.simple_filters.ShapeComplementarityFilter: (0)   Surface 1: 1, 2, 6, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 43, 44, 46, 49, 51, 55, 57, 58, 59, 61, 62, 63, 65, 66, 67, 69, 70, 71, 73, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 95, 96, 97, 98, 99, 268, 269, 273, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 288, 289, 290, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 310, 311, 313, 316, 318, 322, 324, 325, 326, 328, 329, 330, 332, 333, 334, 336, 337, 338, 340, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 362, 363, 364, 365, 366, 522, 526, 535, 536, 540, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 555, 556, 557, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 577, 578, 580, 583, 585, 589, 591, 592, 593, 595, 596, 597, 599, 600, 601, 603, 604, 605, 607, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 629, 630, 631, 632, 633, 802, 803, 807, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 822, 823, 824, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 844, 845, 847, 850, 852, 856, 858, 859, 860, 862, 863, 864, 866, 867, 868, 870, 871, 872, 874, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 896, 897, 898, 899, 900, 947, 1069, 1070, 1074, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1089, 1090, 1091, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1111, 1112, 1114, 1117, 1119, 1123, 1125, 1126, 1127, 1129, 1130, 1131, 1133, 1134, 1135, 1137, 1138, 1139, 1141, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1163, 1164, 1165, 1166, 1167, 1336, 1337, 1341, 1345, 1346, 1347, 1348, 1349, 1350, 1351, 1352, 1353, 1354, 1356, 1357, 1358, 1362, 1363, 1364, 1365, 1366, 1367, 1368, 
1369, 1370, 1371, 1372, 1373, 1374, 1378, 1379, 1381, 1384, 1386, 1390, 1392, 1393, 1394, 1396, 1397, 1398, 1400, 1401, 1402, 1404, 1405, 1406, 1408, 1411, 1412, 1413, 1414, 1415, 1416, 1417, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, 1426, 1427, 1428, 1430, 1431, 1432, 1433, 1434, 1603, 1604, 1608, 1612, 1613, 1614, 1615, 1616, 1617, 1618, 1619, 1620, 1621, 1623, 1624, 1625, 1629, 1630, 1631, 1632, 1633, 1634, 1635, 1636, 1637, 1638, 1639, 1640, 1641, 1645, 1646, 1648, 1651, 1653, 1657, 1659, 1660, 1661, 1663, 1664, 1665, 1667, 1668, 1669, 1671, 1672, 1673, 1675, 1678, 1679, 1680, 1681, 1682, 1683, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1691, 1692, 1693, 1694, 1695, 1697, 1698, 1699, 1700, 1701, 1870, 1871, 1875, 1879, 1880, 1881, 1882, 1883, 1884, 1885, 1886, 1887, 1888, 1890, 1891, 1892, 1896, 1897, 1898, 1899, 1900, 1901, 1902, 1903, 1904, 1905, 1906, 1907, 1908, 1912, 1913, 1915, 1918, 1920, 1924, 1926, 1927, 1928, 1930, 1931, 1932, 1934, 1935, 1936, 1938, 1939, 1940, 1942, 1945, 1946, 1947, 1948, 1949, 1950, 1951, 1952, 1953, 1954, 1955, 1956, 1957, 1958, 1959, 1960, 1961, 1962, 1964, 1965, 1966, 1967, 1968, 
EXTRA!
2137, 2138, 2142, 2146, 2147, 2148, 2149, 2150, 2151, 2152, 2153, 2154, 2155, 2157, 2158, 2159, 2163, 2164, 2165, 2166, 2167, 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, 2179, 2180, 2182, 2185, 2187, 2191, 2193, 2194, 2195, 2197, 2198, 2199, 2201, 2202, 2203, 2205, 2206, 2207, 2209, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2220, 2221, 2222, 2223, 2224, 2225, 2226, 2227, 2228, 2229, 2231, 2232, 2233, 2234, 2235, 2404, 2405, 2409, 2413, 2414, 2415, 2416, 2417, 2418, 2419, 2420, 2421, 2422, 2424, 2425, 2426, 2430, 2431, 2432, 2433, 2434, 2435, 2436, 2437, 2438, 2439, 2440, 2441, 2442, 2446, 2447, 2449, 2452, 2454, 2458, 2460, 2461, 2462, 2464, 2465, 2466, 2468, 2469, 2470, 2472, 2473, 2474, 2476, 2479, 2480, 2481, 2482, 2483, 2484, 2485, 2486, 2487, 2488, 2489, 2490, 2491, 2492, 2493, 2494, 2495, 2496, 2498, 2499, 2500, 2501, 2502, 2671, 2672, 2676, 2680, 2681, 2682, 2683, 2684, 2685, 2686, 2687, 2688, 2689, 2691, 2692, 2693, 2697, 2698, 2699, 2700, 2701, 2702, 2703, 2704, 2705, 2706, 2707, 2708, 2709, 2713, 2714, 2716, 2719, 2721, 2725, 2727, 2728, 2729, 2731, 2732, 2733, 2735, 2736, 2737, 2739, 2740, 2741, 2743, 2746, 2747, 2748, 2749, 2750, 2751, 2752, 2753, 2754, 2755, 2756, 2757, 2758, 2759, 2760, 2761, 2762, 2763, 2765, 2766, 2767, 2768, 2769, 2938, 2939, 2943, 2947, 2948, 2949, 2950, 2951, 2952, 2953, 2954, 2955, 2956, 2958, 2959, 2960, 2964, 2965, 2966, 2967, 2968, 2969, 2970, 2971, 2972, 2973, 2974, 2975, 2976, 2980, 2981, 2983, 2986, 2988, 2992, 2994, 2995, 2996, 2998, 2999, 3000, 3002, 3003, 3004, 3006, 3007, 3008, 3010, 3013, 3014, 3015, 3016, 3017, 3018, 3019, 3020, 3021, 3022, 3023, 3024, 3025, 3026, 3027, 3028, 3029, 3030, 3032, 3033, 3034, 3035, 3036
protocols.simple_filters.ShapeComplementarityFilter: (0)   Surface 2: 103, 105, 146, 148, 149, 152, 153, 155, 156, 245, 248, 249, 251, 252, 253, 255, 256, 259, 260, 370, 372, 413, 415, 416, 419, 420, 422, 423, 512, 515, 516, 518, 519, 520, 522, 523, 526, 527, 637, 639, 680, 682, 683, 686, 687, 689, 690, 779, 782, 783, 785, 786, 787, 789, 790, 793, 794, 904, 906, 947, 949, 950, 953, 954, 956, 957, 1046, 1049, 1050, 1052, 1053, 1054, 1056, 1057, 1060, 1061, 1171, 1173, 1214, 1216, 1217, 1220, 1221, 1223, 1224, 1313, 1316, 1317, 1319, 1320, 1321, 1323, 1324, 1327, 1328, 1346, 1349, 1350, 1353, 1354, 1357, 1405, 1406, 1438, 1440, 1481, 1483, 1484, 1487, 1488, 1490, 1491, 1580, 1583, 1584, 1586, 1587, 1588, 1590, 1591, 1594, 1595, 1604, 1612, 1615, 1616, 1619, 1620, 1623, 1624, 1629, 1630, 1631, 1632, 1633, 1634, 1657, 1684, 1685, 1686, 1687, 1688, 1689, 1690, 1692, 1693, 1694, 1695, 1699, 1705, 1707, 1748, 1750, 1751, 1754, 1755, 1757, 1758, 1847, 1850, 1851, 1853, 1854, 1855, 1857, 1858, 1861, 1862, 1972, 1974, 2015, 2017, 2018, 2021, 2022, 2024, 2025, 2114, 2117, 2118, 2120, 2121, 2122, 2124, 2125, 2128, 2129, 
EXTRA!
2137, 2142, 2168, 2169, 2170, 2171, 2172, 2173, 2174, 2175, 2180, 2182, 2185, 2187, 2197, 2201, 2205, 2212, 2213, 2214, 2215, 2216, 2217, 2218, 2219, 2239, 2241, 2282, 2284, 2285, 2288, 2289, 2291, 2292, 2381, 2384, 2385, 2387, 2388, 2389, 2391, 2392, 2395, 2396, 2446, 2447, 2449, 2506, 2508, 2549, 2551, 2552, 2555, 2556, 2558, 2559, 2648, 2651, 2652, 2654, 2655, 2656, 2658, 2659, 2662, 2663, 2773, 2775, 2816, 2818, 2819, 2822, 2823, 2825, 2826, 2915, 2918, 2919, 2921, 2922, 2923, 2925, 2926, 2929, 2930, 2948, 3008, 3010, 3040, 3042, 3083, 3085, 3086, 3089, 3090, 3092, 3093, 3182, 3185, 3186, 3188, 3189, 3190, 3192, 3193, 3196, 3197
protocols.rosetta_scripts.ParsedProtocol: (0) =======================END FILTER R_shape_complementarity=======================


normal 1o5j_4grd is missing 8 jumps...
protocols.rosetta_scripts.ParsedProtocol: (0) =======================BEGIN MOVER SwitchMover - symmetry_switch=======================
spacegroup: (0) Overriding input crystal parameters [ 125.958,125.958,125.958 , 90,90,90 ] with [ 125.958,125.958,125.958 , 90,90,90 ]
protocols.cryst.refinable_lattice: (0) Initializing 747 jumps.
core.conformation.symmetry.util: (0) symmetrize_fold_tree(): setting anchor to 83

protocols.simple_filters.ShapeComplementarityFilter: (0) Using residues for molecule surface (rosetta numbering):
protocols.simple_filters.ShapeComplementarityFilter: (0)   Surface 1: 11, 12, 14, 15, 16, 18, 19, 22, 23, 43, 44, 46, 57, 58, 59, 61, 62, 63, 66, 67, 69, 70, 71, 73, 89, 90, 92, 95, 96, 98, 99, 278, 279, 281, 282, 283, 285, 286, 289, 290, 310, 311, 313, 324, 325, 326, 328, 329, 330, 333, 334, 336, 337, 338, 340, 356, 357, 359, 362, 363, 365, 366, 522, 526, 545, 546, 548, 549, 550, 552, 553, 556, 557, 577, 578, 580, 591, 592, 593, 595, 596, 597, 600, 601, 603, 604, 605, 607, 623, 624, 626, 629, 630, 632, 633, 680, 812, 813, 815, 816, 817, 819, 820, 823, 824, 844, 845, 847, 858, 859, 860, 862, 863, 864, 867, 868, 870, 871, 872, 874, 890, 891, 893, 896, 897, 899, 900, 1079, 1080, 1082, 1083, 1084, 1086, 1087, 1090, 1091, 1111, 1112, 1114, 1125, 1126, 1127, 1129, 1130, 1131, 1134, 1135, 1137, 1138, 1139, 1141, 1157, 1158, 1160, 1163, 1164, 1166, 1167, 1346, 1347, 1349, 1350, 1351, 1353, 1354, 1357, 1358, 1378, 1379, 1381, 1392, 1393, 1394, 1396, 1397, 1398, 1401, 1402, 1404, 1405, 1406, 1408, 1424, 1425, 1427, 1430, 1431, 1433, 1434, 1613, 1614, 1616, 1617, 1618, 1620, 1621, 1624, 1625, 1645, 1646, 1648, 1659, 1660, 1661, 1663, 1664, 1665, 1668, 1669, 1671, 1672, 1673, 1675, 1691, 1692, 1694, 1697, 1698, 1700, 1701, 1880, 1881, 1883, 1884, 1885, 1887, 1888, 1891, 1892, 1912, 1913, 1915, 1926, 1927, 1928, 1930, 1931, 1932, 1935, 1936, 1938, 1939, 1940, 1942, 1958, 1959, 1961, 1964, 1965, 1967, 1968
protocols.simple_filters.ShapeComplementarityFilter: (0)   Surface 2: 103, 105, 146, 148, 149, 152, 153, 155, 156, 245, 248, 249, 251, 252, 253, 255, 256, 259, 260, 370, 372, 413, 415, 416, 419, 420, 422, 423, 512, 515, 516, 518, 519, 520, 522, 523, 526, 527, 637, 639, 680, 682, 683, 686, 687, 689, 690, 779, 782, 783, 785, 786, 787, 789, 790, 793, 794, 812, 815, 816, 819, 820, 823, 871, 872, 904, 906, 947, 949, 950, 953, 954, 956, 957, 1046, 1049, 1050, 1052, 1053, 1054, 1056, 1057, 1060, 1061, 1171, 1173, 1214, 1216, 1217, 1220, 1221, 1223, 1224, 1313, 1316, 1317, 1319, 1320, 1321, 1323, 1324, 1327, 1328, 1438, 1440, 1481, 1483, 1484, 1487, 1488, 1490, 1491, 1580, 1583, 1584, 1586, 1587, 1588, 1590, 1591, 1594, 1595, 1616, 1620, 1624, 1705, 1707, 1748, 1750, 1751, 1754, 1755, 1757, 1758, 1847, 1850, 1851, 1853, 1854, 1855, 1857, 1858, 1861, 1862, 1913, 1915, 1972, 1974, 2015, 2017, 2018, 2021, 2022, 2024, 2025, 2114, 2117, 2118, 2120, 2121, 2122, 2124, 2125, 2128, 2129

There are a number of extra asu's being generated in non-contact and this is all a consequence of using a distance of 5 for the symmetric expansion versus a distance of 7 (non-contact)
### This is something I never nailed down, and it seems here it is still a mystery. Just in time for me to implement a measurement of my own using symmetric centers of mass

OKAY, another huge issue is that the definition of the interface from non-contact is very different because the 1o5j protomer in the ASU is not obeying the symmetry transformations that I set up the pose with. Since I took a protomer from outside the original oligomer, the transformation is rotating the oligomer; however, none of the copies are actually touching, and therefore the oligomeric subtraction is messed up. Moving forward, it will be highly important to ensure that the provided ASU is SymDesign derived, or actually makes up the oligomeric copy as intended.

In [7]:
# Load the V5 (BUNS) analysis table; the first column is the design index and the columns use a 3-level header
symmetric_crystal_analysis_file = '/home/kylemeador/designs/metrics_testing_analysisV5_BUNS.csv'
sym_test_df3 = pd.read_csv(
    symmetric_crystal_analysis_file,
    header=[0, 1, 2],
    index_col=0,
)

In [8]:
# Display, for every design (row), the columns under ('pose', 'mean') whose third header level is one of the
# listed buns_*/interface_buried_hbonds metrics. The '*_unbound_sym' variants are intentionally left out
sym_test_df3.loc[:, idx_slice['pose', 'mean', 
                              ['buns_nano', 'buns_nano_hpol', 'buns_asu', 'buns_asu_hpol', 
                               'buns_total', 'interface_buried_hbonds', 
                               'buns_1_unbound', 'buns_1_unbound_ignore', # 'buns_1_unbound_sym', 'buns_1_unbound_sym_ignore',
                               'buns_2_unbound', 'buns_2_unbound_ignore', # 'buns_2_unbound_sym', 'buns_2_unbound_sym_ignore',
                               'buns_complex', 'buns_complex_ignore', 'buns_complex_sym', 'buns_complex_sym_ignore']]]

Unnamed: 0_level_0,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_2,buns_nano,buns_nano_hpol,buns_asu,buns_asu_hpol,buns_total,interface_buried_hbonds,buns_1_unbound,buns_1_unbound_ignore,buns_2_unbound,buns_2_unbound_ignore,buns_complex,buns_complex_ignore,buns_complex_sym,buns_complex_sym_ignore
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123,3.0,1.666667,1.0,0.333333,6.0,3.875,3.0,0.0,0.75,0.75,8.0,5.111111,8.222222,5.111111
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-non-contact,6.2,2.8,0.0,0.0,9.0,4.5,2.0,0.0,0.75,0.75,9.4,7.2,9.6,7.2
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-odd-asu,3.0,1.5,0.0,0.0,4.5,1.5,2.75,0.0,0.75,0.75,5.0,3.25,5.25,3.25
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,4.333333,0.833333,0.0,0.0,5.166667,5.0,2.0,1.166667,1.166667,0.666667,8.166667,7.166667,8.833333,7.166667
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,5.5,0.333333,5.0,0.333333,11.166667,6.166667,2.0,1.166667,5.833333,2.666667,14.0,13.0,14.0,13.0


This hasn't helped fully...
Tomorrow, continue where I left off by looking at the odd-asu complex [ignore]

Well there are two sites where the calculation is failing. The atoms at 
HEAVY polar atom at residue 18: ARG  NE 
Hpol polar atom at residue 18: ARG  HE 
HEAVY polar atom at residue 46: GLN  OE1
are missed, or considered surface, depending on the input ASU. This is very odd and may potentially be dependent on the SASA bug. Surprisingly, the odd-asu and the standard ASU have the same symmetry mate in the ASU for 1o5j (where these troublesome atoms are). The distance used for these unit cells was different: 4 for the odd-asu, 5 for the standard ASU.

If the modification of this parameter fixes the output, that would be a fantastic outcome. At least for crystal cases, the issue would be the difference in unit cell assembly.


In [9]:
# Load the V6 (BUNS) analysis table; the first column is the design index and the columns use a 3-level header
fixed_asu_analysis_file = '/home/kylemeador/designs/metrics_testing_analysisV6_BUNS.csv'
fixed_asu_df = pd.read_csv(
    fixed_asu_analysis_file,
    header=[0, 1, 2],
    index_col=0,
)

In [10]:
# Display, for every design (row), the columns under ('pose', 'mean') whose third header level is one of the
# listed buns_*/interface_buried_hbonds metrics. The '*_unbound_sym' variants are intentionally left out
fixed_asu_df.loc[:, idx_slice['pose', 'mean', 
                              ['buns_nano', 'buns_nano_hpol', 'buns_asu', 'buns_asu_hpol', 
                               'buns_total', 'interface_buried_hbonds', 
                               'buns_1_unbound', 'buns_1_unbound_ignore', # 'buns_1_unbound_sym', 'buns_1_unbound_sym_ignore',
                               'buns_2_unbound', 'buns_2_unbound_ignore', # 'buns_2_unbound_sym', 'buns_2_unbound_sym_ignore',
                               'buns_complex', 'buns_complex_ignore', 'buns_complex_sym', 'buns_complex_sym_ignore']]]

Unnamed: 0_level_0,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_2,buns_nano,buns_nano_hpol,buns_asu,buns_asu_hpol,buns_total,interface_buried_hbonds,buns_1_unbound,buns_1_unbound_ignore,buns_2_unbound,buns_2_unbound_ignore,buns_complex,buns_complex_ignore,buns_complex_sym,buns_complex_sym_ignore
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123,3.0,1.666667,1.0,0.333333,6.0,3.875,3.0,0.0,0.75,0.75,8.0,5.111111,8.222222,5.111111
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-non-contact,6.2,2.8,0.0,0.0,9.0,4.5,2.0,0.0,0.75,0.75,9.4,7.2,9.6,7.2
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-odd-asu,4.0,2.5,0.0,0.0,6.5,3.75,2.75,0.0,0.75,0.75,7.25,4.25,7.5,4.25
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,4.333333,0.833333,0.0,0.0,5.166667,5.0,2.0,1.166667,1.166667,0.666667,8.166667,7.166667,8.833333,7.166667
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,4.833333,0.666667,4.5,0.666667,10.666667,5.166667,2.833333,1.5,5.0,4.666667,13.0,11.833333,13.333333,11.833333


OKAY, sym update. It actually appears to help in some instances where there is a modification to a residue which has interactions with a copy of itself. For instance, 2dj6 has interactions with itself in the assembly (albeit these are in the same 3-fold oligomer). However, as these interactions are close to the interface with 2gtr, the residues are up for design and therefore, in some instances, were designed to produce an unsatisfied hydrogen bond. In these cases, counting this symmetry is very important! Because I have only seen it benefit design, and in all other cases the sym and non-sym are the same, I am going to make the symmetric residue selector the default in the BUNS calculation.

This has given me great insight into an issue that I was overlooking. As I am designing residues at the interface of oligomers, the residues which are integral for contact could be modified by the design. I see, for instance, that the modification of an interface residue leaves an h-bond unsatisfied. I was not counting this residue as it doesn't interact in the interface, but nonetheless, it is affected. In most cases, these residues' energy differences will be important to the design success. I need to ensure none of this is overlooked in my narrow focus on the interface residues. In all honesty, with the way I am calculating bound and unbound energies, I could grab residue information for every single residue in the structure.

OKAY, given that issue was addressed, I am now ensuring the calculation of shape complementarity is not made arbitrarily low by the inclusion of excess molecular surface that is not involved in the interaction. I am trying to run metrics using a couple of different shape_complementarity residue selectors and compare the output in the different unit cell environments.

In [22]:
# extra_res_sc_file = '/home/kylemeador/designs/metrics_testing_analysisV7_SC.csv'  # this one didn't work well as self_interfaces were defined poorly
# Load the V8 (SC) analysis table; the first column is the design index and the columns use a 3-level header
extra_res_sc_file = '/home/kylemeador/designs/metrics_testing_analysisV8_SC.csv'
extra_res_sc_df = pd.read_csv(
    extra_res_sc_file,
    header=[0, 1, 2],
    index_col=0,
)

In [23]:
# Display, for every design (row), the columns under ('pose', 'mean') whose third header level is one of the
# listed buns_*/interface_buried_hbonds metrics. The '*_unbound_sym' variants are intentionally left out
extra_res_sc_df.loc[:, idx_slice['pose', 'mean', 
                              ['buns_nano', 'buns_nano_hpol', 'buns_asu', 'buns_asu_hpol', 
                               'buns_total', 'interface_buried_hbonds', 
                               'buns_1_unbound', 'buns_1_unbound_ignore', # 'buns_1_unbound_sym', 'buns_1_unbound_sym_ignore',
                               'buns_2_unbound', 'buns_2_unbound_ignore', # 'buns_2_unbound_sym', 'buns_2_unbound_sym_ignore',
                               'buns_complex', 'buns_complex_ignore', 'buns_complex_sym', 'buns_complex_sym_ignore']]]

Unnamed: 0_level_0,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_2,buns_nano,buns_nano_hpol,buns_asu,buns_asu_hpol,buns_total,interface_buried_hbonds,buns_1_unbound,buns_1_unbound_ignore,buns_2_unbound,buns_2_unbound_ignore,buns_complex,buns_complex_ignore,buns_complex_sym,buns_complex_sym_ignore
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123,3.0,1.666667,1.0,0.333333,6.0,13.375,3.0,0.0,0.75,0.75,16.444444,5.111111,8.222222,5.111111
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-non-contact,6.2,2.8,0.0,0.0,9.0,13.75,2.0,0.0,0.75,0.75,16.8,7.2,9.6,7.2
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-odd-asu,4.0,2.5,0.0,0.0,6.5,13.5,2.75,0.0,0.75,0.75,17.0,4.25,7.5,4.25
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,4.333333,0.833333,0.0,0.0,5.166667,13.833333,2.0,1.166667,1.166667,0.666667,17.0,7.166667,8.833333,7.166667
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,4.833333,0.666667,4.5,0.666667,10.666667,10.333333,2.833333,1.5,5.0,4.666667,18.166667,11.833333,13.333333,11.833333


In [24]:
# Display the ('pose', 'mean') columns for each shape_complementarity variant so the different residue selectors
# can be compared per design
extra_res_sc_df.loc[:, idx_slice['pose', 'mean', 
                              ['shape_complementarity', 'shape_complementarity_self_interface1', 
                               'shape_complementarity_self_interface2', 'shape_complementarity_interface_only', 'shape_complementarity_neighbors', 'shape_complementarity_neighbors_sym']]]

Unnamed: 0_level_0,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean
Unnamed: 0_level_2,shape_complementarity,shape_complementarity_self_interface1,shape_complementarity_self_interface2,shape_complementarity_interface_only,shape_complementarity_neighbors,shape_complementarity_neighbors_sym
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123,0.504586,0.475444,0.454772,0.633107,0.585595,0.586607
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-non-contact,0.374826,0.479061,0.45186,0.589664,-1.0,0.577528
metrics_testing_Designs-1o5j_4grd-DEGEN_1_1-ROT_39_1-tx_123-odd-asu,0.396568,0.464981,0.421943,0.583986,0.485141,0.577528
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72,0.67655,0.60935,0.611653,0.679786,-1.0,0.681215
metrics_testing_Designs-2gtr_2dj6-DEGEN_1_2-ROT_19_29-tx_72-correct-asu,0.676484,0.579749,0.580567,0.679743,0.673292,0.681094


In [7]:
# Display every column whose second header level is 'dock', whatever its first and third levels are
extra_res_sc_df.loc[:, idx_slice[:, 'dock', :]]

Unnamed: 0_level_0,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose,pose
Unnamed: 0_level_1,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock,dock
Unnamed: 0_level_2,design_dimension,entity_1_c_terminal_helix,entity_1_c_terminal_orientation,entity_1_max_radius,entity_1_min_radius,entity_1_n_terminal_helix,entity_1_n_terminal_orientation,entity_1_name,entity_1_number_of_residues,entity_1_radius,entity_1_symmetry,entity_2_c_terminal_helix,entity_2_c_terminal_orientation,entity_2_max_radius,entity_2_min_radius,entity_2_n_terminal_helix,entity_2_n_terminal_orientation,entity_2_name,entity_2_number_of_residues,entity_2_radius,entity_2_symmetry,entity_max_radius_average_deviation,entity_max_radius_ratio_1v2,entity_maximum_radius,entity_min_radius_average_deviation,entity_min_radius_ratio_1v2,entity_minimum_radius,entity_number_of_residues_average_deviation,entity_number_of_residues_ratio_1v2,entity_radius_average_deviation,entity_radius_ratio_1v2,entity_residue_length_total,interface_b_factor_per_residue,interface_secondary_structure_count,interface_secondary_structure_fragment_count,interface_secondary_structure_fragment_topology,interface_secondary_structure_fragment_topology_1,interface_secondary_structure_fragment_topology_2,interface_secondary_structure_topology,interface_secondary_structure_topology_1,interface_secondary_structure_topology_2,multiple_fragment_ratio,nanohedra_score,nanohedra_score_center,nanohedra_score_center_normalized,nanohedra_score_normalized,number_fragment_residues_center,number_fragment_residues_total,number_of_fragments,observations,percent_fragment,percent_fragment_coil,percent_fragment_helix,percent_fragment_strand,percent_residues_fragment_center,percent_residues_fragment_total,symmetry,symmetry_group_1,symmetry_group_2,total_interface_residues,total_non_fragment_interface_residues
NanohedraEntry54DockedPoses_Designs-1x25_4wcz-DEGEN_1_1-ROT_24_3-tx_218,0,0,-1,61.99727,17.481728,0,-1,1x25_1,124,41.626032,C3,0,1,76.649691,34.58149,0,1,4wcz_1,249,57.383116,C3,0.191161,0.808839,76.649691,0.494477,0.505523,17.481728,0.502008,0.497992,0.274594,0.725406,373,0.0,15,4,HHHH,HH,HH,TETCHCGHHHGCTHH,TETCHCGH,HHGCTHH,4.0,33.331886,7.071506,0.642864,0.900862,11,37,22,1,0.948718,0.0,1.0,0.0,0.261905,0.880952,point,C3,C3,39,31
NanohedraEntry54DockedPoses_Designs-1zvb_4ogg-DEGEN_1_1-ROT_38_32-tx_68,0,1,-1,62.707001,11.586255,1,1,1zvb_1,33,37.310587,C3,0,1,74.26914,24.306058,0,-1,4ogg_1,384,51.599005,C3,0.155679,0.844321,74.26914,0.523318,0.476682,11.586255,0.914062,0.085938,0.276913,0.723087,417,0.0,8,4,HCGH,H,CGH,HTTCGCGH,H,TTCGCGH,4.888889,45.292337,11.47048,0.637249,1.029371,18,44,44,1,1.257143,0.0,0.977273,0.022727,0.486486,1.0,point,C3,C3,35,19
NanohedraEntry54DockedPoses_Designs-2gtr_3qv0-DEGEN_1_1-ROT_11_28-tx_567,0,0,1,85.267052,40.625666,0,1,2gtr_1,249,64.08493,C3,1,1,53.83048,11.642901,1,1,3qv0_1,179,37.467626,C3,0.583992,1.583992,85.267052,2.489308,3.489308,11.642901,0.391061,1.391061,0.710408,1.710408,428,0.0,22,6,HHHHHH,HHH,HHH,HCHHEECCHHHCCHHCECTETC,HCHH,EECCHHHCCHHCECTETC,3.2,60.563926,13.880725,0.555229,0.903939,25,67,40,2,0.817073,0.0,1.0,0.0,0.268817,0.72043,point,C3,C3,82,68
NanohedraEntry54DockedPoses_Designs-4k2n_3m6n-DEGEN_1_2-ROT_6_33-tx_324,0,0,-1,96.042612,47.331812,0,1,4k2n_1,259,72.801901,C3,1,1,67.722127,22.803575,0,1,3m6n_1,261,47.180129,C3,0.418187,1.418187,96.042612,1.075631,2.075631,22.803575,0.007663,0.992337,0.543063,1.543063,520,0.0,10,4,HHHH,HH,HH,HGCHHCHHHC,HGCHH,CHHHC,2.888889,20.510078,4.402242,0.489138,0.707244,9,29,13,1,0.966667,0.038462,0.961538,0.0,0.290323,0.935484,point,C3,C3,30,22


# Testing for differences in new metrics collection protocol and old that gives positive values for interface energy

In [16]:
# Parse both score files into {design: {score_key: value, ...}} dictionaries with read_scores()
# The '_SYM' file is presumably the output of the new (symmetric) metrics protocol - TODO confirm
old_score_dict = read_scores('/home/kylemeador/designs/4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447.sc')
sym_new_score_dict = read_scores('/home/kylemeador/designs/4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_SYM.sc')

In [22]:
from collections import defaultdict
from string import digits


def digit_keeper():
    """Build a str.translate() table which keeps the ASCII digits and removes every other character

    Returns:
        (collections.defaultdict): {ord('0'): '0', ..., ord('9'): '9'} where any other key yields None
    """
    # type(None)() evaluates to None and str.translate() deletes characters that map to None
    keep_digits = defaultdict(type(None))
    for char in digits:
        keep_digits[ord(char)] = char

    return keep_digits


# Shared translate table used to strip non-digit characters (such as a chain id) from residue identifiers
digit_translate_table = digit_keeper()

In [28]:
from copy import deepcopy

In [47]:
def dirty_residue_processing(score_dict, offset=None, hbonds=None, pose_length=500):
    """Process Residue Metrics from Rosetta score dictionary

    Every score key that starts with 'per_res_' is parsed as per_res_<metric>_[...]_<pose_state>_<residue> and
    stored in a per-residue deepcopy of residue_template. All other score keys are ignored

    One-indexed residues
    Args:
        score_dict (dict): {'0001': {'buns': 2.0, 'per_res_energy_complex_15A': -3.26, ...,
                            'yhh_planarity':0.885, 'hbonds_res_selection_complex': '15A,21A,26A,35A,...'}, ...}
    Keyword Args:
        offset=None (dict): {1: 0, 2: 102, ...} The amount to offset each chain by. Only applied to 'unbound'
            metrics. Keys may be the chain number as an int, or as the str found in the score key ('1', '2', ...)
        hbonds=None (dict): {'0001': [34, 54, 67, 68, 106, 178], ...} Currently unused
        pose_length=500 (int): The highest residue number to keep. Metrics of higher numbered residues are
            discarded and a single warning is printed. The default is a placeholder value
    Returns:
        (dict): {'0001': {15: {'energy': {'complex': -2.771, 'unbound': 0, 'fsp': 0, 'cst': 0},
                               'sasa': {'polar': {'complex': 13.987, 'unbound': 0}, ...},
                               'type': None, 'hbond': 0, 'core': 0, 'interior': 0, 'rim': 0, 'support': 0},
                          ...}, ...}
    """
    warn = False  # only report out of range residues once per call
    total_residue_dict = {}
    for design, scores in score_dict.items():
        residue_data = {}
        for key, value in scores.items():
            if not key.startswith('per_res_'):
                continue
            metadata = key.split('_')
            # remove chain_id in rosetta_numbering="False"
            residue_number = int(metadata[-1].translate(digit_translate_table))
            if residue_number > pose_length:
                if not warn:  # TODO this can move to residue_processing (clean) once instated
                    warn = True
                    print('Encountered %s which has residue number > the pose length (%d). Scores above '
                          'will be discarded. Use pbd_numbering on all Rosetta PerResidue SimpleMetrics to'
                          ' ensure that symmetric copies have the same residue number on symmetry mates.'
                          % (key, pose_length))
                continue
            metric = metadata[2]  # energy or sasa
            pose_state = metadata[-2]  # unbound or complex
            if pose_state == 'unbound' and offset:
                # The chain id parsed from the key is a str, while offset is documented with int keys. Accept both
                chain = metadata[-3]
                if chain not in offset and chain.isdigit():
                    chain = int(chain)
                residue_number += offset[chain]  # get oligomer chain offset
            if residue_number not in residue_data:
                residue_data[residue_number] = deepcopy(residue_template)
            if metric == 'sasa':
                # Ex. per_res_sasa_hydrophobic_1_unbound_15 or per_res_sasa_hydrophobic_complex_15
                polarity = metadata[3]
                residue_data[residue_number][metric][polarity][pose_state] = value
            else:
                # Ex. per_res_energy_1_unbound_15 or per_res_energy_complex_15
                residue_data[residue_number][metric][pose_state] += value
        total_residue_dict[design] = residue_data

    return total_residue_dict


In [48]:
# Convert both score dictionaries into {design: {residue_number: residue metrics, ...}, ...}
old_residues = dirty_residue_processing(old_score_dict)
sym_new_residues = dirty_residue_processing(sym_new_score_dict)
# correct_asu_sym_new_residues = dirty_dirty_residue_processing(correct_asu_sym_new_score_dict) 
# correct_asu_old_residues = dirty_dirty_residue_processing(correct_asu_old_score_dict)

In [64]:
# old_residues
# sym_new_residues
# Split the per-residue metrics of every design by pose state. old_d/new_d hold the 'complex' energy and sasa,
# old_d_unbound/new_d_unbound hold the 'unbound' energy and sasa. Only designs found in old_residues are visited
new_d, new_d_unbound = {}, {}
old_d, old_d_unbound = {}, {}
for design in old_residues:
    # OLD SCORES are processed first, then the NEW SCORES
    for residues_by_design, complex_d, unbound_d in ((old_residues, old_d, old_d_unbound),
                                                     (sym_new_residues, new_d, new_d_unbound)):
        complex_d[design] = {
            residue: {'energy_complex': residue_info['energy']['complex'],
                      'sasa_complex': {sasa_type: residue_info['sasa'][sasa_type]['complex']
                                       for sasa_type in ('polar', 'hydrophobic', 'total')}}
            for residue, residue_info in residues_by_design[design].items()}
        unbound_d[design] = {
            residue: {'energy_unbound': residue_info['energy']['unbound'],
                      'sasa_unbound': {sasa_type: residue_info['sasa'][sasa_type]['unbound']
                                       for sasa_type in ('polar', 'hydrophobic', 'total')}}
            for residue, residue_info in residues_by_design[design].items()}

## Looking at the number of different residues

In [58]:
# Compare how many residues were collected for one design in new_d, same_d and different_d
# NOTE(review): same_d and different_d are not defined in the cells shown here - presumably built in another cell
des_of_interest = '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0002'
print(len(new_d[des_of_interest]))
print(new_d[des_of_interest].keys())
print(len(same_d[des_of_interest]))
print(same_d[des_of_interest].keys())
print(len(different_d[des_of_interest]))

83
dict_keys([10, 11, 12, 132, 133, 134, 135, 136, 137, 138, 139, 13, 140, 141, 142, 14, 15, 16, 17, 18, 19, 1, 20, 21, 22, 23, 2, 365, 366, 369, 373, 384, 385, 386, 387, 388, 389, 396, 397, 398, 399, 3, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 418, 436, 437, 438, 439, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 4, 5, 6, 7, 8, 9])
28
dict_keys([10, 134, 135, 136, 138, 13, 140, 17, 1, 386, 397, 401, 402, 405, 408, 409, 411, 412, 413, 446, 447, 450, 455, 4, 5, 6, 7, 9])
55


### There are 55 new residues that are identified by the different residue selector mechanism

In [57]:
# Total the per-residue 'energy' of every design in same_d
# NOTE(review): same_d and different_d are not defined in the cells shown here - presumably built in another cell
sum_energy_sym, sum_energy = {}, {}
for design, residue_info in same_d.items():
    sum_energy_sym[design], sum_energy[design] = 0, 0
    for residue, data in residue_info.items():
        # while the 'energy_sym' accumulation below is commented out, sum_energy_sym[design] remains 0
#         sum_energy_sym[design] += data['energy_sym']
        sum_energy[design] += data['energy']
        
# Total the per-residue 'energy_sym' of every design in different_d
sum_energy_sym_diff = {}
for design, residue_info in different_d.items():
    sum_energy_sym_diff[design] = 0
    for residue, data in residue_info.items():
        sum_energy_sym_diff[design] += data['energy_sym']

In [132]:
# Inspect clean_correct_asu_sym_d (not defined in the cells shown here); the recorded output is an empty dict
print(clean_correct_asu_sym_d)

{}


In [172]:
# Total 'energy_sym' and 'energy' per design over the residues in clean_same_d
# NOTE(review): the clean_* input dicts are not defined in the cells shown here - presumably built in another cell
clean_sum_energy_sym_same, clean_sum_energy_all = {}, {}
for design, residue_info in clean_same_d.items():
    clean_sum_energy_sym_same[design], clean_sum_energy_all[design] = 0, 0
    for residue, data in residue_info.items():
        clean_sum_energy_sym_same[design] += data['energy_sym']
        clean_sum_energy_all[design] += data['energy']
        

# Total 'energy_sym' per design over the residues in clean_different_d
clean_sum_energy_sym_different = {}
for design, residue_info in clean_different_d.items():
    clean_sum_energy_sym_different[design] = 0
    for residue, data in residue_info.items():
        clean_sum_energy_sym_different[design] += data['energy_sym']
        
# Total 'energy_sym' per design over every residue in clean_correct_asu_sym_d
clean_sum_energy_sym_all = {}
for design, residue_info in clean_correct_asu_sym_d.items():
    clean_sum_energy_sym_all[design] = 0
    for residue, data in residue_info.items():
        clean_sum_energy_sym_all[design] += data['energy_sym']
        
# Report the totals for `design`, which is whichever key the loops above bound last
# NOTE(review): if clean_correct_asu_sym_d is empty its loop never rebinds `design` and
# clean_sum_energy_sym_all[design] raises a KeyError - confirm the inputs are populated
print(design)
print(clean_sum_energy_sym_all[design])
print(clean_sum_energy_sym_different[design])
print(clean_sum_energy_sym_same[design])
print('Not Symmetric')
print(clean_sum_energy_all[design])

In [59]:
# Print the three energy totals of the design of interest: sum_energy_sym, sum_energy_sym_diff, then sum_energy
print(sum_energy_sym[des_of_interest])
print(sum_energy_sym_diff[des_of_interest])
print(sum_energy[des_of_interest])

-80.31569543846324
-153.99314824488226
-80.30511223360871


In [60]:
# List the (residue, data) pairs of clean_different_d[design] whose 'energy_sym' is non-zero
# NOTE(review): `design` is whichever loop variable an earlier cell bound last and the recorded output is a KeyError
# for that key - confirm which dict/key was intended (the following cells use different_d[des_of_interest])
print([(residue, data) for residue, data in clean_different_d[design].items() if abs(data['energy_sym']) > 0])

KeyError: '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0005'

In [61]:
# Dump both per-design total dictionaries in full, sum_energy_sym first, for side-by-side comparison
for totals_by_design in (sum_energy_sym, sum_energy):
    print(totals_by_design)

{'4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout': -71.7237781524477, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0001': -81.68754271159308, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0002': -80.31569543846324, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0003': -82.1151994708777, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0004': -85.58478136857211, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0005': -79.24435775143307}
{'4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout': -71.71553917505125, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0001': -81.67867762769045, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0002': -80.30511223360871, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean_asu_scout_structure_background_0003': -82.10026186497439, '4nkj_4gvr-DEGEN_1_1-ROT_9_32-tx_447_clean

In [62]:
# Keep the residues of des_of_interest in different_d whose 'energy_sym' is non-zero,
# mapping each residue identifier to its full data dictionary.
impacted_sym_residues = {
    residue_id: residue_data
    for residue_id, residue_data in different_d[des_of_interest].items()
    if abs(residue_data['energy_sym']) > 0
}

In [63]:
# Residue identifiers first, then their 'energy_sym' values in the same (insertion) order
print(list(impacted_sym_residues))
print([residue_data['energy_sym'] for residue_data in impacted_sym_residues.values()])

[11, 12, 132, 133, 137, 139, 141, 142, 14, 15, 16, 18, 19, 20, 21, 22, 23, 2, 365, 366, 369, 373, 384, 385, 387, 388, 389, 396, 398, 399, 3, 400, 403, 404, 406, 407, 410, 414, 415, 416, 418, 436, 437, 438, 439, 444, 445, 448, 449, 451, 452, 453, 454, 456, 8]
[-3.8898900217852965, -3.9917783918018617, -5.582473398758812, -0.8203623568274844, -0.7279156477110705, -7.31571726859182, -0.9081957074571906, 0.3023741814533071, -2.9838619715529453, -4.548585564132624, -3.559422780225799, -5.039072103556087, -5.358612906378457, -5.695400728627054, -2.3211926053430734, -4.274400536580646, -1.095492411041137, -2.014350518648093, -0.1868209882366574, -1.55729679517104, -3.809319823736001, -7.637660711738528, -1.7400261240837889, -1.497863587101887, -1.975199457697389, -6.132366886931946, -1.5942676039149328, -1.9004024325743496, -5.215051921198757, -1.997901128303493, -2.89716929959147, -1.0177891518268938, -0.4816747393308231, -5.500639358859231, -5.501791718504379, -2.722057203406389, -4.0086305

In [149]:
# Residues to compare, named with a leading R or D followed by the residue number
target_sym_residues = ['R280', 'R281', 'R282', 'R283', 'R285', 'R286', 'R288', 'R289', 'R290', 'R291', 'R292', 'R293', 'R294', 'R295', 'R296', 'R297', 'R299', 'R300', 'R302', 'R309', 'D312', 'D314']
# The dictionaries below are queried with the same residue numbers under the letter B, so swap R and D for B once
target_b_keys = [residue.replace('R', 'B').replace('D', 'B') for residue in target_sym_residues]

# 'energy_sym' and 'energy' of those residues in same_d for design 'clean_asu_for_refine_design_0001'
refine_design_residues = same_d['clean_asu_for_refine_design_0001']
asu_check = [refine_design_residues[b_key]['energy_sym'] for b_key in target_b_keys]
non_sym_asu = [refine_design_residues[b_key]['energy'] for b_key in target_b_keys]

# The same two metrics in clean_same_d for design 'clean_asu_for_refine_design_correct_asu_0001'
correct_asu_residues = clean_same_d['clean_asu_for_refine_design_correct_asu_0001']
clean_sym_asu = [correct_asu_residues[b_key]['energy_sym'] for b_key in target_b_keys]
clean_asu = [correct_asu_residues[b_key]['energy'] for b_key in target_b_keys]

In [156]:
# Show the clean_different_d entry for residue 314 under every second letter of PDB.available_letters,
# taken from index 3 up to (not including) index 24.
alternate_letters = PDB.available_letters[3:24:2]
correct_asu_different_residues = clean_different_d['clean_asu_for_refine_design_correct_asu_0001']
print([(letter, correct_asu_different_residues['%s314' % letter]) for letter in alternate_letters])

[('D', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 21.776, 'total': 21.776}}), ('F', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 20.239, 'total': 20.239}}), ('H', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 19.214, 'total': 19.214}}), ('J', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 12.296, 'total': 12.296}}), ('L', {'energy_sym': -1.081, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 18.444, 'total': 18.444}}), ('N', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 16.14, 'total': 16.14}}), ('P', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 13.066, 'total': 13.066}}), ('R', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 10.247, 'total': 10.247}}), ('T', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.675, 'hydrophobic': 15.37, 'total': 16.045}}), ('V', {'energy_sym': 0.0, 'sasa_sym': {'polar': 0.0, 'hydrophobic': 13.833, 'total': 13.833}}), ('X', {'energy_sym': 0.0, 'sasa_sym':

In [159]:
# 'energy_sym' from clean_different_d for each target residue, with the letter D rewritten to L; 0 when the
# rewritten name is absent. The presence check has to use the same rewritten key as the lookup: testing the raw
# name (e.g. 'D314') while reading the rewritten one ('L314') either raises KeyError when only the raw name exists
# or reports 0 for a residue whose rewritten name is present.
correct_asu_different_d = clean_different_d['clean_asu_for_refine_design_correct_asu_0001']
clean_asu_different = [correct_asu_different_d.get(residue.replace('D', 'L'), {'energy_sym': 0})['energy_sym']
                       for residue in target_sym_residues]

In [78]:
# Side-by-side table of per-residue energies for des_of_interest: the old_d / old_d_unbound values on the left
# ("Interface" columns) and the new_d / new_d_unbound values on the right ("Neighbors" columns).
print('Interface   complex unbound  Neighbors   complex unbound')
old_complex_residues = old_d[des_of_interest]
old_unbound_residues = old_d_unbound[des_of_interest]
new_complex_residues = new_d[des_of_interest]
new_unbound_residues = new_d_unbound[des_of_interest]
# Rows follow the residues of new_d; the loop index was never used, so plain iteration replaces enumerate
for residue in new_complex_residues:
    table_row = (
        residue,
        # Residues missing from the old dictionaries fall back to -0. and so print as -0.000
        old_complex_residues.get(residue, {'energy_complex': -0.})['energy_complex'],
        old_unbound_residues.get(residue, {'energy_unbound': -0.})['energy_unbound'],
        ' | ',
        # residue always comes from new_complex_residues, so no fallback is needed for this lookup
        new_complex_residues[residue]['energy_complex'],
        new_unbound_residues.get(residue, {'energy_unbound': 0.})['energy_unbound'],
    )
    print('     %3d|   %.3f %.3f\t    %s  %.3f    %.3f ' % table_row)

Interface   complex unbound  Neighbors   complex unbound
      10|   -4.604 -5.071	     |   -4.604    -8.404 
      11|   -0.000 -0.000	     |   -3.890    -3.949 
      12|   -0.000 -0.000	     |   -3.992    -4.006 
     132|   -0.000 -0.000	     |   -5.582    -5.526 
     133|   -0.000 -0.000	     |   -0.820    -0.821 
     134|   -5.635 -3.522	     |   -5.635    -5.574 
     135|   -1.772 -0.501	     |   -1.775    -0.697 
     136|   -1.969 0.419	     |   -1.959    -0.129 
     137|   -0.000 -0.000	     |   -0.728    -0.753 
     138|   -3.806 -1.995	     |   -3.807    -3.662 
     139|   -0.000 -0.000	     |   -7.316    -7.046 
      13|   -3.757 -3.159	     |   -3.752    -5.265 
     140|   -2.075 -2.161	     |   -2.071    -4.252 
     141|   -0.000 -0.000	     |   -0.908    -0.996 
     142|   -0.000 -0.000	     |   0.302    -1.050 
      14|   -0.000 -0.000	     |   -2.984    -3.280 
      15|   -0.000 -0.000	     |   -4.549    -4.696 
      16|   -0.000 -0.000	     |   -3.559   