### Hydrogen Bonding 
***

**Jupyter Notebook** designed to demonstrate the power of the **hexABC REST-API programmatic interface**. 

The **REST-API** is used to extract **Hydrogen Bonds** statistics (number of HBs, lifetimes) for specific **base pairs** (e.g. T-A, G-C) in different **MD simulations** and different **trimers** (e.g. TCA). The example is focused on **terminal trimers**, to study **fraying events**.

The workflow is powered by the [hexABC database REST API](https://mmb.irbbarcelona.org/webdev3/hexABC/rest)
***

### Importing auxiliary libraries

In [None]:
import requests
import urllib
import json
import plotly
import itertools
import ipywidgets
from IPython.display import display
from math import ceil

### Defining auxiliary functions

In [None]:
#
# find_seq_term: finding MD simulations whose Watson strand starts or ends with a sequence fragment
# (or with its reverse complement); Returns metadata for the systems found.
#
def find_seq_term(json_data, pattern):
    """Find projects whose first sequence starts or ends with ``pattern``.

    Both ``pattern`` and its reverse complement are tested against the first
    entry of each project's ``'sequences'`` list (treated here as the Watson
    strand). Projects without a usable sequence, or whose sequence has four
    characters or fewer, are ignored.

    Args:
        json_data: Iterable of project dicts. Each one may carry a
            ``'sequences'`` list; matching projects must also carry ``'id'``
            and ``'name'``.
        pattern: Motif made of the letters A, T, C and G (e.g. ``'GCG'``).

    Returns:
        A list with one dict per matching project, holding ``'id'``,
        ``'name'``, ``'sequence'`` and ``'positions'``. ``'positions'`` is a
        list of ``(motif, position)`` tuples where ``position`` is the 0-based
        start offset of the motif plus 2, i.e. the 1-based index of the
        central base when the motif is a trimer.

    Raises:
        KeyError: If ``pattern`` contains a character other than A, T, C or G.
    """
    complement_map = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    complement = ''.join(complement_map[base] for base in reversed(pattern))

    # dict.fromkeys keeps the order and drops the duplicate when the pattern is its
    # own reverse complement, so the same hit is never reported twice
    motifs = tuple(dict.fromkeys((pattern, complement)))

    matching_projects = []

    for project in json_data:
        # Tolerate a missing, None or empty 'sequences' entry instead of failing on [0]
        sequences = project.get('sequences') or [None]
        watson_seq = sequences[0]

        if not watson_seq or len(watson_seq) <= 4:
            continue

        positions = []
        for motif in motifs:
            if watson_seq.startswith(motif):
                positions.append((motif, 2))
            if watson_seq.endswith(motif):
                # Start index of the motif sitting at the end of the strand
                start = len(watson_seq) - len(motif)
                positions.append((motif, start + 2))

        if positions:
            matching_projects.append({
                'id': project['id'],
                'name': project['name'],
                'sequence': watson_seq,
                'positions': positions
            })

    return matching_projects


### Base REST-API URL

In [None]:
# Root of the hexABC REST API; endpoint paths (e.g. /projects) are appended to it
API_BASE_URL = "https://mmb.irbbarcelona.org/webdev3/hexABC/api"

### Getting projects info

Retrieving all the **projects metadata** from the **hexABC database**.

* Endpoint used: https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects

In [None]:
# urllib.request is a submodule: a bare "import urllib" does not load it, so import it
# explicitly instead of relying on another package having imported it as a side effect
import urllib.request

# Request up to 1000 projects in a single call and pretty-print the decoded JSON reply
url_get_projects = f'{API_BASE_URL}/projects?limit=1000'
with urllib.request.urlopen(url_get_projects) as response:
    r_projects = json.loads(response.read().decode("utf-8"))
print(json.dumps(r_projects, indent=4))

### Pagination

If the number of **projects** exceeds the default limit (50), **pagination** is needed. In this case, we need to loop over the returned pages to store all the desired information.  


In [None]:
# Accumulator for the full metadata record of every project
projects_metadata = []

# Accumulator for the accession value of every project
accessions = []

# Total number of projects, as reported by the previous response
n_projects = r_projects['total']

# Number of projects requested per page
limit = 100

# Number of pages needed to cover all the projects
pages = ceil(n_projects / limit)

# Walk the pages one by one (page numbering starts at 1)
for page in range(1, pages + 1):

    print(f'Requesting page {page}/{pages}', end='\r')

    # Projects endpoint, including both the limit and the page parameters
    paginated_url = f'{API_BASE_URL}/projects?limit={limit}&page={page}'

    # Query the API and decode the JSON body
    with urllib.request.urlopen(paginated_url) as resp:
        response = json.loads(resp.read().decode("utf-8"))

    # Mine target data
    projects = response['projects']
    project_accessions = [entry['accession'] for entry in projects]
    accessions.extend(project_accessions)
    projects_metadata.extend(projects)

print(f'We have obtained metadata information for {len(accessions)} simulations')

## TRIMERS

### Select the trimers

Selecting the desired **trimers** from the list of all possible **DNA trimers** (e.g. GCG)

In [None]:
# Build the 64 possible DNA trimers from the four bases
bases = ['A', 'T', 'C', 'G']
trimer_list = list(map(''.join, itertools.product(bases, repeat=3)))

# Dropdown used to pick the trimer to analyse ('GCG' is preselected)
mdsel = ipywidgets.Dropdown(
    description='Sel. trimer:',
    options=trimer_list,
    value='GCG',
    disabled=False,
)
display(mdsel)

### Searching for trimers

Looking for the selected **trimer sequence** (or its reverse complement) at either end of the Watson strand of each simulation in the **dataset**. 


In [None]:
# Trimer currently selected in the dropdown
trimer = mdsel.value

# Simulations whose Watson strand starts or ends with the trimer (or its reverse complement)
results = find_seq_term(projects_metadata, trimer)

# Report every hit: project, strand, and where each motif was found
for hit in results:
    print(f"{hit['id']} - {hit['name']}")
    print(f"  Watson strand: {hit['sequence']}")
    for found_motif, found_pos in hit['positions']:
        print(f"    ↳ found '{found_motif}' at position {found_pos}")


### Extract hydrogen bond values

For each **trimer** found, extract the **hydrogen bond** values **along time** for the central nucleotide (e.g. G**C**G). 

* Endpoint used: https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/{id}/analyses/hbonds

In [None]:
from collections import Counter, defaultdict
from itertools import groupby

# Pool of every hydrogen-bond value collected for the terminal base pairs of the selected trimer
aggregated_hbs = []
for result in results:
    print(f"{result['id']} - {result['name']}")
    print(f"  Watson strand: {result['sequence']}")

    # Hydrogen-bond analysis endpoint for this project
    url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=-1&factor=500"
    print(url_param)

    with urllib.request.urlopen(url_param) as response:
        hbs = json.loads(response.read().decode("utf-8"))

    # Position reported for a motif that ends the strand. It is derived from the actual
    # sequence length so that strands that are not 20 nt long are handled correctly
    # (for a 20-mer this is 19, the value that used to be hard-coded).
    last_terminal_pos = len(result['sequence']) - 1

    for motif, pos in result['positions']:
        print(f"    ↳ found '{motif}' at position {pos}")

        # Only interested in terminal (flanking) nucleotides
        if 2 < pos < last_terminal_pos:
            continue

        # Base-pair labels are matched by prefix: position followed by the central base
        # of the motif as it appears on the Watson strand
        code = f"{pos}{motif[1]}"
        print("Code: " + code)

        # Complementary case: the motif found is the reverse complement of the selected
        # trimer; the lookup code is the same, only the report differs
        if motif != trimer:
            print("Code -rev-: " + code)

        for bp_item in hbs['hbonds']:

            print(bp_item['bp'])
            if not bp_item['bp'].startswith(code):
                continue

            list_hbonds = bp_item['hbonds']
            aggregated_hbs.extend(list_hbonds)
            print(len(list_hbonds))

            # Percentages: share of entries holding each hydrogen-bond value
            counts = Counter(list_hbonds)
            total = len(list_hbonds)
            percentages = {k: round((v / total) * 100, 2) for k, v in counts.items()}

            # Lifetimes: lengths of the runs of consecutive identical values
            lifetimes = defaultdict(list)
            for value, group in groupby(list_hbonds):
                lifetimes[value].append(sum(1 for _ in group))

            # Output
            print("Percentages:")
            for value in sorted(percentages):
                print(f"  {value}: {percentages[value]}%")

            # Output min and max per state
            print("Min and Max Lifetimes:")
            for state in sorted(lifetimes):
                durations = lifetimes[state]
                print(f"  {state}: min = {min(durations)}, max = {max(durations)}")

print("FINAL NUMBERS:")

# Percentages over all the pooled values (an empty pool simply yields an empty dict)
counts = Counter(aggregated_hbs)
total = len(aggregated_hbs)
percentages = {k: round((v / total) * 100, 2) for k, v in counts.items()}

# Output
print("Percentages:")
for value in sorted(percentages):
    print(f"  {value}: {percentages[value]}%")

    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18T23A
17A24T
16A25T
15T26A
14G27C
13T28A
12C29G
11C30G
10G31C
9A32T
8T33A
7A34T
6G35C
5T36A
4A37T
3A38T
2C39G
1G40C
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16A25T
15T26A
14G27C
13T28A
12C29G
11C30G
10G31C
9A32T
8T33A
7A34T
6G35C
5T36A
4A37T
3A38T
2C39G
1G40C
seq012-1 - hexABC Sequence 012 - replica 1
  Watson strand: GCAATGTCCAGGGGCGGAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq012-1/analyses/hbonds?from=1&to=490
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17G24C
16G25C
15C26G
14G27C
13G28C
12G29C
11G30C
10A31T
9C32G
8C33G
7T34A
6G35C
5T36A
4A37T
3A38T
2C39G
1G40C
seq012-2 - hexABC Sequence 012 - replica 2
  Watson strand: GCAATGTCCAGGGGCGGAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq012-2/analyses/hbonds?from=1&to=490
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17G24C
16G25C
15C26G
14G27C
13G28C
12G29C
11G30C
1

    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18T23A
17A24T
16C25G
15C26G
14A27T
13A28T
12A29T
11A30T
10A31T
9G32C
8G33C
7A34T
6C35G
5A36T
4G37C
3A38T
2C39G
1G40C
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16C25G
15C26G
14A27T
13A28T
12A29T
11A30T
10A31T
9G32C
8G33C
7A34T
6C35G
5A36T
4G37C
3A38T
2C39G
1G40C
seq022-1 - hexABC Sequence 022 - replica 1
  Watson strand: GCAGAGCAGTCCATGAGGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq022-1/analyses/hbonds?from=1&to=490
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18G23C
17G24C
16A25T
15G26C
14T27A
13A28T
12C29G
11C30G
10T31A
9G32C
8A33T
7C34G
6G35C
5A36T
4G37C
3A38T
2C39G
1G40C
seq022-2 - hexABC Sequence 022 - replica 2
  Watson strand: GCAGAGCAGTCCATGAGGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq022-2/analyses/hbonds?from=1&to=490
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18G23C
17G24C
16A25T
15G26C
14T27A
13A28T
12C29G
11C30G
1

    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17C24G
16G25C
15T26A
14A27T
13C28G
12A29T
11T30A
10T31A
9C32G
8A33T
7G34C
6C35G
5G36C
4G37C
3A38T
2C39G
1G40C
seq032-2 - hexABC Sequence 032 - replica 2
  Watson strand: GCAGGCGACTTACATGCAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq032-2/analyses/hbonds?from=1&to=490
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17C24G
16G25C
15T26A
14A27T
13C28G
12A29T
11T30A
10T31A
9C32G
8A33T
7G34C
6C35G
5G36C
4G37C
3A38T
2C39G
1G40C
seq033-1 - hexABC Sequence 033 - replica 1
  Watson strand: GCAGGCTGCTGTTGTGGCGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq033-1/analyses/hbonds?from=1&to=490
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18C23G
17G24C
16G25C
15T26A
14G27C
13T28A
12T29A
11G30C
10T31A
9C32G
8G33C
7T34A
6C35G
5G36C
4G37C
3A38T
2C39G
1G40C
seq033-2 - hexABC Sequence 033 - replica 2
  Watson strand: GCAGGCTGCTGTTGTGGCGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/p

    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18C23G
17A24T
16A25T
15T26A
14G27C
13T28A
12G29C
11G30C
10C31G
9G32C
8G33C
7A34T
6T35A
5T36A
4T37A
3A38T
2C39G
1G40C
seq044-2 - hexABC Sequence 044 - replica 2
  Watson strand: GCATTTAGGCGGTGTAACGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq044-2/analyses/hbonds?from=1&to=490
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18C23G
17A24T
16A25T
15T26A
14G27C
13T28A
12G29C
11G30C
10C31G
9G32C
8G33C
7A34T
6T35A
5T36A
4T37A
3A38T
2C39G
1G40C
seq050-1 - hexABC Sequence 050 - replica 1
  Watson strand: GCCAGGAGCGAATTTTATGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq050-1/analyses/hbonds?from=1&to=490
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16T25A
15T26A
14T27A
13T28A
12A29T
11A30T
10G31C
9C32G
8G33C
7A34T
6G35C
5G36C
4A37T
3C38G
2C39G
1G40C
seq050-2 - hexABC Sequence 050 - replica 2
  Watson strand: GCCAGGAGCGAATTTTATGC
https://mmb.irbbarcelona.org/we

    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16T25A
15C26G
14G27C
13A28T
12G29C
11G30C
10G31C
9A32T
8T33A
7A34T
6C35G
5A36T
4A37T
3G38C
2C39G
1G40C
seq082-2 - hexABC Sequence 082 - replica 2
  Watson strand: GCGAACATAGGGAGCTATGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq082-2/analyses/hbonds?from=1&to=490
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16T25A
15C26G
14G27C
13A28T
12G29C
11G30C
10G31C
9A32T
8T33A
7A34T
6C35G
5A36T
4A37T
3G38C
2C39G
1G40C
seq083-1 - hexABC Sequence 083 - replica 1
  Watson strand: GCGAACGACCTTGTGAATGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq083-1/analyses/hbonds?from=1&to=490
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16A25T
15G26C
14T27A
13G28C
12T29A
11T30A
10C31G
9C32G
8A33T
7G34C
6C35G
5A36T
4A37T
3G38C
2C39G
1G40C
seq083-2 - hexABC Sequence 083 - replica 2
  Watson strand: GCGAACGACCTTGTGA

    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17G24C
16T25A
15T26A
14C27G
13G28C
12C29G
11T30A
10G31C
9G32C
8G33C
7C34G
6A35T
5G36C
4T37A
3G38C
2C39G
1G40C
seq163-1 - hexABC Sequence 163 - replica 1
  Watson strand: GCTCGACGACGTATGCGTGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq163-1/analyses/hbonds?from=1&to=490
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17G24C
16C25G
15G26C
14T27A
13A28T
12T29A
11G30C
10C31G
9A32T
8G33C
7C34G
6A35T
5G36C
4C37G
3T38A
2C39G
1G40C
seq163-2 - hexABC Sequence 163 - replica 2
  Watson strand: GCTCGACGACGTATGCGTGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq163-2/analyses/hbonds?from=1&to=490
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17G24C
16C25G
15G26C
14T27A
13A28T
12T29A
11G30C
10C31G
9A32T
8G33C
7C34G
6A35T
5G36C
4C37G
3T38A
2C39G
1G40C
seq165-1 - hexABC Sequence 165 - replica 1
  Watson strand: GCTCGCGAAGAAAGTA

### Plotting hydrogen bond values

**Pie Chart** representing HBs statistics for the specific **trimer**. <br>
Fewer than 3 HBs (for C, G) or 2 HBs (for A, T) indicate **unstable/broken base pairs**. 

In [None]:
import plotly.graph_objects as go

# One slice per hydrogen-bond value, in the order the values appear in `percentages`
labels = [f"{k} HBs" for k in percentages]
values = [percentages[k] for k in percentages]

# Donut-style pie; slices below 5 % are pulled out slightly so they remain visible
donut = go.Pie(
    labels=labels,
    values=values,
    hole=0.4,
    pull=[0.05 if share < 5 else 0 for share in values],
    textinfo='label+percent',
    marker={'colors': ['#FFDD57', '#FF6B6B', '#4ECDC4', '#556270']},
)
fig = go.Figure(data=[donut])

# Title plus the trimer name written in the hole of the donut
centre_label = dict(text=f"{trimer}", x=0.5, y=0.5, font_size=14, showarrow=False)
fig.update_layout(
    title_text=f"State Distribution for {trimer} trimer",
    title_font_size=20,
    showlegend=True,
    annotations=[centre_label],
)

fig.show()


## TRIMERS COMPARISON

### Select the trimers for comparison

Selecting the desired **trimers** from the list of all possible **DNA trimers** (e.g. GCC - GCG)

In [None]:
# Build the 64 possible DNA trimers and every unordered pair of distinct trimers ("AAA-AAT", ...)
bases = ['A', 'T', 'C', 'G']
trimer_list = list(map(''.join, itertools.product(bases, repeat=3)))
trimer_pairs_list = ['-'.join(pair) for pair in itertools.combinations(trimer_list, 2)]

# Dropdown used to pick the pair of trimers to compare ('GCC-GCG' is preselected)
mdsel = ipywidgets.Dropdown(
    description='Sel. trimer pair:',
    options=trimer_pairs_list,
    value='GCC-GCG',
    disabled=False,
)
display(mdsel)

### Extract hydrogen bond values

For each **trimer** found, extract the **hydrogen bond** values **along time** for the central nucleotide (e.g. G**C**G). 

* Endpoint used: https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/{id}/analyses/hbonds

In [None]:
from collections import Counter, defaultdict
from itertools import groupby


def _hb_percentages(hb_values):
    """Return {value: percentage of entries, rounded to 2 decimals}; {} for an empty list."""
    counts = Counter(hb_values)
    total = len(hb_values)
    return {k: round((v / total) * 100, 2) for k, v in counts.items()}


def _print_percentages(title, percentages):
    """Print `title` followed by one line per value, in ascending value order."""
    print(title)
    for value in sorted(percentages):
        print(f"  {value}: {percentages[value]}%")


def _print_lifetime_ranges(title, lifetimes):
    """Print `title` followed by the shortest and longest run length of every state."""
    print(title)
    for state in sorted(lifetimes):
        durations = lifetimes[state]
        print(f"  {state}: min = {min(durations)}, max = {max(durations)}")


def _collect_terminal_hbonds(results, trimer):
    """Download and pool the hydrogen-bond series of the terminal base pairs found for `trimer`.

    Args:
        results: Output of find_seq_term for `trimer`.
        trimer: The trimer that was searched; used to tell direct hits apart from
            reverse-complement hits in the report.

    Returns:
        Tuple (values, lifetimes): `values` is the flat list of every hydrogen-bond value
        collected; `lifetimes` is a defaultdict(list) mapping each value to the lengths of
        its runs of consecutive identical entries.
    """
    pooled_values = []
    pooled_lifetimes = defaultdict(list)

    for result in results:
        print(f"{result['id']} - {result['name']}")
        print(f"  Watson strand: {result['sequence']}")

        # Hydrogen-bond analysis endpoint for this project
        url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=-1&factor=500"
        print(url_param)

        with urllib.request.urlopen(url_param) as response:
            hbs = json.loads(response.read().decode("utf-8"))

        # Position reported for a motif that ends the strand; derived from the actual
        # sequence length instead of assuming 20-nt strands (19 for a 20-mer)
        last_terminal_pos = len(result['sequence']) - 1

        for motif, pos in result['positions']:
            print(f"    ↳ found '{motif}' at position {pos}")

            # Only interested in terminal (flanking) nucleotides
            if 2 < pos < last_terminal_pos:
                continue

            # Base-pair labels are matched by prefix: position followed by the central
            # base of the motif as it appears on the Watson strand
            code = f"{pos}{motif[1]}"
            print("Code: " + code)

            # Complementary case: compare against the trimer actually being processed
            # (not a variable left over from another cell); the lookup code is the same
            if motif != trimer:
                print("Code -rev-: " + code)

            for bp_item in hbs['hbonds']:

                print(bp_item['bp'])
                if not bp_item['bp'].startswith(code):
                    continue

                list_hbonds = bp_item['hbonds']
                pooled_values.extend(list_hbonds)

                # Lifetimes: lengths of the runs of consecutive identical values
                lifetimes = defaultdict(list)
                for value, group in groupby(list_hbonds):
                    length = sum(1 for _ in group)
                    lifetimes[value].append(length)
                    pooled_lifetimes[value].append(length)

                # Per-base-pair report
                _print_percentages("Percentages:", _hb_percentages(list_hbonds))
                _print_lifetime_ranges("Min and Max Lifetimes:", lifetimes)

    return pooled_values, pooled_lifetimes


trimer1, trimer2 = mdsel.value.split('-')

###############
# TRIMER 1
###############

results = find_seq_term(projects_metadata, trimer1)
aggregated_hbs_trimer1, aggregated_hbs_lt_trimer1 = _collect_terminal_hbonds(results, trimer1)

print("FINAL NUMBERS:")
percentages_trimer1 = _hb_percentages(aggregated_hbs_trimer1)
_print_percentages("Percentages Trimer 1:", percentages_trimer1)

###############
# TRIMER 2
###############

results = find_seq_term(projects_metadata, trimer2)
aggregated_hbs_trimer2, aggregated_hbs_lt_trimer2 = _collect_terminal_hbonds(results, trimer2)

print("FINAL NUMBERS:")
percentages_trimer2 = _hb_percentages(aggregated_hbs_trimer2)
_print_percentages("Percentages Trimer 2:", percentages_trimer2)

# Output min and max per state, pooled over every simulation of each trimer
_print_lifetime_ranges("Min and Max Lifetimes Trimer 1:", aggregated_hbs_lt_trimer1)
_print_lifetime_ranges("Min and Max Lifetimes Trimer 2:", aggregated_hbs_lt_trimer2)


    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18T23A
17A24T
16A25T
15T26A
14G27C
13T28A
12C29G
11C30G
10G31C
9A32T
8T33A
7A34T
6G35C
5T36A
4A37T
3A38T
2C39G
1G40C
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16A25T
15T26A
14G27C
13T28A
12C29G
11C30G
10G31C
9A32T
8T33A
7A34T
6G35C
5T36A
4A37T
3A38T
2C39G
1G40C
seq012-1 - hexABC Sequence 012 - replica 1
  Watson strand: GCAATGTCCAGGGGCGGAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq012-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17G24C
16G25C
15C26G
14G27C
13G28C
12G29C
11G30C
10A31T
9C32G
8C33G
7T34A
6G35C
5T36A
4A37T
3A38T
2C39G
1G40C
seq012-2 - hexABC Sequence 012 - replica 2
  Watson strand: GCAATGTCCAGGGGCGGAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq012-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17G24C
16G25C
15C26G
14G27C
13G28C
12G29C
11G30C
10A

    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18G23C
17C24G
16C25G
15G26C
14T27A
13C28G
12T29A
11G30C
10C31G
9A32T
8A33T
7C34G
6A35T
5A36T
4G37C
3A38T
2C39G
1G40C
seq020-2 - hexABC Sequence 020 - replica 2
  Watson strand: GCAGAACAACGTCTGCCGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq020-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18G23C
17C24G
16C25G
15G26C
14T27A
13C28G
12T29A
11G30C
10C31G
9A32T
8A33T
7C34G
6A35T
5A36T
4G37C
3A38T
2C39G
1G40C
seq021-1 - hexABC Sequence 021 - replica 1
  Watson strand: GCAGACAGGAAAAACCATGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq021-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18T23A
17A24T
16C25G
15C26G
14A27T
13A28T
12A29T
11A30T
10A31T
9G32C
8G33C
7A34T
6C35G
5A36T
4G37C
3A38T
2C39G
1G40C
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16C25G
15C26G
14A27T
13A28T
12A29T
11A30T
10A

    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17C24G
16T25A
15A26T
14T27A
13G28C
12C29G
11T30A
10G31C
9A32T
8T33A
7A34T
6A35T
5G36C
4G37C
3A38T
2C39G
1G40C
seq031-1 - hexABC Sequence 031 - replica 1
  Watson strand: GCAGGACTTGGATGTTAAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq031-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17A24T
16T25A
15T26A
14G27C
13T28A
12A29T
11G30C
10G31C
9T32A
8T33A
7C34G
6A35T
5G36C
4G37C
3A38T
2C39G
1G40C
seq031-2 - hexABC Sequence 031 - replica 2
  Watson strand: GCAGGACTTGGATGTTAAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq031-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18A23T
17A24T
16T25A
15T26A
14G27C
13T28A
12A29T
11G30C
10G31C
9T32A
8T33A
7C34G
6A35T
5G36C
4G37C
3A38T
2C39G
1G40C
seq032-1 - hexABC Sequence 032 - replica 1
  Watson strand: GCAGGCGACTTACATGCAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/pro

    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18G23C
17G24C
16G25C
15A26T
14C27G
13A28T
12A29T
11A30T
10C31G
9A32T
8G33C
7T34A
6T35A
5G36C
4T37A
3A38T
2C39G
1G40C
seq042-2 - hexABC Sequence 042 - replica 2
  Watson strand: GCATGTTGACAAACAGGGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq042-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18G23C
17G24C
16G25C
15A26T
14C27G
13A28T
12A29T
11A30T
10C31G
9A32T
8G33C
7T34A
6T35A
5G36C
4T37A
3A38T
2C39G
1G40C
seq043-1 - hexABC Sequence 043 - replica 1
  Watson strand: GCATTACCAAGGGCCAGGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq043-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GCA' at position 2
Code: 4C
20C21G
19G22C
18G23C
17G24C
16A25T
15C26G
14C27G
13G28C
12G29C
11G30C
10A31T
9A32T
8C33G
7C34G
6A35T
5T36A
4T37A
3A38T
2C39G
1G40C
seq043-2 - hexABC Sequence 043 - replica 2
  Watson strand: GCATTACCAAGGGCCAGGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/pro

    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17T24A
16C25G
15G26C
14C27G
13A28T
12A29T
11A30T
10G31C
9G32C
8G33C
7A34T
6G35C
5C36G
4G37C
3C38G
2C39G
1G40C
seq073-1 - hexABC Sequence 073 - replica 1
  Watson strand: GCCGTATTGAGGATTCGTGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq073-1/analyses/hbonds?from=1&to=-1
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17G24C
16C25G
15T26A
14T27A
13A28T
12G29C
11G30C
10A31T
9G32C
8T33A
7T34A
6A35T
5T36A
4G37C
3C38G
2C39G
1G40C
seq073-2 - hexABC Sequence 073 - replica 2
  Watson strand: GCCGTATTGAGGATTCGTGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq073-2/analyses/hbonds?from=1&to=-1
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17G24C
16C25G
15T26A
14T27A
13A28T
12G29C
11G30C
10A31T
9G32C
8T33A
7T34A
6A35T
5T36A
4G37C
3C38G
2C39G
1G40C
seq080-1 - hexABC Sequence 080 - replica 1
  Watson strand: GCGAAACCCTGCAAGCAT

    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17G24C
16T25A
15A26T
14A27T
13C28G
12G29C
11G30C
10G31C
9C32G
8G33C
7A34T
6A35T
5T36A
4G37C
3G38C
2C39G
1G40C
seq125-2 - hexABC Sequence 125 - replica 2
  Watson strand: GCGGTAAGCGGGCAATGTGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq125-2/analyses/hbonds?from=1&to=-1
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17G24C
16T25A
15A26T
14A27T
13C28G
12G29C
11G30C
10G31C
9C32G
8G33C
7A34T
6A35T
5T36A
4G37C
3G38C
2C39G
1G40C
seq130-1 - hexABC Sequence 130 - replica 1
  Watson strand: GCGTAACTCTTACGGGATGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq130-1/analyses/hbonds?from=1&to=-1
    ↳ found 'TGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18T23A
17A24T
16G25C
15G26C
14G27C
13C28G
12A29T
11T30A
10T31A
9C32G
8T33A
7C34G
6A35T
5A36T
4T37A
3G38C
2C39G
1G40C
seq130-2 - hexABC Sequence 130 - replica 2
  Watson strand: GCGTAACTCTTACGGGAT

    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17A24T
16A25T
15A26T
14C27G
13G28C
12C29G
11C30G
10T31A
9G32C
8A33T
7T34A
6C35G
5G36C
4A37T
3A38T
2C39G
1G40C
seq015-1 - hexABC Sequence 015 - replica 1
  Watson strand: GCACATGTCAGAATATTGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq015-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17T24A
16T25A
15A26T
14T27A
13A28T
12A29T
11G30C
10A31T
9C32G
8T33A
7G34C
6T35A
5A36T
4C37G
3A38T
2C39G
1G40C
seq015-2 - hexABC Sequence 015 - replica 2
  Watson strand: GCACATGTCAGAATATTGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq015-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17T24A
16T25A
15A26T
14T27A
13A28T
12A29T
11G30C
10A31T
9C32G
8T33A
7G34C
6T35A
5A36T
4C37G
3A38T
2C39G
1G40C
seq016-1 - hexABC Sequence 016 - replica 1
  Watson strand: GCACCCATTAAAAAGGCG

    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18G23C
17A24T
16A25T
15A26T
14G27C
13T28A
12A29T
11T30A
10G31C
9G32C
8C33G
7A34T
6A35T
5A36T
4A37T
3C38G
2C39G
1G40C
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17A24T
16A25T
15A26T
14G27C
13T28A
12A29T
11T30A
10G31C
9G32C
8C33G
7A34T
6A35T
5A36T
4A37T
3C38G
2C39G
1G40C
seq046-1 - hexABC Sequence 046 - replica 1
  Watson strand: GCCAAGCTCTCGAAGCAGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq046-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18G23C
17A24T
16C25G
15G26C
14A27T
13A28T
12G29C
11C30G
10T31A
9C32G
8T33A
7C34G
6G35C
5A36T
4A37T
3C38G
2C39G
1G40C
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17A24T
16C25G
15G26C
14A27T
13A28T
12G29C
11C30G
10T31A
9C32G
8T33A
7C34G
6G35C
5A36T
4A37T
3C38G
2C39G
1G40C
seq046-2 - hexABC Sequence 046 - replica 2
  Watson strand: GCCAAGCTCT

    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18G23C
17A24T
16A25T
15C26G
14A27T
13A28T
12C29G
11G30C
10C31G
9T32A
8A33T
7A34T
6A35T
5A36T
4C37G
Percentages:
  0: 0.01%
  1: 0.08%
  2: 0.64%
  3: 99.27%
Min and Max Lifetimes:
  0: min = 1, max = 42
  1: min = 1, max = 58
  2: min = 1, max = 5
  3: min = 1, max = 1224
3C38G
2C39G
1G40C
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17A24T
16A25T
15C26G
14A27T
13A28T
12C29G
11G30C
10C31G
9T32A
8A33T
7A34T
6A35T
5A36T
4C37G
3C38G
2C39G
1G40C
seq056-2 - hexABC Sequence 056 - replica 2
  Watson strand: GCCCAAAATCGCAACAAGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq056-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18G23C
17A24T
16A25T
15C26G
14A27T
13A28T
12C29G
11G30C
10C31G
9T32A
8A33T
7A34T
6A35T
5A36T
4C37G
Percentages:
  0: 0.0%
  1: 0.09%
  2: 0.65%
  3: 99.27%
Min and Max Lifetimes:
  0: min = 1, max = 1
  

    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18C23G
17A24T
16T25A
15A26T
14T27A
13G28C
12G29C
11G30C
10G31C
9A32T
8G33C
7G34C
6C35G
5G36C
4C37G
Percentages:
  0: 0.03%
  1: 0.08%
  2: 0.5%
  3: 99.38%
Min and Max Lifetimes:
  0: min = 1, max = 167
  1: min = 1, max = 39
  2: min = 1, max = 2
  3: min = 1, max = 1742
3C38G
2C39G
1G40C
seq064-1 - hexABC Sequence 064 - replica 1
  Watson strand: GCCCTGTGATATTGTAAAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq064-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18A23T
17A24T
16A25T
15T26A
14G27C
13T28A
12T29A
11A30T
10T31A
9A32T
8G33C
7T34A
6G35C
5T36A
4C37G
Percentages:
  0: 0.0%
  1: 0.06%
  2: 0.6%
  3: 99.34%
Min and Max Lifetimes:
  0: min = 1, max = 8
  1: min = 1, max = 26
  2: min = 1, max = 2
  3: min = 1, max = 1655
3C38G
2C39G
1G40C
seq064-2 - hexABC Sequence 064 - replica 2
  Watson strand: GCCCTGTGATATTGTAAAGC
https://mmb.irbbarcelona

    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18T23A
17G24C
16C25G
15T26A
14T27A
13A28T
12G29C
11G30C
10A31T
9G32C
8T33A
7T34A
6A35T
5T36A
4G37C
3C38G
2C39G
1G40C
seq074-1 - hexABC Sequence 074 - replica 1
  Watson strand: GCCTAGAAGGTATCGTCAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq074-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18A23T
17C24G
16T25A
15G26C
14C27G
13T28A
12A29T
11T30A
10G31C
9G32C
8A33T
7A34T
6G35C
5A36T
4T37A
3C38G
2C39G
1G40C
seq074-2 - hexABC Sequence 074 - replica 2
  Watson strand: GCCTAGAAGGTATCGTCAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq074-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GCC' at position 2
Code: 4C
Code -rev-: 4C
20C21G
19G22C
18A23T
17C24G
16T25A
15G26C
14C27G
13T28A
12A29T
11T30A
10G31C
9G32C
8A33T
7A34T
6G35C
5A36T
4T37A
3C38G
2C39G
1G40C
seq075-1 - hexABC Sequence 075 - replica 1
  Watson strand: GCCTATAGAGCGTCATGAGC
https:

    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17A24T
16C25G
15A26T
14G27C
13C28G
12A29T
11C30G
10A31T
9C32G
8C33G
7C34G
6C35G
5A36T
4G37C
3G38C
2C39G
1G40C
seq105-2 - hexABC Sequence 105 - replica 2
  Watson strand: GCGGACCCCACACGACAGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq105-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17A24T
16C25G
15A26T
14G27C
13C28G
12A29T
11C30G
10A31T
9C32G
8C33G
7C34G
6C35G
5A36T
4G37C
3G38C
2C39G
1G40C
seq118-1 - hexABC Sequence 118 - replica 1
  Watson strand: GCGGCTAAGGCTCGCACGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq118-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17C24G
16A25T
15C26G
14G27C
13C28G
12T29A
11C30G
10G31C
9G32C
8A33T
7A34T
6T35A
5C36G
4G37C
3G38C
2C39G
1G40C
seq118-2 - hexABC Sequence 118 - replica 2
  Watson strand: GCGGCTAAGGCTCGCACG

    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17G24C
16A25T
15A26T
14A27T
13G28C
12G29C
11T30A
10C31G
9T32A
8C33G
7C34G
6T35A
5G36C
4T37A
3G38C
2C39G
1G40C
seq145-1 - hexABC Sequence 145 - replica 1
  Watson strand: GCGTTACTTATGAGATCGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq145-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17C24G
16T25A
15A26T
14G27C
13A28T
12G29C
11T30A
10A31T
9T32A
8T33A
7C34G
6A35T
5T36A
4T37A
3G38C
2C39G
1G40C
seq145-2 - hexABC Sequence 145 - replica 2
  Watson strand: GCGTTACTTATGAGATCGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq145-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17C24G
16T25A
15A26T
14G27C
13A28T
12G29C
11T30A
10A31T
9T32A
8T33A
7C34G
6A35T
5T36A
4T37A
3G38C
2C39G
1G40C
seq152-1 - hexABC Sequence 152 - replica 1
  Watson strand: GCTAATTAAGTTCCGGAG

    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17G24C
16G25C
15T26A
14C27G
13A28T
12T29A
11T30A
10A31T
9G32C
8G33C
7T34A
6A35T
5T36A
4G37C
3T38A
2C39G
1G40C
seq180-2 - hexABC Sequence 180 - replica 2
  Watson strand: GCTGTATGGATTACTGGGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq180-2/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17G24C
16G25C
15T26A
14C27G
13A28T
12T29A
11T30A
10A31T
9G32C
8G33C
7T34A
6A35T
5T36A
4G37C
3T38A
2C39G
1G40C
seq181-1 - hexABC Sequence 181 - replica 1
  Watson strand: GCTGTCTGGTCCGTAGAGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq181-1/analyses/hbonds?from=1&to=-1
    ↳ found 'GGC' at position 19
Code: 21G
Code -rev-: 21G
20C21G
19G22C
18G23C
17A24T
16G25C
15A26T
14T27A
13G28C
12C29G
11C30G
10T31A
9G32C
8G33C
7T34A
6C35G
5T36A
4G37C
3T38A
2C39G
1G40C
seq181-2 - hexABC Sequence 181 - replica 2
  Watson strand: GCTGTCTGGTCCGTAGAG

### Plotting hydrogen bond values

**Pie Charts** representing HBs statistics for the specific **trimers**. <br>
Fewer than 3 HBs (for C, G) or 2 HBs (for A, T) indicate **unstable/broken base pairs**. 

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Slice colour for each hydrogen-bond count shown in the pies.
color_map = {
    0: '#1f77b4',  # blue
    1: '#ff7f0e',  # orange
    2: '#2ca02c',  # green
    3: '#d62728'   # red
}

# Two side-by-side 'domain' cells, one donut chart per trimer.
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])

# (percentages dict, trace name) for the left and right pie, in column order.
pie_inputs = [
    (percentages_trimer1, "Study 1"),
    (percentages_trimer2, "Study 2"),
]

for pie_col, (percentages, trace_name) in enumerate(pie_inputs, start=1):
    hb_counts = list(percentages)
    slice_values = list(percentages.values())
    donut = go.Pie(
        labels=[f"{k} HBs" for k in hb_counts],
        values=slice_values,
        hole=0.4,
        textinfo='label+percent',
        marker=dict(colors=[color_map[k] for k in hb_counts]),
        # Pull out slices below 5 so that very small slices stay visible.
        pull=[0.05 if v < 5 else 0 for v in slice_values],
        name=trace_name
    )
    fig.add_trace(donut, row=1, col=pie_col)

# Figure title plus one label in the hole of each donut.
tit = f"{trimer1}-{trimer2}"
hole_labels = [
    dict(text=f"{trimer1}", x=0.205, y=0.5, font_size=14, showarrow=False),
    dict(text=f"{trimer2}", x=0.795, y=0.5, font_size=14, showarrow=False)
]
title_settings = dict(
    text=tit,
    x=0.45,  # left edge of the title sits at 45% of the figure width
    y=0.98,
    xanchor='left',
    yanchor='top',
    font=dict(size=22)
)
fig.update_layout(title=title_settings, annotations=hole_labels)

fig.show()


### Plotting hydrogen bond lifetimes 

**Histograms** representing HBs lifetimes for the specific **trimers**. <br>


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Hydrogen-bond states recorded for the first trimer, in ascending order.
# Each state gets its own histogram panel.
states = sorted(aggregated_hbs_lt_trimer1)
n_states = len(states)

if not states:
    # make_subplots() rejects cols=0, so report the missing data instead of crashing.
    print(f"No hydrogen bond lifetimes available for {trimer1}; nothing to plot.")
else:
    # Set up subplots: one row, one column per state
    fig = make_subplots(rows=1, cols=n_states, subplot_titles=[f"{s} HBs" for s in states])

    # Add one histogram per state
    for col, state in enumerate(states, start=1):
        lifetimes = aggregated_hbs_lt_trimer1[state]
        if len(lifetimes) == 0:
            # max()/min() raise on an empty sequence; leave this panel blank.
            continue
        # Upper bound on the bin count: the span of the data plus one, capped at 20.
        # nbinsx must be an integer, so the span is cast in case lifetimes are floats.
        n_bins = min(20, int(max(lifetimes) - min(lifetimes)) + 1)
        fig.add_trace(
            go.Histogram(
                x=lifetimes,
                nbinsx=n_bins,
                marker_color='#1f77b4',
                name=f"{state} HBs",
                showlegend=False
            ),
            row=1,
            col=col
        )
        fig.update_xaxes(title_text="Lifetime (frames)", row=1, col=col)

    # The y-axis title is shown on the left-most panel only (set once, not per iteration).
    fig.update_yaxes(title_text="Frequency", row=1, col=1)

    # Layout polish: fixed height, width grows with the number of panels.
    fig.update_layout(
        title_text=f"Lifetime Distributions per State - {trimer1}",
        height=400,
        width=300 * n_states,
        bargap=0.1,
        margin=dict(t=50, r=20, l=20, b=40)
    )

    fig.show()


In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Hydrogen-bond states recorded for the second trimer, in ascending order.
# Each state gets its own histogram panel.
states = sorted(aggregated_hbs_lt_trimer2)
n_states = len(states)

if not states:
    # make_subplots() rejects cols=0, so report the missing data instead of crashing.
    print(f"No hydrogen bond lifetimes available for {trimer2}; nothing to plot.")
else:
    # Set up subplots: one row, one column per state
    fig = make_subplots(rows=1, cols=n_states, subplot_titles=[f"{s} HBs" for s in states])

    # Add one histogram per state
    for col, state in enumerate(states, start=1):
        lifetimes = aggregated_hbs_lt_trimer2[state]
        if len(lifetimes) == 0:
            # max()/min() raise on an empty sequence; leave this panel blank.
            continue
        # Upper bound on the bin count: the span of the data plus one, capped at 20.
        # nbinsx must be an integer, so the span is cast in case lifetimes are floats.
        n_bins = min(20, int(max(lifetimes) - min(lifetimes)) + 1)
        fig.add_trace(
            go.Histogram(
                x=lifetimes,
                nbinsx=n_bins,
                marker_color='#1f77b4',
                name=f"{state} HBs",
                showlegend=False
            ),
            row=1,
            col=col
        )
        fig.update_xaxes(title_text="Lifetime (frames)", row=1, col=col)

    # The y-axis title is shown on the left-most panel only (set once, not per iteration).
    fig.update_yaxes(title_text="Frequency", row=1, col=1)

    # Layout polish: fixed height, width grows with the number of panels.
    fig.update_layout(
        title_text=f"Lifetime Distributions per State - {trimer2}",
        height=400,
        width=300 * n_states,
        bargap=0.1,
        margin=dict(t=50, r=20, l=20, b=40)
    )

    fig.show()
