### Hydrogen Bonding 
***

**Jupyter Notebook** designed to demonstrate the power of the **hexABC REST-API programmatic interface**. 

The **REST-API** is used to extract **Hydrogen Bonds** statistics (nº of HBs, lifetimes) for specific **base pairs** (e.g. T-A, G-C) in different **MD simulations** and different **trimers** (e.g. TCA). 

The workflow is powered by the [hexABC database REST API](https://mmb.irbbarcelona.org/webdev3/hexABC/rest)
***

### Importing auxiliary libraries

In [178]:
import requests
import urllib
import json
import plotly
import itertools
import ipywidgets
from IPython.display import display
from math import ceil

### Defining auxiliary functions

In [179]:
#
# find_seq: finding MD simulations containing a sequence fragment; Returns metadata for the systems found.
#
def find_seq(json_data, pattern):
    complement_map = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    complement = ''.join(complement_map[base] for base in reversed(pattern))

    matching_projects = []

    for project in json_data:
        watson_seq = project.get('sequences', [None])[0]

        if watson_seq and len(watson_seq) > 4:
            trimmed_seq = watson_seq[2:-2]  # Skip first and last 2 bases (flanking regions)
            positions = []

            for motif in [pattern, complement]:
            #for motif in [pattern]:
                pos = trimmed_seq.find(motif)
                while pos != -1:
                    # Adjust position relative to original sequence
                    positions.append((motif, pos + 2))
                    pos = trimmed_seq.find(motif, pos + 1)

            if positions:
                matching_projects.append({
                    'id': project['id'],
                    'name': project['name'],
                    'sequence': watson_seq,
                    'positions': positions
                })

    return matching_projects

### Base REST-API URL

In [180]:
API_BASE_URL = "https://mmb.irbbarcelona.org/webdev3/hexABC/api"

### Getting projects info

Retrieving all the **projects metadata** from the **hexABC database**.

* Endpoint used: https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects

In [181]:
url_get_projects = f'{API_BASE_URL}/projects?limit=1000'
with urllib.request.urlopen(url_get_projects) as response:
    r_projects = json.loads(response.read().decode("utf-8"))
print(json.dumps(r_projects, indent=4))

{
    "total": 380,
    "projects": [
        {
            "id": "seq001-1",
            "accession": "seq001-1",
            "name": "hexABC Sequence 001 - replica 1",
            "sequences": [
                "GCAAACTTGTATATGTGTGC",
                "GCACACATATACAAGTTTGC"
            ],
            "analyses": [
                "clusters",
                "clusters-00",
                "clusters-01",
                "dist-perres",
                "energies",
                "fluctuation",
                "hbonds",
                "interactions",
                "pca",
                "rgyr",
                "rmsd-pairwise",
                "rmsd-perres",
                "rmsds",
                "sasa",
                "helical"
            ]
        },
        {
            "id": "seq001-2",
            "accession": "seq001-2",
            "name": "hexABC Sequence 001 - replica 2",
            "sequences": [
                "GCAAACTTGTATATGTGTGC",
                "GCACACATATACAAGTTT

### Pagination

If the number of **projects** exceeds the default limit (50), **pagination** is needed. In this case, we need to loop over the returned pages to store all the desired information.  


In [182]:
# Set a list to store all the mined metadata
projects_metadata = []

# Set a list to store all the mined accession values
accessions = []

# Get the number of projects from the previous response
n_projects = r_projects['total']

# Set the limit of projects per page
limit = 100

# Calculate the expected number of pages
pages = ceil(n_projects / limit)

# Iterate over pages
for page in range(1, pages + 1):
    
    print(f'Requesting page {page}/{pages}', end='\r')
    
    # Set the URL for the projects endpoint
    # Include both limit and page parameters
    paginated_url = f'{API_BASE_URL}/projects?limit={limit}&page={page}'
    
    # Query the API
    with urllib.request.urlopen(paginated_url) as resp:
        response = json.loads(resp.read().decode("utf-8"))
        
        # Mine target data
        projects = response['projects']
        project_accessions = [ project['accession'] for project in projects]
        accessions += project_accessions
        projects_metadata = [*projects_metadata, *projects]
    
print(f'We have obtained metadata information for {len(accessions)} simulations')

We have obtained metadata information for 380 simulations


## TRIMERS

### Select the trimers

Selecting the desired **trimers** from the list of all possible **DNA trimers** (e.g. GCG)

In [187]:
# Generate all possible DNA trimers
bases = ['A', 'T', 'C', 'G']
trimer_list = [''.join(p) for p in itertools.product(bases, repeat=3)]

mdsel = ipywidgets.Dropdown(
    options=trimer_list,
    description='Sel. trimer:',
    disabled=False,
    value='CAA' # default value
)
display(mdsel)

Dropdown(description='Sel. trimer:', index=32, options=('AAA', 'AAT', 'AAC', 'AAG', 'ATA', 'ATT', 'ATC', 'ATG'…

### Searching for trimers

Looking for specific **trimer sequence** within the **dataset**. 


In [188]:
trimer = mdsel.value
results = find_seq(projects_metadata, trimer)

for result in results:
    print(f"{result['id']} - {result['name']}")
    print(f"  Watson strand: {result['sequence']}")
    for motif, pos in result['positions']:
        print(f"    ↳ found '{motif}' at position {pos}")


seq001-1 - hexABC Sequence 001 - replica 1
  Watson strand: GCAAACTTGTATATGTGTGC
    ↳ found 'TTG' at position 6
seq001-2 - hexABC Sequence 001 - replica 2
  Watson strand: GCAAACTTGTATATGTGTGC
    ↳ found 'TTG' at position 6
seq004-1 - hexABC Sequence 004 - replica 1
  Watson strand: GCAAATTTGGAGTCTAGAGC
    ↳ found 'TTG' at position 6
seq004-2 - hexABC Sequence 004 - replica 2
  Watson strand: GCAAATTTGGAGTCTAGAGC
    ↳ found 'TTG' at position 6
seq007-1 - hexABC Sequence 007 - replica 1
  Watson strand: GCAACGCAGTGTTGGACGGC
    ↳ found 'TTG' at position 11
seq007-2 - hexABC Sequence 007 - replica 2
  Watson strand: GCAACGCAGTGTTGGACGGC
    ↳ found 'TTG' at position 11
seq008-1 - hexABC Sequence 008 - replica 1
  Watson strand: GCAAGCTAGTCCGCAAAGGC
    ↳ found 'CAA' at position 13
seq008-2 - hexABC Sequence 008 - replica 2
  Watson strand: GCAAGCTAGTCCGCAAAGGC
    ↳ found 'CAA' at position 13
seq009-1 - hexABC Sequence 009 - replica 1
  Watson strand: GCAAGGCCTATTGCTAGCGC
    ↳ found

### Extract hydrogen bond values

For each **trimer** found, extract the **hydrogen bond** values **along time** for the central nucleotide (e.g. G**C**G). 

* Endpoint used: https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/{id}/analyses/hbonds

In [196]:
from collections import Counter, defaultdict
from itertools import groupby

aggregated_hbs = []
for result in results:
    print(f"{result['id']} - {result['name']}")
    print(f"  Watson strand: {result['sequence']}")
    #if result['id']=="seq156-2" : continue
    #if result['id']=="seq092-2" : continue
    #if result['id']=="seq095-2" : continue
    #if result['id']=="seq175-1" : continue
    #if result['id']=="seq105-1" : continue
        
    if result['id']=="seq010-1" : continue # HB wrong
    if result['id']=="seq013-1" : continue # HB wrong
    if result['id']=="seq030-1" : continue # HB wrong
    if result['id']=="seq021-1" : continue # HB wrong
    if result['id']=="seq047-1" : continue # HB wrong
    if result['id']=="seq056-1" : continue # HB wrong
    if result['id']=="seq058-1" : continue # HB wrong
    if result['id']=="seq062-1" : continue # HB wrong
    if result['id']=="seq070-1" : continue # HB wrong
    if result['id']=="seq084-1" : continue # HB wrong
    if result['id']=="seq130-1" : continue # HB wrong
    if result['id']=="seq142-1" : continue # HB wrong
    if result['id']=="seq143-1" : continue # HB wrong
    if result['id']=="seq151-1" : continue # HB wrong
    if result['id']=="seq179-1" : continue # HB wrong
 
    if result['id'].endswith("-2"): continue

    url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=490"
    #url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=490000"
    print(url_param)
    
    with urllib.request.urlopen(url_param) as response:
        hbs = json.loads(response.read().decode("utf-8"))
        
    for motif, pos in result['positions']:
        print(f"    ↳ found '{motif}' at position {pos}")

        pos_index = pos + 2
        code = f"{pos_index}{motif[1]}"
        print("Code: " + code)

        # Complementary case
        if motif!=trimer:
            #pos_index = pos + 2
            code = f"{pos_index}{motif[1]}"
            print("Code -rev-: " + code)
            
        
        for bp_item in hbs['hbonds']:
            #print(json.dumps(bp_item['bp'], indent=4))
            
            print(bp_item['bp'])
            if (bp_item['bp'].startswith(code)):
                list_hbonds = bp_item['hbonds']
                aggregated_hbs.extend(list_hbonds)
                #print(list_hbonds)
                print(len(list_hbonds))
                
                # Percentages
                counts = Counter(list_hbonds)
                total = len(list_hbonds)
                percentages = {k: round((v / total) * 100, 2) for k, v in counts.items()}

                # Lifetimes
                lifetimes = defaultdict(list)
                for value, group in groupby(list_hbonds):
                    length = len(list(group))
                    lifetimes[value].append(length)

                # Output
                print("Percentages:")
                for value in sorted(percentages):
                    print(f"  {value}: {percentages[value]}%")

                # Output min and max per state
                print("Min and Max Lifetimes:")
                for state in sorted(lifetimes):
                    durations = lifetimes[state]
                    print(f"  {state}: min = {min(durations)}, max = {max(durations)}")
    
print("FINAL NUMBERS:")

# Percentages
counts = Counter(aggregated_hbs)
total = len(aggregated_hbs)
percentages = {k: round((v / total) * 100, 2) for k, v in counts.items()}

# Output
print("Percentages:")
for value in sorted(percentages):
    print(f"  {value}: {percentages[value]}%")

seq001-1 - hexABC Sequence 001 - replica 1
  Watson strand: GCAAACTTGTATATGTGTGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq001-1/analyses/hbonds?from=1&to=490
    ↳ found 'TTG' at position 6
Code: 8T
Code -rev-: 8T
20C21G
19G22C
18T23A
17G24C
16T25A
15G26C
14T27A
13A28T
12T29A
11A30T
10T31A
9G32C
8T33A
490
Percentages:
  1: 2.65%
  2: 97.35%
Min and Max Lifetimes:
  1: min = 1, max = 2
  2: min = 1, max = 210
7T34A
6C35G
5A36T
4A37T
3A38T
2C39G
1G40C
seq001-2 - hexABC Sequence 001 - replica 2
  Watson strand: GCAAACTTGTATATGTGTGC
seq004-1 - hexABC Sequence 004 - replica 1
  Watson strand: GCAAATTTGGAGTCTAGAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq004-1/analyses/hbonds?from=1&to=490
    ↳ found 'TTG' at position 6
Code: 8T
Code -rev-: 8T
20C21G
19G22C
18A23T
17G24C
16A25T
15T26A
14C27G
13T28A
12G29C
11A30T
10G31C
9G32C
8T33A
490
Percentages:
  1: 3.27%
  2: 96.73%
Min and Max Lifetimes:
  1: min = 1, max = 1
  2: min = 2, max = 150
7T34A
6T35A
5A3

HTTPError: HTTP Error 500: Internal Server Error

### Plotting hydrogen bond values

**Pie Chart** representing HBs statistics for the specific **trimer**. <br>
Less than 3 HBs (for C,G) or 2 HBs (for A,T) means **unstable/broken base pairs**. 

In [194]:
import plotly.graph_objects as go

# Filter and prep labels and values
labels = [f"{k} HBs" for k, v in percentages.items()]
values = list(percentages.values())

fig = go.Figure(data=[go.Pie(
    labels=labels,
    values=values,
    hole=0.4,  # Makes it a donut
    pull=[0.05 if v < 5 else 0 for v in values],  # Emphasize smaller slices
    textinfo='label+percent',
    marker=dict(colors=['#FFDD57', '#FF6B6B', '#4ECDC4', '#556270'])
)])

fig.update_layout(
    title_text=f"State Distribution for {trimer} trimer",
    title_font_size=20,
    showlegend=True
)

fig.show()


## TRIMERS COMPARISON

### Select the trimers for comparison

Selecting the desired **trimers** from the list of all possible **DNA trimers** (e.g. GCG - ATT)

In [197]:
# Generate all possible DNA trimers
bases = ['A', 'T', 'C', 'G']
trimer_list = [''.join(p) for p in itertools.product(bases, repeat=3)]
trimer_pairs_list = [f"{a}-{b}" for a, b in itertools.combinations(trimer_list, 2)]

mdsel = ipywidgets.Dropdown(
    options=trimer_pairs_list,
    description='Sel. trimer pair:',
    disabled=False,
    value='AAT-CAG' # default value
)
display(mdsel)

Dropdown(description='Sel. trimer pair:', index=96, options=('AAA-AAT', 'AAA-AAC', 'AAA-AAG', 'AAA-ATA', 'AAA-…

### Extract hydrogen bond values

For each **trimer** found, extract the **hydrogen bond** values **along time** for the central nucleotide (e.g. G**C**G). 

* Endpoint used: https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/{id}/analyses/hbonds

In [228]:
from collections import Counter, defaultdict
from itertools import groupby

trimer1, trimer2 = mdsel.value.split('-')

results = find_seq(projects_metadata, trimer1)

aggregated_hbs_trimer1 = []
aggregated_hbs_lt_trimer1 = defaultdict(list)
for result in results:
    print(f"{result['id']} - {result['name']}")
    print(f"  Watson strand: {result['sequence']}")
        
    if result['id']=="seq010-1" : continue # HB wrong
    if result['id']=="seq013-1" : continue # HB wrong
    if result['id']=="seq021-1" : continue # HB wrong
    if result['id']=="seq030-1" : continue # HB wrong
    if result['id']=="seq032-1" : continue # HB wrong
    if result['id']=="seq040-1" : continue # HB wrong
    if result['id']=="seq047-1" : continue # HB wrong
    if result['id']=="seq050-1" : continue # HB wrong
    if result['id']=="seq055-1" : continue # HB wrong
    if result['id']=="seq056-1" : continue # HB wrong
    if result['id']=="seq058-1" : continue # HB wrong
    if result['id']=="seq062-1" : continue # HB wrong
    if result['id']=="seq064-1" : continue # HB wrong
    if result['id']=="seq068-1" : continue # HB wrong
    if result['id']=="seq070-1" : continue # HB wrong
    if result['id']=="seq071-1" : continue # HB wrong
    if result['id']=="seq073-1" : continue # HB wrong
    if result['id']=="seq084-1" : continue # HB wrong
    if result['id']=="seq085-1" : continue # HB wrong
    if result['id']=="seq091-1" : continue # HB wrong
    if result['id']=="seq093-1" : continue # HB wrong
    if result['id']=="seq097-1" : continue # HB wrong
    if result['id']=="seq098-1" : continue # HB wrong
    if result['id']=="seq101-1" : continue # HB wrong
    if result['id']=="seq130-1" : continue # HB wrong
    if result['id']=="seq142-1" : continue # HB wrong
    if result['id']=="seq143-1" : continue # HB wrong
    if result['id']=="seq144-1" : continue # HB wrong
    if result['id']=="seq150-1" : continue # HB wrong
    if result['id']=="seq151-1" : continue # HB wrong
    if result['id']=="seq152-1" : continue # HB wrong
    if result['id']=="seq153-1" : continue # HB wrong
    if result['id']=="seq155-1" : continue # HB wrong
    if result['id']=="seq159-1" : continue # HB wrong
    if result['id']=="seq170-1" : continue # HB wrong
    if result['id']=="seq171-1" : continue # HB wrong
    if result['id']=="seq173-1" : continue # HB wrong
    if result['id']=="seq174-1" : continue # HB wrong
    if result['id']=="seq177-1" : continue # HB wrong
    if result['id']=="seq179-1" : continue # HB wrong
    if result['id']=="seq180-1" : continue # HB wrong
    if result['id']=="seq182-1" : continue # HB wrong
    if result['id']=="seq184-1" : continue # HB wrong
    if result['id']=="seq185-1" : continue # HB wrong
    if result['id']=="seq186-1" : continue # HB wrong
    if result['id']=="seq189-1" : continue # HB wrong
 
    if result['id'].endswith("-2"): continue

    url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=500"
    #url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=490000"
    print(url_param)
    
    with urllib.request.urlopen(url_param) as response:
        hbs = json.loads(response.read().decode("utf-8"))
        
    for motif, pos in result['positions']:
        print(f"    ↳ found '{motif}' at position {pos}")

        pos_index = pos + 2
        code = f"{pos_index}{motif[1]}"
        print("Code: " + code)

        # Complementary case
        if motif!=trimer:
            code = f"{pos_index}{motif[1]}"
            print("Code -rev-: " + code)
        
        for bp_item in hbs['hbonds']:
            
            print(bp_item['bp'])
            if (bp_item['bp'].startswith(code)):
                list_hbonds = bp_item['hbonds']
                aggregated_hbs_trimer1.extend(list_hbonds)
                
                # Percentages
                counts = Counter(list_hbonds)
                total = len(list_hbonds)
                percentages = {k: round((v / total) * 100, 2) for k, v in counts.items()}

                # Lifetimes
                lifetimes = defaultdict(list)
                for value, group in groupby(list_hbonds):
                    length = len(list(group))
                    lifetimes[value].append(length)
                    aggregated_hbs_lt_trimer1[value].extend([length])

                # Output
                print("Percentages:")
                for value in sorted(percentages):
                    print(f"  {value}: {percentages[value]}%")

                # Output min and max per state
                print("Min and Max Lifetimes:")
                for state in sorted(lifetimes):
                    durations = lifetimes[state]
                    print(f"  {state}: min = {min(durations)}, max = {max(durations)}")
    
print("FINAL NUMBERS:")

# Percentages
counts = Counter(aggregated_hbs_trimer1)
total = len(aggregated_hbs_trimer1)
percentages_trimer1 = {k: round((v / total) * 100, 2) for k, v in counts.items()}

# Output
print("Percentages Trimer 1:")
for value in sorted(percentages_trimer1):
    print(f"  {value}: {percentages_trimer1[value]}%")

    
###############
# TRIMER 2
###############
    
results = find_seq(projects_metadata, trimer2)

aggregated_hbs_trimer2 = []
aggregated_hbs_lt_trimer2 = defaultdict(list)
for result in results:
    print(f"{result['id']} - {result['name']}")
    print(f"  Watson strand: {result['sequence']}")
        
    if result['id']=="seq010-1" : continue # HB wrong
    if result['id']=="seq013-1" : continue # HB wrong
    if result['id']=="seq021-1" : continue # HB wrong
    if result['id']=="seq030-1" : continue # HB wrong
    if result['id']=="seq032-1" : continue # HB wrong
    if result['id']=="seq040-1" : continue # HB wrong
    if result['id']=="seq047-1" : continue # HB wrong
    if result['id']=="seq050-1" : continue # HB wrong
    if result['id']=="seq055-1" : continue # HB wrong
    if result['id']=="seq056-1" : continue # HB wrong
    if result['id']=="seq058-1" : continue # HB wrong
    if result['id']=="seq062-1" : continue # HB wrong
    if result['id']=="seq064-1" : continue # HB wrong
    if result['id']=="seq068-1" : continue # HB wrong
    if result['id']=="seq070-1" : continue # HB wrong
    if result['id']=="seq071-1" : continue # HB wrong
    if result['id']=="seq073-1" : continue # HB wrong
    if result['id']=="seq084-1" : continue # HB wrong
    if result['id']=="seq085-1" : continue # HB wrong
    if result['id']=="seq091-1" : continue # HB wrong
    if result['id']=="seq093-1" : continue # HB wrong
    if result['id']=="seq097-1" : continue # HB wrong
    if result['id']=="seq098-1" : continue # HB wrong
    if result['id']=="seq101-1" : continue # HB wrong
    if result['id']=="seq130-1" : continue # HB wrong
    if result['id']=="seq142-1" : continue # HB wrong
    if result['id']=="seq143-1" : continue # HB wrong
    if result['id']=="seq144-1" : continue # HB wrong
    if result['id']=="seq150-1" : continue # HB wrong
    if result['id']=="seq151-1" : continue # HB wrong
    if result['id']=="seq152-1" : continue # HB wrong
    if result['id']=="seq153-1" : continue # HB wrong
    if result['id']=="seq155-1" : continue # HB wrong
    if result['id']=="seq159-1" : continue # HB wrong
    if result['id']=="seq170-1" : continue # HB wrong
    if result['id']=="seq171-1" : continue # HB wrong
    if result['id']=="seq173-1" : continue # HB wrong
    if result['id']=="seq174-1" : continue # HB wrong
    if result['id']=="seq177-1" : continue # HB wrong
    if result['id']=="seq179-1" : continue # HB wrong
    if result['id']=="seq180-1" : continue # HB wrong
    if result['id']=="seq182-1" : continue # HB wrong
    if result['id']=="seq184-1" : continue # HB wrong
    if result['id']=="seq185-1" : continue # HB wrong
    if result['id']=="seq186-1" : continue # HB wrong
    if result['id']=="seq189-1" : continue # HB wrong
 
    if result['id'].endswith("-2"): continue

    url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=5000"
    #url_param = f"{API_BASE_URL}/projects/{result['id']}/analyses/hbonds?from=1&to=490000"
    print(url_param)
    
    with urllib.request.urlopen(url_param) as response:
        hbs = json.loads(response.read().decode("utf-8"))
        
    for motif, pos in result['positions']:
        print(f"    ↳ found '{motif}' at position {pos}")

        pos_index = pos + 2
        code = f"{pos_index}{motif[1]}"
        print("Code: " + code)

        # Complementary case
        if motif!=trimer:
            code = f"{pos_index}{motif[1]}"
            print("Code -rev-: " + code)
        
        for bp_item in hbs['hbonds']:
            
            print(bp_item['bp'])
            if (bp_item['bp'].startswith(code)):
                list_hbonds = bp_item['hbonds']
                aggregated_hbs_trimer2.extend(list_hbonds)
                
                # Percentages
                counts = Counter(list_hbonds)
                total = len(list_hbonds)
                percentages = {k: round((v / total) * 100, 2) for k, v in counts.items()}

                # Lifetimes
                lifetimes = defaultdict(list)
                for value, group in groupby(list_hbonds):
                    length = len(list(group))
                    lifetimes[value].append(length)
                    aggregated_hbs_lt_trimer2[value].extend([length])

                # Output
                print("Percentages:")
                for value in sorted(percentages):
                    print(f"  {value}: {percentages[value]}%")

                # Output min and max per state
                print("Min and Max Lifetimes:")
                for state in sorted(lifetimes):
                    durations = lifetimes[state]
                    print(f"  {state}: min = {min(durations)}, max = {max(durations)}")
    
print("FINAL NUMBERS:")

# Percentages
counts = Counter(aggregated_hbs_trimer2)
total = len(aggregated_hbs_trimer2)
percentages_trimer2 = {k: round((v / total) * 100, 2) for k, v in counts.items()}

# Output
print("Percentages Trimer 2:")
for value in sorted(percentages_trimer2):
    print(f"  {value}: {percentages_trimer2[value]}%")
    
# Output min and max per state
print("Min and Max Lifetimes Trimer 1:")
for state in sorted(aggregated_hbs_lt_trimer1):
    durations = aggregated_hbs_lt_trimer1[state]
    print(f"  {state}: min = {min(durations)}, max = {max(durations)}")

# Output min and max per state
print("Min and Max Lifetimes Trimer 2:")
for state in sorted(aggregated_hbs_lt_trimer2):
    durations = aggregated_hbs_lt_trimer2[state]
    print(f"  {state}: min = {min(durations)}, max = {max(durations)}")


seq004-1 - hexABC Sequence 004 - replica 1
  Watson strand: GCAAATTTGGAGTCTAGAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq004-1/analyses/hbonds?from=1&to=500
    ↳ found 'AAT' at position 3
Code: 5A
Code -rev-: 5A
20C21G
19G22C
18A23T
17G24C
16A25T
15T26A
14C27G
13T28A
12G29C
11A30T
10G31C
9G32C
8T33A
7T34A
6T35A
5A36T
Percentages:
  1: 1.6%
  2: 98.4%
Min and Max Lifetimes:
  1: min = 1, max = 2
  2: min = 5, max = 177
4A37T
3A38T
2C39G
1G40C
    ↳ found 'ATT' at position 4
Code: 6T
Code -rev-: 6T
20C21G
19G22C
18A23T
17G24C
16A25T
15T26A
14C27G
13T28A
12G29C
11A30T
10G31C
9G32C
8T33A
7T34A
6T35A
Percentages:
  1: 1.6%
  2: 98.4%
Min and Max Lifetimes:
  1: min = 1, max = 1
  2: min = 26, max = 92
5A36T
4A37T
3A38T
2C39G
1G40C
seq004-2 - hexABC Sequence 004 - replica 2
  Watson strand: GCAAATTTGGAGTCTAGAGC
seq006-1 - hexABC Sequence 006 - replica 1
  Watson strand: GCAACATTTCGCCGAGGAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq006-1/analyses/hbonds

    ↳ found 'ATT' at position 2
Code: 4T
Code -rev-: 4T
20C21G
19G22C
18C23G
17A24T
16A25T
15T26A
14G27C
13T28A
12G29C
11G30C
10C31G
9G32C
8G33C
7A34T
6T35A
5T36A
4T37A
Percentages:
  1: 1.6%
  2: 98.4%
Min and Max Lifetimes:
  1: min = 1, max = 2
  2: min = 4, max = 191
3A38T
2C39G
1G40C
seq044-2 - hexABC Sequence 044 - replica 2
  Watson strand: GCATTTAGGCGGTGTAACGC
seq047-1 - hexABC Sequence 047 - replica 1
  Watson strand: GCCACTCGGAAATTAGCAGC
seq047-2 - hexABC Sequence 047 - replica 2
  Watson strand: GCCACTCGGAAATTAGCAGC
seq050-1 - hexABC Sequence 050 - replica 1
  Watson strand: GCCAGGAGCGAATTTTATGC
seq050-2 - hexABC Sequence 050 - replica 2
  Watson strand: GCCAGGAGCGAATTTTATGC
seq055-1 - hexABC Sequence 055 - replica 1
  Watson strand: GCCATGAATGTAGAACGTGC
seq055-2 - hexABC Sequence 055 - replica 2
  Watson strand: GCCATGAATGTAGAACGTGC
seq056-1 - hexABC Sequence 056 - replica 1
  Watson strand: GCCCAAAATCGCAACAAGGC
seq056-2 - hexABC Sequence 056 - replica 2
  Watson strand: GC

    ↳ found 'AAT' at position 7
Code: 9A
Code -rev-: 9A
20C21G
19G22C
18G23C
17A24T
16T25A
15G26C
14A27T
13G28C
12T29A
11C30G
10T31A
9A32T
Percentages:
  1: 0.4%
  2: 99.6%
Min and Max Lifetimes:
  1: min = 1, max = 1
  2: min = 76, max = 311
8A33T
7C34G
6A35T
5A36T
4T37A
3G38C
2C39G
1G40C
seq129-2 - hexABC Sequence 129 - replica 2
  Watson strand: GCGTAACAATCTGAGTAGGC
seq131-1 - hexABC Sequence 131 - replica 1
  Watson strand: GCGTAATATGGGCTGGCAGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq131-1/analyses/hbonds?from=1&to=500
    ↳ found 'AAT' at position 4
Code: 6A
Code -rev-: 6A
20C21G
19G22C
18A23T
17C24G
16G25C
15G26C
14T27A
13C28G
12G29C
11G30C
10G31C
9T32A
8A33T
7T34A
6A35T
Percentages:
  1: 2.4%
  2: 97.6%
Min and Max Lifetimes:
  1: min = 1, max = 1
  2: min = 5, max = 100
5A36T
4T37A
3G38C
2C39G
1G40C
seq131-2 - hexABC Sequence 131 - replica 2
  Watson strand: GCGTAATATGGGCTGGCAGC
seq132-1 - hexABC Sequence 132 - replica 1
  Watson strand: GCGTAATTGTGCGGTCTAGC

    ↳ found 'CAG' at position 3
Code: 5A
Code -rev-: 5A
20C21G
19G22C
18A23T
17T24A
16A25T
15G26C
14A27T
13T28A
12T29A
11A30T
10G31C
9A32T
8T33A
7G34C
6G35C
5A36T
Percentages:
  1: 1.78%
  2: 98.22%
Min and Max Lifetimes:
  1: min = 1, max = 2
  2: min = 1, max = 199
4C37G
3A38T
2C39G
1G40C
seq014-2 - hexABC Sequence 014 - replica 2
  Watson strand: GCACAGGTAGATTAGATAGC
seq015-1 - hexABC Sequence 015 - replica 1
  Watson strand: GCACATGTCAGAATATTGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq015-1/analyses/hbonds?from=1&to=5000
    ↳ found 'CAG' at position 8
Code: 10A
Code -rev-: 10A
20C21G
19G22C
18G23C
17T24A
16T25A
15A26T
14T27A
13A28T
12A29T
11G30C
10A31T
Percentages:
  0: 0.08%
  1: 1.92%
  2: 98.0%
Min and Max Lifetimes:
  0: min = 1, max = 1
  1: min = 1, max = 2
  2: min = 1, max = 286
9C32G
8T33A
7G34C
6T35A
5A36T
4C37G
3A38T
2C39G
1G40C
seq015-2 - hexABC Sequence 015 - replica 2
  Watson strand: GCACATGTCAGAATATTGGC
seq018-1 - hexABC Sequence 018 - replica 1

    ↳ found 'CTG' at position 9
Code: 11T
Code -rev-: 11T
20C21G
19G22C
18C23G
17A24T
16C25G
15G26C
14A27T
13G28C
12G29C
11T30A
Percentages:
  0: 0.02%
  1: 2.02%
  2: 97.96%
Min and Max Lifetimes:
  0: min = 1, max = 1
  1: min = 1, max = 2
  2: min = 1, max = 273
10C31G
9G32C
8C33G
7A34T
6T35A
5G36C
4T37A
3A38T
2C39G
1G40C
seq041-2 - hexABC Sequence 041 - replica 2
  Watson strand: GCATGTACGCTGGAGCACGC
seq042-1 - hexABC Sequence 042 - replica 1
  Watson strand: GCATGTTGACAAACAGGGGC
https://mmb.irbbarcelona.org/webdev3/hexABC/api/projects/seq042-1/analyses/hbonds?from=1&to=5000
    ↳ found 'CAG' at position 13
Code: 15A
Code -rev-: 15A
20C21G
19G22C
18G23C
17G24C
16G25C
15A26T
Percentages:
  0: 0.02%
  1: 2.36%
  2: 97.62%
Min and Max Lifetimes:
  0: min = 1, max = 1
  1: min = 1, max = 2
  2: min = 1, max = 462
14C27G
13A28T
12A29T
11A30T
10C31G
9A32T
8G33C
7T34A
6T35A
5G36C
4T37A
3A38T
2C39G
1G40C
seq042-2 - hexABC Sequence 042 - replica 2
  Watson strand: GCATGTTGACAAACAGGGGC
seq04

HTTPError: HTTP Error 500: Internal Server Error

### Plotting hydrogen bond values

**Pie Charts** representing HBs statistics for the specific **trimers**. <br>
Less than 3 HBs (for C,G) or 2 HBs (for A,T) means **unstable/broken base pairs**. 

In [173]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

color_map = {
    0: '#1f77b4',  # blue
    1: '#ff7f0e',  # orange
    2: '#2ca02c',  # green
    3: '#d62728'   # red
}

labels_1 = [f"{k} HBs" for k in percentages_trimer1]
values_1 = list(percentages_trimer1.values())
colors_1 = [color_map[k] for k in percentages_trimer1]

labels_2 = [f"{k} HBs" for k in percentages_trimer2]
values_2 = list(percentages_trimer2.values())
colors_2 = [color_map[k] for k in percentages_trimer2]

# Create subplot layout for 2 pies
fig = make_subplots(rows=1, cols=2, specs=[[{'type':'domain'}, {'type':'domain'}]])

fig.add_trace(go.Pie(
    labels=labels_1,
    values=values_1,
    hole=0.4,
    textinfo='label+percent',
    marker=dict(colors=colors_1),
    pull=[0.05 if v < 5 else 0 for v in values_1],
    name="Study 1"
), row=1, col=1)

fig.add_trace(go.Pie(
    labels=labels_2,
    values=values_2,
    hole=0.4,
    textinfo='label+percent',
    marker=dict(colors=colors_2),
    pull=[0.05 if v < 5 else 0 for v in values_2],
    name="Study 2"
), row=1, col=2)


# Add titles
tit = f"{trimer1}-{trimer2}"
fig.update_layout(
    title={
        'text': tit,
        'x': 0.3,  # Left-align
        'y': 0.98,
        'xanchor': 'left',
        'yanchor': 'top',
        'font': dict(size=22)
    },
    annotations=[
        dict(text=f"{trimer1}", x=0.20, y=0.5, font_size=14, showarrow=False),
        dict(text=f"{trimer2}", x=0.80, y=0.5, font_size=14, showarrow=False)
    ]
)

fig.show()


### Plotting hydrogen bond lifetimes 

**Histograms** representing HBs lifetimes for the specific **trimers**. <br>


In [231]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

states = sorted(aggregated_hbs_lt_trimer1.keys())
n_states = len(states)

# Set up subplots: one row, multiple columns
fig = make_subplots(rows=1, cols=n_states, subplot_titles=[f"{s} HBs" for s in states])

# Add one histogram per state
for i, state in enumerate(states):
    lifetimes = aggregated_hbs_lt_trimer1[state]
    fig.add_trace(
        go.Histogram(
            x=lifetimes,
            nbinsx=min(20, max(lifetimes) - min(lifetimes) + 1),
            marker_color='#1f77b4',
            name=f"{state} HBs",
            showlegend=False
        ),
        row=1,
        col=i+1
    )

# Layout polish
fig.update_layout(
    title_text=f"Lifetime Distributions per State - {trimer1}",
    height=400,
    width=300 * n_states,
    bargap=0.1,
    margin=dict(t=50, r=20, l=20, b=40)
)

# Add axis labels uniformly
for i in range(n_states):
    fig.update_xaxes(title_text="Lifetime (frames)", row=1, col=i+1)
    fig.update_yaxes(title_text="Frequency", row=1, col=1)

fig.show()


In [230]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

states = sorted(aggregated_hbs_lt_trimer2.keys())
n_states = len(states)

# Set up subplots: one row, multiple columns
fig = make_subplots(rows=1, cols=n_states, subplot_titles=[f"{s} HBs" for s in states])

# Add one histogram per state
for i, state in enumerate(states):
    lifetimes = aggregated_hbs_lt_trimer2[state]
    fig.add_trace(
        go.Histogram(
            x=lifetimes,
            nbinsx=min(20, max(lifetimes) - min(lifetimes) + 1),
            marker_color='#1f77b4',
            name=f"{state} HBs",
            showlegend=False
        ),
        row=1,
        col=i+1
    )

# Layout polish
fig.update_layout(
     title_text=f"Lifetime Distributions per State - {trimer2}",
    height=400,
    width=300 * n_states,
    bargap=0.1,
    margin=dict(t=50, r=20, l=20, b=40)
)

# Add axis labels uniformly
for i in range(n_states):
    fig.update_xaxes(title_text="Lifetime (frames)", row=1, col=i+1)
    fig.update_yaxes(title_text="Frequency", row=1, col=1)

fig.show()
