In [213]:
# Last retained exon(s) of every 5' fusion partner, keyed by gene symbol.
# The comment above each entry records the reported junction and its source.
five_prime_genes = {
    # MGA exon 22 -> NUTM1 exon 3
    #   source: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6318763/
    "MGA": [22],

    # BRD4 exons 10 & 11, sources:
    #   - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5378225/
    #   - https://aacrjournals.org/cancerres/article/63/2/304/510577/BRD4-NUT-Fusion-OncogeneA-Novel-Mechanism-in
    # BRD4 exon 15, source:
    #   - https://www.nature.com/articles/onc2012487
    # NOTE(review): exons 12-14 are listed without a citation of their own -- confirm.
    "BRD4": list(range(10, 16)),

    # BRD3 exon 9 -> NUTM1 exon 2
    #   source: https://www.nature.com/articles/1210852
    "BRD3": [9],

    # MXD4 exon 5 -> NUTM1 exons 2 & 3, sources:
    #   - https://pubmed.ncbi.nlm.nih.gov/30338611/
    #   - https://www.nature.com/articles/s41379-021-00792-z
    "MXD4": [5],

    # CIC exons 16-20 -> NUTM1 exons 2-5
    #   source: https://hal.archives-ouvertes.fr/hal-01927040
    "CIC": list(range(16, 21)),

    # SLC12A6 exon 2 -> NUTM1 exon 3
    #   source: https://www.haematologica.org/article/view/9099
    "SLC12A6": [2],

    # YAP1 exon 3 -> NUTM1 exon 2
    #   source: https://link.springer.com/article/10.1007/s12105-020-01173-9
    "YAP1": [3],

    # NSD3 exon 7 -> NUTM1 exon 2, sources:
    #   - https://www.frontiersin.org/articles/10.3389/fonc.2022.860830/full
    #   - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4125436/
    "NSD3": [7],
}

# Candidate first retained exons of NUTM1 (the 3' partner).
# * The CIC paper reports fusions on exons 4 & 5 (presumably of NUTM1), but I
#   have not seen those in any other paper.
# * One reference has BRD4 exon 11 fused onto NUTM1 exon 3; both have an
#   end/start phase of 1.
#   source: TODO -- citation missing
# * Older references did not count what is now considered exon 1, so exon 6
#   (the old exon 5) is included as well.
NUTM1_start_exons = list(range(2, 7))

In [214]:
from pyensembl import ensembl_grch38

In [215]:
# Resolve every gene symbol (all 5' partners plus NUTM1) to one Ensembl gene.
# Symbols are visited in a fixed order (the order of five_prime_genes, then
# NUTM1). Iterating a set of strings instead would give an order that can
# change from one kernel to the next, and every dict derived from
# name_to_gene would then display in a different order on each run.
name_to_gene = {}
for name in dict.fromkeys(list(five_prime_genes) + ["NUTM1"]):
    genes = ensembl_grch38.genes_by_name(name)
    # The rest of the analysis assumes exactly one gene per symbol.
    assert len(genes) == 1, "expected exactly one gene named %s, found %d" % (name, len(genes))
    name_to_gene[name] = genes[0]

In [216]:
def transcript_key(t):
    """Build the sort key used to rank candidate transcripts (larger is better).

    The key is the tuple ``(t.complete, support score, protein length)``:

    * support score is ``-t.support_level`` when a truthy level is present and
      ``100`` otherwise;
    * protein length is ``len(t.protein_sequence)``, or ``0`` when the
      sequence is missing or empty.

    NOTE(review): because the fallback is +100, a transcript without a support
    level outranks every transcript that has one -- confirm this is intended.
    """
    is_complete = t.complete
    support_score = -t.support_level if t.support_level else 100
    protein = t.protein_sequence
    protein_length = len(protein) if protein else 0
    return (is_complete, support_score, protein_length)

def pick_best_transcript(ts):
    """Return the transcript in ``ts`` with the largest ``transcript_key``.

    Among transcripts with equal keys the one appearing last in ``ts`` is
    returned (the sort is stable). Raises IndexError when ``ts`` is empty.
    """
    ranked = list(ts)
    ranked.sort(key=transcript_key)
    return ranked[-1]

In [217]:
# Manual overrides: a gene listed here uses the named transcript instead of
# the automatically ranked one.
canonical_transcripts = {"NUTM1": "NUTM1-203"}

name_to_transcript = {}

for gene_name, gene in name_to_gene.items():
    if gene_name not in canonical_transcripts:
        # No override: rank all transcripts of the gene and keep the best.
        chosen = pick_best_transcript(gene.transcripts)
    else:
        # Override: take the first transcript returned for the pinned name.
        pinned_name = canonical_transcripts[gene_name]
        chosen = ensembl_grch38.transcripts_by_name(pinned_name)[0]
    name_to_transcript[gene_name] = chosen

In [218]:
# Display the transcript selected for each gene.
name_to_transcript

{'CIC': Transcript(transcript_id='ENST00000681038', transcript_name='CIC-209', gene_id='ENSG00000079432', biotype='protein_coding', contig='19', start=42269252, end=42295796, strand='+', genome='GRCh38'),
 'BRD3': Transcript(transcript_id='ENST00000303407', transcript_name='BRD3-201', gene_id='ENSG00000169925', biotype='protein_coding', contig='9', start=134030305, end=134068026, strand='-', genome='GRCh38'),
 'NUTM1': Transcript(transcript_id='ENST00000537011', transcript_name='NUTM1-203', gene_id='ENSG00000184507', biotype='protein_coding', contig='15', start=34343315, end=34357735, strand='+', genome='GRCh38'),
 'SLC12A6': Transcript(transcript_id='ENST00000676379', transcript_name='SLC12A6-221', gene_id='ENSG00000140199', biotype='protein_coding', contig='15', start=34230036, end=34337462, strand='-', genome='GRCh38'),
 'BRD4': Transcript(transcript_id='ENST00000679869', transcript_name='BRD4-213', gene_id='ENSG00000141867', biotype='protein_coding', contig='19', start=15235519, en

In [219]:
# Length (in nucleotides) of each coding segment of every selected transcript.
# Each (start, end) range is inclusive, hence the "+ 1".
# NOTE(review): no strand-specific reordering is applied, so this relies on
# pyensembl returning the ranges in transcript (5'->3') order with
# start <= end on both strands -- TODO confirm against the pyensembl docs.
name_to_coding_exon_lengths = {}
for name, transcript in name_to_transcript.items():
    name_to_coding_exon_lengths[name] = [
        end - start + 1
        for (start, end) in transcript.coding_sequence_position_ranges
    ]
        

In [220]:
# Display the coding-segment lengths computed for each gene.
name_to_coding_exon_lengths

{'CIC': [2794,
  150,
  235,
  130,
  183,
  166,
  203,
  226,
  104,
  1234,
  188,
  122,
  167,
  294,
  326,
  245,
  155,
  132,
  132,
  365],
 'BRD3': [213, 138, 148, 215, 372, 129, 192, 236, 293, 129, 113],
 'NUTM1': [6, 94, 709, 129, 137, 287, 117, 2001],
 'SLC12A6': [271,
  45,
  95,
  132,
  147,
  55,
  131,
  242,
  215,
  159,
  99,
  58,
  175,
  119,
  99,
  120,
  105,
  169,
  196,
  170,
  132,
  108,
  185,
  134,
  32,
  33],
 'BRD4': [285,
  138,
  136,
  290,
  363,
  129,
  210,
  200,
  296,
  111,
  53,
  370,
  588,
  113,
  163,
  131,
  206,
  238,
  66],
 'YAP1': [321, 251, 116, 114, 182, 48, 131, 113, 236],
 'MXD4': [64, 100, 30, 115, 163, 155],
 'NSD3': [675,
  72,
  163,
  155,
  516,
  127,
  101,
  46,
  131,
  129,
  127,
  198,
  171,
  147,
  157,
  203,
  113,
  270,
  117,
  142,
  107,
  205,
  239],
 'MGA': [1064,
  949,
  79,
  96,
  132,
  105,
  659,
  346,
  227,
  186,
  73,
  518,
  151,
  291,
  1505,
  131,
  52,
  207,
  112,
  234,
 

In [221]:
def _count_5prime_utr_exons(transcript):
    """Count exons that lie entirely before the start codon of ``transcript``.

    On the "+" strand these are exons whose end precedes the smallest
    start-codon position; on any other strand, exons whose start lies beyond
    the largest start-codon position.
    """
    if transcript.strand == "+":
        first_codon_base = min(transcript.start_codon_positions)
        return sum(
            1 for (_, exon_end) in transcript.exon_intervals
            if exon_end < first_codon_base)
    first_codon_base = max(transcript.start_codon_positions)
    return sum(
        1 for (exon_start, _) in transcript.exon_intervals
        if exon_start > first_codon_base)


# Number of fully non-coding 5' exons per gene; used later to convert a
# transcript exon number into an index into the coding-exon length list.
name_to_5prime_utr_exons_count = {}
for gene_name, selected_transcript in name_to_transcript.items():
    name_to_5prime_utr_exons_count[gene_name] = _count_5prime_utr_exons(selected_transcript)
name_to_5prime_utr_exons_count

{'CIC': 1,
 'BRD3': 1,
 'NUTM1': 0,
 'SLC12A6': 0,
 'BRD4': 1,
 'YAP1': 0,
 'MXD4': 0,
 'NSD3': 1,
 'MGA': 1}

In [222]:

# Cumulative CDS length (nucleotides) from the start codon through the end of
# a given exon, keyed by (gene name, exon number).
name_and_exon_to_cds_length = {}

for name, exon_numbers in list(five_prime_genes.items()) + [("NUTM1", NUTM1_start_exons)]:
    print(name, exon_numbers) 
    # Also measure the exon just before the first listed one: its cumulative
    # length gives the reading-frame phase at the start of that first exon.
    exon_numbers = [min(exon_numbers) - 1] + exon_numbers
    num_utr_exons = name_to_5prime_utr_exons_count[name]
    coding_exon_lengths = name_to_coding_exon_lengths[name]
    for exon_number in exon_numbers:
        # Exons lying entirely in the 5' UTR contribute no CDS. Clamp at zero:
        # a negative slice bound would silently drop exons from the END of the
        # list instead of yielding an empty prefix.
        num_coding_exons = max(0, exon_number - num_utr_exons)
        cds_length = sum(coding_exon_lengths[:num_coding_exons])
        print("-- %s %d: %d (%d)" % (name, exon_number, cds_length, cds_length % 3))
        name_and_exon_to_cds_length[(name, exon_number)] = cds_length

MGA [22]
-- MGA 21: 7117 (1)
-- MGA 22: 7294 (1)
BRD4 [10, 11, 12, 13, 14, 15]
-- BRD4 9: 1751 (2)
-- BRD4 10: 2047 (1)
-- BRD4 11: 2158 (1)
-- BRD4 12: 2211 (0)
-- BRD4 13: 2581 (1)
-- BRD4 14: 3169 (1)
-- BRD4 15: 3282 (0)
BRD3 [9]
-- BRD3 8: 1407 (0)
-- BRD3 9: 1643 (2)
MXD4 [5]
-- MXD4 4: 309 (0)
-- MXD4 5: 472 (1)
CIC [16, 17, 18, 19, 20]
-- CIC 15: 6196 (1)
-- CIC 16: 6522 (0)
-- CIC 17: 6767 (2)
-- CIC 18: 6922 (1)
-- CIC 19: 7054 (1)
-- CIC 20: 7186 (1)
SLC12A6 [2]
-- SLC12A6 1: 271 (1)
-- SLC12A6 2: 316 (1)
YAP1 [3]
-- YAP1 2: 572 (2)
-- YAP1 3: 688 (1)
NSD3 [7]
-- NSD3 6: 1581 (0)
-- NSD3 7: 1708 (1)
NUTM1 [2, 3, 4, 5, 6]
-- NUTM1 1: 6 (0)
-- NUTM1 2: 100 (1)
-- NUTM1 3: 809 (2)
-- NUTM1 4: 938 (2)
-- NUTM1 5: 1075 (1)
-- NUTM1 6: 1362 (0)


In [223]:
# Reading-frame phase at both edges of every exon of interest:
#   start phase = CDS length accumulated before the exon, modulo 3
#   end phase   = CDS length accumulated through the exon, modulo 3
name_and_exon_start_phase = {}
name_and_exon_end_phase = {}
genes_and_exons = list(five_prime_genes.items()) + [("NUTM1", NUTM1_start_exons)]
for gene_name, exons in genes_and_exons:
    for exon in exons:
        key = (gene_name, exon)
        phase_in = name_and_exon_to_cds_length[(gene_name, exon - 1)] % 3
        phase_out = name_and_exon_to_cds_length[key] % 3
        name_and_exon_start_phase[key] = phase_in
        name_and_exon_end_phase[key] = phase_out
        print("%s exon %d, start phase %d, end phase %d" % (gene_name, exon, phase_in, phase_out))
        

MGA exon 22, start phase 1, end phase 1
BRD4 exon 10, start phase 2, end phase 1
BRD4 exon 11, start phase 1, end phase 1
BRD4 exon 12, start phase 1, end phase 0
BRD4 exon 13, start phase 0, end phase 1
BRD4 exon 14, start phase 1, end phase 1
BRD4 exon 15, start phase 1, end phase 0
BRD3 exon 9, start phase 0, end phase 2
MXD4 exon 5, start phase 0, end phase 1
CIC exon 16, start phase 1, end phase 0
CIC exon 17, start phase 0, end phase 2
CIC exon 18, start phase 2, end phase 1
CIC exon 19, start phase 1, end phase 1
CIC exon 20, start phase 1, end phase 1
SLC12A6 exon 2, start phase 1, end phase 1
YAP1 exon 3, start phase 2, end phase 1
NSD3 exon 7, start phase 0, end phase 1
NUTM1 exon 2, start phase 0, end phase 1
NUTM1 exon 3, start phase 1, end phase 2
NUTM1 exon 4, start phase 2, end phase 2
NUTM1 exon 5, start phase 2, end phase 1
NUTM1 exon 6, start phase 1, end phase 0


In [224]:
# A fusion keeps the reading frame when the end phase of the last 5' exon
# equals the start phase of the first 3' (NUTM1) exon. Collect every such
# ((5' gene, exon), (3' gene, exon)) pair.
upstream_candidates = [
    (gene_name, exon)
    for gene_name, exons in five_prime_genes.items()
    for exon in exons
]
downstream_candidates = [("NUTM1", exon) for exon in NUTM1_start_exons]

valid_pairs = []
for upstream in upstream_candidates:
    outgoing_phase = name_and_exon_end_phase[upstream]
    for downstream in downstream_candidates:
        if name_and_exon_start_phase[downstream] != outgoing_phase:
            continue
        # upstream + downstream == (5' name, 5' exon, 3' name, 3' exon)
        print("%s exon %d -> %s exon %d" % (upstream + downstream))
        valid_pairs.append((upstream, downstream))

MGA exon 22 -> NUTM1 exon 3
MGA exon 22 -> NUTM1 exon 6
BRD4 exon 10 -> NUTM1 exon 3
BRD4 exon 10 -> NUTM1 exon 6
BRD4 exon 11 -> NUTM1 exon 3
BRD4 exon 11 -> NUTM1 exon 6
BRD4 exon 12 -> NUTM1 exon 2
BRD4 exon 13 -> NUTM1 exon 3
BRD4 exon 13 -> NUTM1 exon 6
BRD4 exon 14 -> NUTM1 exon 3
BRD4 exon 14 -> NUTM1 exon 6
BRD4 exon 15 -> NUTM1 exon 2
BRD3 exon 9 -> NUTM1 exon 4
BRD3 exon 9 -> NUTM1 exon 5
MXD4 exon 5 -> NUTM1 exon 3
MXD4 exon 5 -> NUTM1 exon 6
CIC exon 16 -> NUTM1 exon 2
CIC exon 17 -> NUTM1 exon 4
CIC exon 17 -> NUTM1 exon 5
CIC exon 18 -> NUTM1 exon 3
CIC exon 18 -> NUTM1 exon 6
CIC exon 19 -> NUTM1 exon 3
CIC exon 19 -> NUTM1 exon 6
CIC exon 20 -> NUTM1 exon 3
CIC exon 20 -> NUTM1 exon 6
SLC12A6 exon 2 -> NUTM1 exon 3
SLC12A6 exon 2 -> NUTM1 exon 6
YAP1 exon 3 -> NUTM1 exon 3
YAP1 exon 3 -> NUTM1 exon 6
NSD3 exon 7 -> NUTM1 exon 3
NSD3 exon 7 -> NUTM1 exon 6


In [225]:
# Standard genetic code: DNA codon -> one-letter amino acid ("_" marks a stop).
# Built once at definition time instead of on every call.
_CODON_TABLE = {
    'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
    'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
    'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
    'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
    'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
    'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
    'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
    'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
    'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
    'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
    'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
    'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
    'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
    'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
    'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
    'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W',
}


def translate(seq):
    """Translate a DNA coding sequence into a one-letter protein string.

    Parameters
    ----------
    seq : str
        DNA sequence made of A/C/G/T, in upper or lower case.

    Returns
    -------
    str
        One character per codon; stop codons are rendered as "_" and
        translation continues past them. When ``len(seq)`` is not a multiple
        of 3 an empty string is returned (kept for backward compatibility),
        so callers should check the length themselves.

    Raises
    ------
    KeyError
        If a codon contains a character other than A, C, G or T.
    """
    if len(seq) % 3 != 0:
        return ""
    # Accept lowercase (e.g. soft-masked) sequence as well.
    seq = seq.upper()
    return "".join(_CODON_TABLE[seq[i:i + 3]] for i in range(0, len(seq), 3))

In [226]:
# For every in-frame pair, join the 5' partner's CDS (truncated after its last
# retained exon) to the 3' partner's CDS (starting at its first retained exon)
# and translate the result. All three dicts are keyed by
# ((5' name, 5' exon), (3' name, 3' exon)).
fused_coding_sequences = {}
fused_proteins = {}
breakpoint_offset = {}
for upstream_key, downstream_key in valid_pairs:
    fusion_key = (upstream_key, downstream_key)
    upstream_name, upstream_last_exon = upstream_key
    downstream_name, downstream_first_exon = downstream_key

    # 5' side: keep the CDS prefix that ends with the last retained exon.
    upstream_full_cds = name_to_transcript[upstream_name].coding_sequence
    upstream_keep_length = name_and_exon_to_cds_length[(upstream_name, upstream_last_exon)]
    print("Truncating %s CDS (length %d) to exon %d (length %d)" % (
        upstream_name,
        len(upstream_full_cds),
        upstream_last_exon,
        upstream_keep_length,
    ))
    upstream_part = upstream_full_cds[:upstream_keep_length]

    # 3' side: drop everything before the first retained exon.
    downstream_full_cds = name_to_transcript[downstream_name].coding_sequence
    downstream_skip_length = name_and_exon_to_cds_length[(downstream_name, downstream_first_exon - 1)]
    downstream_part = downstream_full_cds[downstream_skip_length:]
    print("Starting %s CDS (length %d) at exon %d (length %d)" % (
        downstream_name,
        len(downstream_full_cds),
        downstream_first_exon,
        len(downstream_part)
    ))

    fusion_cds = upstream_part + downstream_part
    # Pairs were selected for matching phases, so the join must be whole codons.
    assert len(fusion_cds) % 3 == 0
    fused_coding_sequences[fusion_key] = fusion_cds
    # Nucleotide offset of the junction within the fused CDS.
    breakpoint_offset[fusion_key] = len(upstream_part)
    fused_proteins[fusion_key] = translate(fusion_cds)
    
    

Truncating MGA CDS (length 8571) to exon 22 (length 7294)
Starting NUTM1 CDS (length 3483) at exon 3 (length 3383)
Truncating MGA CDS (length 8571) to exon 22 (length 7294)
Starting NUTM1 CDS (length 3483) at exon 6 (length 2408)
Truncating BRD4 CDS (length 4089) to exon 10 (length 2047)
Starting NUTM1 CDS (length 3483) at exon 3 (length 3383)
Truncating BRD4 CDS (length 4089) to exon 10 (length 2047)
Starting NUTM1 CDS (length 3483) at exon 6 (length 2408)
Truncating BRD4 CDS (length 4089) to exon 11 (length 2158)
Starting NUTM1 CDS (length 3483) at exon 3 (length 3383)
Truncating BRD4 CDS (length 4089) to exon 11 (length 2158)
Starting NUTM1 CDS (length 3483) at exon 6 (length 2408)
Truncating BRD4 CDS (length 4089) to exon 12 (length 2211)
Starting NUTM1 CDS (length 3483) at exon 2 (length 3477)
Truncating BRD4 CDS (length 4089) to exon 13 (length 2581)
Starting NUTM1 CDS (length 3483) at exon 3 (length 3383)
Truncating BRD4 CDS (length 4089) to exon 13 (length 2581)
Starting NUTM1 

In [227]:
import pandas as pd
from collections import defaultdict
# Column order of the output table; also passed to the DataFrame constructor
# so the frame has these columns even when there are no fusions at all.
columns = [
    "upstream_gene",
    "upstream_transcript",
    "upstream_last_exon",
    "downstream_gene",
    "downstream_transcript",
    "downstream_first_exon",
    "upstream_cds",
    "downstream_cds",
    "protein_upstream",
    "protein_junction",
    "protein_downstream",
    "junction_creates_mutant_residue",
]
data = defaultdict(list)
for (k1, k2), cds in fused_coding_sequences.items():
    (upstream_gene, upstream_exon), (downstream_gene, downstream_exon) = k1, k2
    upstream_transcript = name_to_transcript[upstream_gene]
    downstream_transcript = name_to_transcript[downstream_gene]

    # Deliberately not called `breakpoint`: at top level that name would
    # shadow the builtin breakpoint() for the rest of the kernel session.
    breakpoint_nt = breakpoint_offset[(k1, k2)]
    # Index of the codon containing (or immediately following) the junction.
    junction_index = breakpoint_nt // 3
    protein = fused_proteins[(k1, k2)]

    if breakpoint_nt % 3 == 0:
        # Junction falls between two codons: no residue spans both genes.
        junction_aa = ""
        protein_downstream = protein[junction_index:]
        creates_mutant_residue = False
    else:
        # Junction splits a codon: one residue is encoded by bases of both genes.
        junction_aa = protein[junction_index]
        protein_downstream = protein[junction_index + 1:]
        cds_length_before_downstream_breakpoint = name_and_exon_to_cds_length[
            (downstream_gene, downstream_exon - 1)]
        # The residue counts as mutant only if it differs from the residue at
        # the corresponding position in BOTH parent proteins.
        creates_mutant_residue = (
            (junction_aa != upstream_transcript.protein_sequence[junction_index]) and
            (junction_aa != downstream_transcript.protein_sequence[
                cds_length_before_downstream_breakpoint // 3])
        )

    data["upstream_gene"].append(upstream_gene)
    data["upstream_transcript"].append(upstream_transcript.name)
    data["upstream_last_exon"].append(upstream_exon)
    data["downstream_gene"].append(downstream_gene)
    data["downstream_transcript"].append(downstream_transcript.name)
    data["downstream_first_exon"].append(downstream_exon)
    data["upstream_cds"].append(cds[:breakpoint_nt])
    data["downstream_cds"].append(cds[breakpoint_nt:])
    data["protein_upstream"].append(protein[:junction_index])
    data["protein_junction"].append(junction_aa)
    data["protein_downstream"].append(protein_downstream)
    data["junction_creates_mutant_residue"].append(creates_mutant_residue)

df = pd.DataFrame(data, columns=columns)
        

In [228]:
# Compact view of the fusion table: identifiers plus only the 10 residues on
# either side of the junction.
df_short = pd.DataFrame({
    "upstream_transcript": df["upstream_transcript"],
    "upstream_exon": df["upstream_last_exon"],
    "downstream_transcript": df["downstream_transcript"],
    "downstream_exon": df["downstream_first_exon"],
    # last 10 residues contributed by the 5' partner
    "protein_upstream": df["protein_upstream"].str[-10:],
    "protein_junction": df["protein_junction"],
    # first 10 residues contributed by the 3' partner
    "protein_downstream": df["protein_downstream"].str[:10],
    "mutant_junction": df["junction_creates_mutant_residue"],
})
df_short

Unnamed: 0,upstream_transcript,upstream_exon,downstream_transcript,downstream_exon,protein_upstream,protein_junction,protein_downstream,mutant_junction
0,MGA-207,22,NUTM1-203,3,QVAGSAVALP,A,SALPGPDMSM,False
1,MGA-207,22,NUTM1-203,6,QVAGSAVALP,V,YIPKKAASKT,False
2,BRD4-213,10,NUTM1-203,3,SCLRKKRKPQ,A,SALPGPDMSM,False
3,BRD4-213,10,NUTM1-203,6,SCLRKKRKPQ,V,YIPKKAASKT,False
4,BRD4-213,11,NUTM1-203,3,SSSDSEDSET,A,SALPGPDMSM,False
5,BRD4-213,11,NUTM1-203,6,SSSDSEDSET,V,YIPKKAASKT,False
6,BRD4-213,12,NUTM1-203,2,KGHPGREQKK,,VTLGPGPDCL,False
7,BRD4-213,13,NUTM1-203,3,LNQHAVVSPP,A,SALPGPDMSM,False
8,BRD4-213,13,NUTM1-203,6,LNQHAVVSPP,V,YIPKKAASKT,False
9,BRD4-213,14,NUTM1-203,3,RHHKSDPYST,A,SALPGPDMSM,False


In [229]:
# Write the full fusion table (row index included) to a CSV file.
df.to_csv("nutm1-fusion-sequences.csv", index=True)

In [230]:
# Write the compact fusion table (row index included) to a CSV file.
df_short.to_csv("nutm1-fusion-sequences-compact.csv", index=True)

In [231]:
# Display the protein sequence of the transcript selected for BRD4.
name_to_transcript["BRD4"].protein_sequence

'MSAESGPGTRLRNLPVMGDGLETSQMSTTQAQAQPQPANAASTNPPPPETSNPNKPKRQTNQLQYLLRVVLKTLWKHQFAWPFQQPVDAVKLNLPDYYKIIKTPMDMGTIKKRLENNYYWNAQECIQDFNTMFTNCYIYNKPGDDIVLMAEALEKLFLQKINELPTEETEIMIVQAKGRGRGRKETGTAKPGVSTVPNTTQASTPPQTQTPQPNPPPVQATPHPFPAVTPDLIVQTPVMTVVPPQPLQTPPPVPPQPQPPPAPAPQPVQSHPPIIAATPQPVKTKKGVKRKADTTTPTTIDPIHEPPSLPPEPKTTKLGQRRESSRPVKPPKKDVPDSQQHPAPEKSSKVSEQLKCCSGILKEMFAKKHAAYAWPFYKPVDVEALGLHDYCDIIKHPMDMSTIKSKLEAREYRDAQEFGADVRLMFSNCYKYNPPDHEVVAMARKLQDVFEMRFAKMPDEPEEPVVAVSSPAVPPPTKVVAPPSSSDSSSDSSSDSDSSTDDSEEERAQRLAELQEQLKAVHEQLAALSQPQQNKPKKKEKDKKEKKKEKHKRKEEVEENKKSKAKEPPPKKTKKNNSSNSNVSKKEPAPMKSKPPPTYESEEEDKCKPMSYEEKRQLSLDINKLPGEKLGRVVHIIQSREPSLKNSNPDEIEIDFETLKPSTLRELERYVTSCLRKKRKPQAEKVDVIAGSSKMKGFSSSESESSSESSSSDSEDSETEMAPKSKKKGHPGREQKKHHHHHHQQMQQAPAPVPQQPPPPPQQPPPPPPPQQQQQPPPPPPPPSMPQQAAPAMKSSPPPFIATQVPVLEPQLPGSVFDPIGHFTQPILHLPQPELPPHLPQPPEHSTPPHLNQHAVVSPPALHNALPQQPSRPSNRAAALPPKPARPPAVSPALTQTPLLPQPPMAQPPQVLLEDEEPPAPPLTSMQMQLYLQQLQKVQPPTPLLPSVKVQSQPPPPLPPPPHPSVQQQLQQQPPPPPPPQPQPPPQQQHQPPPRPVHL