In [1]:
import base64
import sys

import ipywidgets as widgets
import matplotlib.pyplot as plt
from IPython.core.display import HTML

import random_proteome as randp
AMINO_ACID_LST = randp.AMINO_ACID_LST

# Random proteome generator 🎲

This app generates a random proteome, using the proteome you upload (a FASTA file) as a template.

The random and template proteomes share the following properties:

- Same number of proteins.
- Same protein size distribution.
- Same amino acid distribution.

In [2]:
# Upload widget for the template proteome. `accept=""` sets no file-type
# filter and `multiple=False` restricts the selection to a single file.
upload_button = widgets.FileUpload(
    button_style="info",
    accept="",
    description="Upload template proteome",
    layout={"width": "250px"},
    multiple=False,  # True to accept multiple files upload else False
)

# Log area shared by the button callbacks to report progress to the user.
# NOTE: a 'color' entry used to be passed here, but it is not a Layout
# property: the rendered widget only ever carried border/padding/width, so the
# key was silently dropped (and can trigger an "unrecognized argument" warning).
# Text colour would have to be set through CSS/HTML instead.
output_log = widgets.Output(
    layout={
        "border": "1px solid #e5e8e8",
        "padding": "3px",
        "width": "505px",
    }
)

# Initial hint displayed in the log area before any upload happens.
with output_log:
    print("🔼 Click on the button above to upload fasta file.")

In [3]:
# Path where the uploaded template proteome is written on disk; the
# "Generate random proteome" callback reads the template back from this file.
template = "template-proteome.fasta"


@output_log.capture(clear_output=True)
def on_file_upload(change):
    """Save the uploaded template proteome to disk and report it in the log.

    Registered as an observer of ``upload_button.value``. The decorator clears
    ``output_log`` and redirects everything printed here into it, so no extra
    ``with output_log:`` block is needed.

    Parameters
    ----------
    change : dict
        Change notification sent by the widget (unused; the current value is
        read directly from ``upload_button``).

    Raises
    ------
    ValueError
        If the widget holds anything other than exactly one uploaded file.
    """
    uploads = upload_button.value
    if not uploads:
        # Value was reset or is still empty: nothing to save.
        return

    # ipywidgets 7.x exposes a dict keyed by filename; ipywidgets 8.x exposes
    # a tuple of per-file dicts. Both keep the raw bytes under "content".
    if isinstance(uploads, dict):
        [uploaded_file] = uploads.values()
    else:
        [uploaded_file] = uploads

    with open(template, "wb") as fp:
        fp.write(uploaded_file["content"])

    print("File uploaded successfully!")
    print("Now click on the button 'Generate random proteome' 🔼")


upload_button.observe(on_file_upload, "value")

In [4]:
process_button = widgets.Button(
    description="Generate random proteome",
    button_style="info",
    layout={"width": "250px"},
)


@output_log.capture(clear_output=True)
def on_click_process_button(b):
    """Build a random proteome from the uploaded template and offer downloads.

    Click handler for ``process_button``. Everything printed or displayed here
    is redirected to ``output_log``, which is cleared first by the decorator.

    Steps, in order: read the template file, compute its amino acid
    proportions, shuffle the whole proteome sequence, build the random
    proteins from the template protein lengths, write the FASTA and TSV files,
    then display one download link per file.

    Parameters
    ----------
    b : widgets.Button
        The clicked button (unused).

    Returns
    -------
    int or None
        ``1`` when no file has been uploaded yet, ``None`` otherwise.
    """
    if not upload_button.value:
        print("🤨 Upload file first!")
        return 1

    # Output files, written to the current working directory.
    fasta_filename = "random_proteome.fasta"
    tsv_filename = "random_proteome.tsv"

    # Load the template and summarise what was read.
    protein_names, protein_lengths, proteome_seq = randp.read_template_proteome(template)
    print(f"Read {len(protein_names)} proteins.")
    print(f"Found {len(proteome_seq)} amino acids.")

    # Reference proportions come from the template sequence, before shuffling.
    reference_distribution = randp.get_amino_acid_proportion(
        proteome_seq, randp.AMINO_ACID_LST
    )
    shuffled_seq = randp.shuffle_sequence(proteome_seq)

    print("\n🦠 Building random proteome...")
    random_proteins = randp.create_random_proteins_from_proteome(
        shuffled_seq, protein_lengths
    )
    randp.write_fasta(random_proteins, fasta_filename)
    print(f"{len(random_proteins)} proteins created!")

    print("\n📊 Computing amino acid distribution probability...")
    randp.write_distribution(
        random_proteins,
        randp.AMINO_ACID_LST,
        tsv_filename,
        ref_distribution=reference_distribution,
    )
    print("Saved!\n")

    # One download link per generated file; the href is relative to the
    # notebook server's "files" endpoint.
    download_links = (
        (fasta_filename,
         "Click here to download the random proteome (.fasta file)"),
        (tsv_filename,
         "Click here to download amino acid distribution of the random proteome (.tsv file)"),
    )
    for filename, label in download_links:
        display(HTML(f"<a download='{filename}' href='../files/{filename}'>{label}</a>"))

    # For the record, example to download the file through
    # a base64 string inside the HTML href tag.
    #fasta_data = open(fasta_filename, "r").read().encode("ascii")
    #fasta_data_encoded = base64.b64encode(fasta_data)
    #fasta_data_clean = str(fasta_data_encoded).replace("'", "")[1:]
    #display(HTML(f"<a download='{fasta_filename}' href='data:text/plain;charset=utf-8;base64,{fasta_data_clean}'>Click here to download random proteome</a>"))


process_button.on_click(on_click_process_button)

In [5]:
# Render the user interface: both buttons in one horizontal box, followed by
# the shared log area.
display(
    widgets.HBox(children=[upload_button, process_button]),
    output_log,
)

HBox(children=(FileUpload(value={}, button_style='info', description='Upload template proteome', layout=Layout…

Output(layout=Layout(border='1px solid #e5e8e8', padding='3px', width='505px'))