In [None]:
# @title Run

!pip install -q logomaker
!pip install -q "python-benedict[all]"

from benedict import benedict
import pandas as pd
from collections import defaultdict
from typing import List
import logomaker, requests, contextlib
from IPython.display import display, HTML

# @markdown -----------------------------
# @markdown UniProt accession?
# @markdown Go to [UniProt](https://www.uniprot.org/),
# @markdown search for your virus,
# @markdown and copy the alphanumeric accession code
# @markdown (for example, `Q66478` is A71).
uniprot_accession = '' # @param {type:"string"}

# @markdown <-- Press play to run this cell after entering your value.

# @markdown You will be asked to sign in to Google: Colab is provided by Google,
# @markdown and this code runs on Google's servers.


# ## Retrieve
# Download the UniProtKB entry for the requested accession as JSON and wrap it
# in a benedict so nested fields can be read with attribute access.
accession: str = uniprot_accession.strip()  # tolerate stray whitespace around a pasted code
if not accession:
    # Fail early with a readable message instead of an HTTP error for '.json'.
    raise ValueError('No UniProt accession given: fill in `uniprot_accession` and run again.')
# A timeout keeps the cell from hanging indefinitely if the server never answers.
response: requests.Response = requests.get(
    f'https://rest.uniprot.org/uniprotkb/{accession}.json',
    timeout=30,
)
response.raise_for_status()  # raises requests.HTTPError on a 4xx/5xx reply
data = response.json()
bata = benedict(data)
name: str = 'protein'  # fallback label when the recommended-name path is absent
with contextlib.suppress(AttributeError):
    name = bata.proteinDescription.recommendedName.fullName.value
print(f'Downloaded {name}')

# ## Extract
# Group the (start, end) positions of every feature of type 'Site' by the
# feature's description, so each distinct description becomes one site set.
sites = defaultdict(list)
# An entry without a 'features' key would otherwise raise on attribute access;
# treat it as having no sites.
features = bata.features if 'features' in bata else []
for feature in features:
    if feature.type != 'Site':
        continue
    location = feature.location
    sites[feature.description].append((location.start.value, location.end.value))
print(f'There are {len(sites)} site definitions present: one or more may be relevant')

# ## Display
# Draw one sequence logo per site description, built from a fixed-width window
# of residues around each annotated start position.
flank: int = 10  # residues taken on each side of the annotated position
seq: str = bata.sequence.value  # identical for every site, so read it once
for site_name, spans in sites.items():
    # NOTE(review): assumes a non-integer start (e.g. None) marks a position
    # that cannot be windowed — confirm against the UniProt JSON schema.
    cuts: List[int] = [start for start, _end in spans if isinstance(start, int)]
    if not cuts:
        continue  # nothing to align for this description
    cutseqs: List[str] = []
    for c in cuts:
        # Clamp the left edge: with c < flank the plain slice seq[c-flank:...]
        # would use a negative index and wrap around to the end of the sequence.
        left_pad = '-' * max(flank - c, 0)
        window = left_pad + seq[max(c - flank, 0):c + flank]
        # Right-pad windows that run past the end of the sequence, so every
        # string is exactly 2 * flank long (alignment_to_matrix requires equal
        # lengths; '-' is among the characters it ignores by default).
        cutseqs.append(window.ljust(2 * flank, '-'))
    counts: pd.DataFrame = logomaker.alignment_to_matrix(cutseqs)
    logo = logomaker.Logo(counts, color_scheme='chemistry')
    logo.ax.set_title(site_name)