In [None]:
# if using R, run before starting the session
# requireNamespace() only checks that BiocManager is installed; unlike
# require(), it does not attach the package or warn when it is missing
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("OmnipathR", update = FALSE)

# Task 2 programmatic access solution

In [None]:
### define function to retrieve a sequence from UniProt

#' Retrieve a protein sequence from a UniProt flat-text entry
#'
#' Reads the entry located at `<base_url><protein_id>.txt` and returns the
#' text found between the first line starting with `SQ` and the next line
#' starting with `//`, with all whitespace removed.
#'
#' @param protein_id A single, non-empty accession ID (coerced to character).
#' @param base_url Prefix prepended to `protein_id` to build the entry
#'   location. Defaults to the UniProtKB REST endpoint; anything accepted by
#'   `readLines()` works, e.g. a local directory path ending in a separator.
#' @return A length-one character vector holding the sequence, or `""` when
#'   the entry contains no `SQ` section.
get_uniprot_sequence <- function(protein_id,
                                 base_url = "https://rest.uniprot.org/uniprotkb/") {

  # fail early with a clear message instead of an obscure readLines() error
  protein_id <- as.character(protein_id)
  if (length(protein_id) != 1L || is.na(protein_id) || !nzchar(protein_id)) {
    stop("`protein_id` must be a single non-empty accession ID.",
         call. = FALSE)
  }

  # create API URL and read the entry text file
  api_url <- paste0(base_url, protein_id, ".txt")
  entry_lines <- readLines(api_url)

  # the sequence starts on the line after the SQ header
  sq_idx <- which(startsWith(entry_lines, "SQ"))
  if (length(sq_idx) == 0L) {
    return("")
  }
  first_seq <- sq_idx[1L] + 1L

  # ... and stops right before the next // terminator (or at end of file
  # when the terminator is missing)
  end_idx <- which(startsWith(entry_lines, "//"))
  end_idx <- end_idx[end_idx > sq_idx[1L]]
  last_seq <- if (length(end_idx) > 0L) {
    end_idx[1L] - 1L
  } else {
    length(entry_lines)
  }

  # SQ header immediately followed by the terminator: nothing to collect
  if (last_seq < first_seq) {
    return("")
  }

  # join the sequence lines and remove all white space from the result
  sequence <- paste(entry_lines[first_seq:last_seq], collapse = "")
  gsub("[[:space:]]", "", sequence)
}

### apply function
# iterate over accession IDs and print the sequence retrieved for each one
accession_list <- c("P04637", "P40763", "Q92630", "P00533", "Q9BXS6")
for (accession in accession_list) {
  print(get_uniprot_sequence(accession))
}

[1] "MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD"
[1] "MAQWNQLQQLDTRYLEQLHQLYSDSFPMELRQFLAPWIESQDWAYAASKESHATLVFHNLLGEIDQQYSRFLQESNVLYQHNLRRIKQFLQSRYLEKPMEIARIVARCLWEESRLLQTAATAAQQGGQANHPTAAVVTEKQQMLEQHLQDVRKRVQDLEQKMKVVENLQDDFDFNYKTLKSQGDMQDLNGNNQSVTRQKMQQLEQMLTALDQMRRSIVSELAGLLSAMEYVQKTLTDEELADWKRRQQIACIGGPPNICLDRLENWITSLAESQLQTRQQIKKLEELQQKVSYKGDPIVQHRPMLEERIVELFRNLMKSAFVVERQPCMPMHPDRPLVIKTGVQFTTKVRLLVKFPELNYQLKIKVCIDKDSGDVAALRGSRKFNILGTNTKVMNMEESNNGSLSAEFKHLTLREQRCGNGGRANCDASLIVTEELHLITFETEVYHQGLKIDLETHSLPVVVISNICQMPNAWASILWYNMLTNNPKNVNFFTKPPIGTWDQVAEVLSWQFSSTTKRGLSIEQLTTLAEKLLGPGVNYSGCQITWAKFCKENMAGKGFSFWVWLDNIIDLVKKYILALWNEGYIMGFISKERER

# Task 3 programmatic access solution

In [3]:
library(OmnipathR)

# accession IDs of the proteins we want to look up
interesting_proteins <- c("P04637", "P40763", "Q92630", "P00533", "Q9BXS6")

# download the complete interaction table from omnipath
all_interactions <- OmnipathR::import_all_interactions()

# mark every row whose source or target belongs to our selection ...
involves_selection <-
  all_interactions[["source"]] %in% interesting_proteins |
  all_interactions[["target"]] %in% interesting_proteins

# ... and keep only those rows
interesting_interactions <- all_interactions[involves_selection, ]

# preview the first rows, restricted to the three columns of interest
head(interesting_interactions[, c("source", "target", "type")])

source,target,type
<chr>,<chr>,<chr>
P00533,Q8NET8,post_translational
P04637,O43663,post_translational
P04637,P18847,post_translational
P18847,P04637,post_translational
P04198,P04637,post_translational
P23468,P40763,post_translational
