# Load Model into GPU

In [1]:
%load_ext autoreload
%autoreload 2
import torch
import esm
from Shared_Functions import *

In [2]:
# Load the pretrained ESM-2 checkpoint together with its alphabet (token vocabulary).
model, alphabet = esm.pretrained.load_model_and_alphabet('esm2_t36_3B_UR50D')
# Inference only: put the module in evaluation mode before any forward pass.
model.eval()
batch_converter = alphabet.get_batch_converter()
# Choose the device from what is actually available so that `device` always
# names the place the model lives; downstream cells receive both `model` and
# `device` and would otherwise mix CPU weights with CUDA inputs.
if torch.cuda.is_available():
    device = torch.device("cuda")
    model = model.to(device)
    print("Transferred model to GPU")
else:
    device = torch.device("cpu")
    print("CUDA not available; keeping model on CPU")

# Download Reference Sequence and Embed

In [3]:
from Bio import Entrez
from Bio import SeqIO
# NCBI asks every E-utilities client to identify itself with a contact address.
Entrez.email = "sample@example.org"
# Fetch accession NC_045512.2 as a GenBank flat file. For the nucleotide
# database that format is rettype="gb" with retmode="text"; "gb" is not a
# documented retmode value.
handle = Entrez.efetch(db="nucleotide",
                       id="NC_045512.2",
                       rettype="gb",
                       retmode="text")
try:
    # SeqIO.read expects exactly one record in the handle.
    whole_sequence = SeqIO.read(handle, "genbank")
finally:
    # Always release the network connection, even if parsing fails.
    handle.close()
# Passed to the DMS helper below; presumably the number of transformer layers
# in the esm2_t36 checkpoint loaded above -- TODO confirm against Shared_Functions.
model_layers = 36

In [6]:
# Score the downloaded record with the loaded model and cache the result on disk.
# Both helpers come from Shared_Functions; presumably this performs a deep
# mutational scan over the annotated features of the record -- confirm there.
dms_results = process_and_dms_sequence_genbank(
    whole_sequence.seq,
    whole_sequence,
    model,
    model_layers,
    device,
    batch_converter,
    alphabet,
)
all_dms_output = 'DMS/Results/sarscov2_all_dms'
compressed_pickle(all_dms_output, dms_results)

# Annotate Spike Table

In [None]:
# NOTE(review): this loads 'sarscov2_spike_dms.pbz2' from the working directory,
# not the 'DMS/Results/sarscov2_all_dms' pickle written by the genome-wide cell
# above, and it rebinds `dms_results` -- confirm this is the intended input.
dms_results = decompress_pickle('sarscov2_spike_dms.pbz2')
spike_results = dms_results['S:0']
mutations_list = list(spike_results.keys())
columns = ['label', 'semantic_score', 'grammaticality', 'relative_grammaticality', 'sequence_grammaticality', 'relative_sequence_grammaticality', 'probability']
# One plain list per mutation, in the order the results dict yields them.
# The 'Reference' entry is skipped; fields missing from an entry become None.
records = [
    [spike_results[key].get(c) for c in columns]
    for key in mutations_list
    if key != 'Reference'
]
# Build the frame once instead of one transposed DataFrame per mutation.
# dtype=object keeps every value exactly as stored (no numeric re-casting), and
# an empty record list yields an empty table rather than an error.
dms_s_table = pd.DataFrame(records, columns=columns, dtype=object)
dms_s_table.to_csv('DMS/Results/Scores/sarscov2_s_dms_scores.csv', index=False)

In [None]:
# Start from the raw per-mutation scores saved as CSV.
dms_s_table = pd.read_csv('DMS/Results/Scores/sarscov2_s_dms_scores.csv')

# Split each label into first character (ref), last character (alt) and the
# integer made of everything in between (position).
label_text = dms_s_table['label'].str
dms_s_table = dms_s_table.assign(
    ref=label_text[0],
    alt=label_text[-1],
    position=label_text[1:-1].astype(int),
)

# Keep Reference scores (rows whose ref and alt characters are the same)
reference_s_table = dms_s_table[dms_s_table['ref'] == dms_s_table['alt']]
# Filter non mutations
dms_s_table = dms_s_table[dms_s_table['ref'] != dms_s_table['alt']]

# (score column to sort by, rank column to create, combined-priority column or None)
rank_plan = [
    ('semantic_score', 'semantic_rank', None),
    ('grammaticality', 'grammatical_rank', 'acquisition_priority'),
    ('sequence_grammaticality', 'sequence_grammatical_rank', 'sequence_acquisition_priority'),
]
for score_col, rank_col, priority_col in rank_plan:
    # Sort ascending, then number the rows 1..N by their position in that order.
    dms_s_table = dms_s_table.sort_values(score_col)
    dms_s_table[rank_col] = pd.RangeIndex(len(dms_s_table)).astype(int) + 1
    if priority_col is not None:
        # Combined priority = semantic rank + this rank (aligned on the row index).
        dms_s_table[priority_col] = dms_s_table['semantic_rank'] + dms_s_table[rank_col]
# The table is left sorted by the last score column in the plan.


In [None]:
# Write the annotated table to disk; no index argument is given, so the
# DataFrame index is written out alongside the columns.
annotated_scores_path = 'DMS/Results/Scores/sarscov2_s_dms_scores_annotated.csv'
dms_s_table.to_csv(annotated_scores_path)