In [None]:
import os

# Backend configuration, set before Keras is imported further down.
# THEANO_FLAGS is a comma-separated list of key=value pairs. The value must not
# carry shell-style quotes: Python stores the string verbatim, so inner quotes
# would become part of the flag name/value and the setting would be malformed.
os.environ['THEANO_FLAGS'] = "device=cpu"
# Select the TensorFlow backend for Keras.
os.environ['KERAS_BACKEND'] = "tensorflow"

In [None]:
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns

from mhcflurry.dataset import Dataset
from mhcflurry.peptide_encoding import indices_to_hotshot_encoding
from mhcflurry.regression_target import ic50_to_regression_target

In [None]:
# Location of the combined human class I CSV inside the local mhcflurry data directory.
file_to_explore = "/root/.local/share/mhcflurry/2/class1_data/combined_human_class1_dataset.csv"

# Parse the comma-separated file into an mhcflurry Dataset; the peptide
# sequences are read from the column named "peptide".
dataset = Dataset.from_csv(filename=file_to_explore, sep=",", peptide_column_name="peptide")

In [None]:
# Convert the loaded Dataset into a DataFrame for exploration.
df = dataset.to_dataframe()
# Display the available column labels as the cell output.
df.columns

In [None]:
df[df.species == 'human'].groupby('affinity').size().order().tail(10)

In [None]:
df_reduced = df[df.allele.isin(['HLA-A0201', 'HLA-A2301', 'HLA-A2402', 'HLA-A1101'])][['allele','affinity']].reset_index(drop=True)

In [None]:
sns.violinplot(x=df_reduced['allele'], y=np.log(df_reduced['affinity']))

In [None]:
# Map the raw affinities through mhcflurry's regression-target transform,
# then compare the transformed values across alleles.
scaled_affinity = ic50_to_regression_target(df_reduced.affinity)
sns.boxplot(
    x=df_reduced.allele,
    y=scaled_affinity,
)

In [None]:
df_reduced.groupby('allele').size()

In [None]:
# kmer_index_encoding() returns an indexable result; only items 0 and 1 are used.
# NOTE(review): presumably item 0 holds index-encoded peptides and item 1 the
# matching IC50 values — confirm against mhcflurry's Dataset.
df_kmers = dataset.kmer_index_encoding()
kmer_indices, ic50_values = df_kmers[0], df_kmers[1]

# Model inputs and regression targets.
training_hotshot = indices_to_hotshot_encoding(kmer_indices)
training_labels = ic50_to_regression_target(ic50_values)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Activation

# One dense unit over 189 input features followed by a sigmoid, trained with
# mean squared error using RMSprop.
# NOTE(review): 189 must equal the feature width of training_hotshot
# (presumably 9 positions x 21 symbols — TODO confirm).
model = Sequential([
    Dense(input_dim=189, output_dim=1),
    Activation("sigmoid"),
])
model.compile(optimizer="rmsprop", loss="mse")

In [None]:
model.fit(training_hotshot, training_labels, nb_epoch=5, batch_size=1)