In [None]:
# Generic imports:
%matplotlib inline
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import math
import random
import re
import os

# Machine learning/Stats imports:
import keras
import tensorflow as tf
from sklearn.decomposition import PCA
from keras.utils.vis_utils import model_to_dot, plot_model

# Switch into the vampire checkout before importing the package. Presumably this
# is what lets `import vampire` and the relative data paths used in later cells
# resolve. NOTE(review): machine-specific absolute path; adjust before running
# on another machine.
os.chdir('/home/matsen/Downloads/repos/vampire/')
import vampire
import vampire.xcr_vector_conversion as conversion
import vampire.tcr_vae as tcr_vae
import vampire.germline_cdr3_aa_tensor as aa_tensor
import vampire.models as models
import vampire.common as common
import vampire.preprocess_adaptive as preprocess

import importlib
# Re-execute the top-level `vampire` package so that edits to it are picked up
# without restarting the kernel. NOTE(review): importlib.reload only reloads the
# module it is given; the submodules imported above are not reloaded by this
# call. Confirm whether they need reloading too.
importlib.reload(vampire)

def model_to_svg(model, path):
    """
    Render the graph of a Keras model to an SVG file.

    The graph is built with `model_to_dot` and rendered through its `create`
    method using the `dot` program. Rendering is done *before* `path` is
    opened, so a failure while rendering neither creates nor truncates the
    output file.

    Parameters
    ----------
    model
        Model object handed to `model_to_dot`.
    path
        Destination file; overwritten if it already exists.

    Raises
    ------
    Whatever `model_to_dot(...)`, `.create(...)` or `open(...)` raise; nothing
    is caught here.
    """
    # Render first: the file must only be touched once there is data to write.
    svg_bytes = model_to_dot(model).create(prog='dot', format='svg')
    with open(path, 'wb') as fp:
        fp.write(svg_bytes)

### Real data

In [None]:
# Read the Adaptive-format sample TSV (path is relative to the working
# directory) and pass it through `preprocess.apply_all_filters`.
sample_tsv = 'vampire/pipe_main/sample_data/02-0249_TCRB.4000.tsv.bz2'
df = preprocess.apply_all_filters(preprocess.read_adaptive_tsv(sample_tsv))

# Emit rows at positions 1000-1004 as an HTML table without the index column.
preview_rows = df.iloc[range(1000, 1005), :]
print(preview_rows.to_html(index=False))

### `basic` model _before training_

In [None]:
# Start from the default parameters and select the `basic` model; the model is
# constructed fresh here and not trained.
d = tcr_vae.TCRVAE.default_params()
d['model'] = 'basic'
v = tcr_vae.TCRVAE(d)

# Save the decoder graph as SVG.
basic_decoder_svg = '/home/matsen/Downloads/basic_decoder.svg'
model_to_svg(v.decoder, basic_decoder_svg)

# Generate five sequences from the fresh model and print them as an HTML table.
basic_generated = v.generate(5)
print(basic_generated.to_html(index=False))

### `count_match` model _before training_

In [None]:
# Start from the default parameters and select the `count_match` model; the
# model is constructed fresh here and not trained.
d = tcr_vae.TCRVAE.default_params()
d['model'] = 'count_match'
v = tcr_vae.TCRVAE(d)

# Save the decoder graph as SVG.
count_match_decoder_svg = '/home/matsen/Downloads/count_match_decoder.svg'
model_to_svg(v.decoder, count_match_decoder_svg)

# Generate five sequences from the fresh model and print them as an HTML table.
count_match_generated = v.generate(5)
print(count_match_generated.to_html(index=False))

In [None]:
# Draw the whole VAE graph, including shapes, to a PNG file. This call stays
# the last expression of the cell so its return value is still displayed.
plot_model(
    v.vae,
    to_file='/home/matsen/Downloads/model.png',
    show_shapes=True,
)

In [None]:
# Build another fresh `count_match` model from the default parameters.
d = tcr_vae.TCRVAE.default_params()
d['model'] = 'count_match'
v = tcr_vae.TCRVAE(d)

# This time save the graph of the full VAE (not just the decoder) as SVG.
count_match_vae_svg = '/home/matsen/Downloads/count_match.svg'
model_to_svg(v.vae, count_match_vae_svg)

### Latent space visualization

In [None]:
def add_pcs(v, df):
    """
    Add principal component information to a copy of a TCR data frame.

    The rows of `df` are converted with `conversion.unpadded_tcrbs_to_onehot`,
    encoded with `v.encode`, and the first element of the encoder output
    (treated as the latent mean) is projected onto its first two principal
    components. The PCA is fit on the rows of `df` itself, so the resulting
    coordinates are specific to the frame passed in.

    Parameters
    ----------
    v
        Model object providing `encode(...)` and `params['max_cdr3_len']`
        (a `tcr_vae.TCRVAE` in this notebook).
    df : pandas.DataFrame
        TCR data frame accepted by `conversion.unpadded_tcrbs_to_onehot`.

    Returns
    -------
    pandas.DataFrame
        An independent copy of `df` with two extra columns, `pc_1` and `pc_2`.
        The input frame is never modified.
    """
    onehot = conversion.unpadded_tcrbs_to_onehot(df, v.params['max_cdr3_len'])
    # encode() returns a pair; only the first element is used here.
    z_mean, _ = v.encode(onehot)

    pca = PCA(n_components=2)
    pca.fit(z_mean)
    z_mean_pcs = pca.transform(z_mean)

    # Force a real copy: for DataFrame input, pd.DataFrame(df) on its own does
    # not copy the underlying data, so changes could leak back to the caller.
    out = pd.DataFrame(df, copy=True)
    out['pc_1'] = z_mean_pcs[:, 0]
    out['pc_2'] = z_mean_pcs[:, 1]
    return out

In [None]:
# Move into the plotting directory so the relative paths below resolve there.
os.chdir('/home/matsen/Downloads/repos/vampire/vampire/_ignore/plotting/2019-02-08-deneuter-train/')

# Load a saved `basic` model from its directory.
v = tcr_vae.TCRVAE.of_directory('deneuter-2019-02-07.train/0.75/basic/')

# Read the merged data, attach the two principal-component columns and save.
df = pd.read_csv('merged.agg.csv.bz2')
pcs_all = add_pcs(v, df)
pcs_all.to_csv('pcs.csv', index=False)

In [None]:
# Keep only the rows with the selected V gene / J gene combination.
has_v_gene = df['v_gene'] == 'TCRBV30-01'
has_j_gene = df['j_gene'] == 'TCRBJ01-02'
df_topgenes = df.loc[has_v_gene & has_j_gene,]

# Fit the PCA on this subset alone and save the result.
pcs_topgenes = add_pcs(v, df_topgenes)
pcs_topgenes.to_csv('pcs_topgenes.csv', index=False)