# Tutorial

Simulate genes under a range of selection coefficients, with different demographic models (1-, 2- and 3-epoch) and with selection starting between 15 kya and 25 kya.

In [None]:
# bash generate_training_data.sh

In [1]:
import os
import gzip

import numpy as np
import scipy.stats

import skimage.transform
from keras import models, layers, optimizers, regularizers
from keras.utils import to_categorical, plot_model

import matplotlib.pyplot as plt
import pymc3
import pydot

Using TensorFlow backend.


In [2]:
# Execute ImaGene.py inside this notebook's namespace (-i), so the names it defines
# become available to later cells. ImaFile and ImaNet, used below, are presumably
# defined there: they are not imported anywhere else in this notebook.
%run -i ImaGene.py

For the first analysis, we seek to understand the impact of sorting images on the quantification.
We use the 1-epoch model for this first analysis.
Results will be visualised with linear regression plots.

Read simulations and store in object.

In [3]:
# Point an ImaFile at the 1-epoch simulation folder (128 samples per simulation file).
myfile = ImaFile(
    simulations_folder='/home/mfumagal/Data/ImaGene/Simulations_Epoch1',
    nr_samples=128,
    model_name='Marth-1epoch-CEU',
)

In [5]:
import time

# Print a wall-clock timestamp before the simulations are read in the next cell,
# as a manual reference for how long that step takes.
localtime = time.localtime(time.time())
print("Local current time :", localtime)

Local current time : time.struct_time(tm_year=2018, tm_mon=12, tm_mday=20, tm_hour=12, tm_min=5, tm_sec=51, tm_wday=3, tm_yday=354, tm_isdst=0)
Local current time : time.struct_time(tm_year=2018, tm_mon=12, tm_mday=20, tm_hour=12, tm_min=5, tm_sec=51, tm_wday=3, tm_yday=354, tm_isdst=0)


In [6]:
# Read the simulation files behind `myfile` into a single object (the output lists each file read).
# NOTE(review): `selection_coeff_hetero` is presumably the simulation parameter that ends up
# in `mypop.target` — confirm in ImaGene.py.
mypop = myfile.read_simulations(parameter_name='selection_coeff_hetero')

Local current time : time.struct_time(tm_year=2018, tm_mon=12, tm_mday=20, tm_hour=12, tm_min=6, tm_sec=31, tm_wday=3, tm_yday=354, tm_isdst=0)
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..280...0150..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..130...0175..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..140...0250..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..300...0250..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..200...0225..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..210...0225..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..240...0200..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..90...0200..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..260...0225..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..210...0250..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..230...0200..tx

/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..150...0175..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..10...0150..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..370...0150..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..30...0175..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..140...0200..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..320...0200..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..240...0225..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..230...0150..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..130...0250..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..350...0150..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..140...0225..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..390...0150..txt.gz : 2000
/home/mfumagal/Data/ImaGene/Simulations_Epoch1/msms..3

(optional) Save description of files in .json file

In [7]:
import json

# Write mypop.description to disk as JSON with sorted keys.
# allow_nan=False makes json.dump raise ValueError rather than emit NaN/Infinity.
description_path = '/home/mfumagal/Data/ImaGene/simulations_Epoch1.json'
with open(description_path, 'w') as fp:
    json.dump(
        mypop.description,
        fp,
        sort_keys=True,
        allow_nan=False,
        indent=0,
    )

(optional) Save the original data object.

In [8]:
# Import the public module: on Python 3 it transparently uses the C accelerator
# (_pickle) when available, so the private module need not be imported directly.
import pickle

In [None]:
# to save
mypop_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop'
# Make sure the output folder exists; exist_ok=True keeps this cell safe to re-run.
os.makedirs(os.path.dirname(mypop_path), exist_ok=True)
with open(mypop_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
# to load: restore the object written by the save cell above.
# Only unpickle files you produced yourself; pickle.load can execute arbitrary code.
saved_mypop_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop'
with open(saved_mypop_path, 'rb') as fp:
    mypop = pickle.load(fp)

Explore the object.

In [1]:
# Needs `mypop` in memory: run the read cell or the "to load" cell first,
# otherwise this raises NameError on a fresh kernel.
mypop.summary()

NameError: name 'mypop' is not defined

In [None]:
# Number of images, then shape and dtype of the first one.
first_image = mypop.data[0]
print(len(mypop.data))
print(first_image.shape)
print(first_image.dtype)

Plot one image for no selection, at 1% and at 2%

In [None]:
# Plot the first image in the object and print its description entry.
mypop.plot(0)
print(mypop.description[0])

In [None]:
# First entry whose target equals 0: plot it, print its description, show its shape.
matches = np.where(mypop.target==0)[0]
idx = matches[0]
mypop.plot(idx)
print(mypop.description[idx])
mypop.data[idx].shape

In [None]:
# First entry whose target equals 400: plot it, print its description, show its shape.
matches = np.where(mypop.target==400)[0]
idx = matches[0]
mypop.plot(idx)
print(mypop.description[idx])
mypop.data[idx].shape

Switch to major/minor allele polarisation.

In [None]:
# Re-polarise the data in place to major/minor alleles, then re-plot the first image
# so the effect can be compared with the earlier plot.
mypop.majorminor()
mypop.plot(0)

In [None]:
# optional: checkpoint the object after the major/minor step
majorminor_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor'
with open(majorminor_path, 'wb') as fp:
    pickle.dump(mypop, fp)

Filter out rare variants.

In [None]:
# Filter in place with a frequency threshold of 0.01 (rare variants, per the heading above),
# then re-plot the first image and display its new shape as the cell output.
mypop.filter_freq(0.01)
mypop.plot(0)
mypop.data[0].shape

In [None]:
# Summarise the object again, now that it has been filtered.
mypop.summary()

In [None]:
# NOT optional: every sorting cell below reloads this exact file as its starting point.
with open('/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered','wb') as fp:
    pickle.dump(mypop, fp)

Sort images using different orderings.
Then resize to same dimensions 128x128.
Then convert them to float numpy arrays and shuffle using the same order.

In [None]:
# One permutation shared by all sorting cells below, so every saved variant is shuffled identically.
# A fixed seed lets the same order be regenerated after a kernel restart.
SHUFFLE_SEED = 0
shuffle_index = np.random.RandomState(SHUFFLE_SEED).permutation(len(mypop.data))

In [None]:
## NONE
# Baseline variant: no sorting is applied, images are only resized.
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortednone'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# resize only, then show the first image
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
## ROWS FREQ
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortedrowsfreq'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# apply the ordering(s) for this variant, then resize and show the first image
for ordering in ('rows_freq',):
    mypop.sort(ordering)
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
## COLS FREQ
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortedcolsfreq'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# apply the ordering(s) for this variant, then resize and show the first image
for ordering in ('cols_freq',):
    mypop.sort(ordering)
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
## ROWS+COLS FREQ
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortedrowscolsfreq'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# rows first, then columns (order preserved), then resize and show the first image
for ordering in ('rows_freq', 'cols_freq'):
    mypop.sort(ordering)
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
## ROWS+COLS DISTANCE
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortedrowscolsdist'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# rows first, then columns (order preserved), then resize and show the first image
for ordering in ('rows_distance_top', 'cols_distance_top'):
    mypop.sort(ordering)
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
## ROWS DISTANCE
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortedrowsdist'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# apply the ordering(s) for this variant, then resize and show the first image
for ordering in ('rows_distance_top',):
    mypop.sort(ordering)
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
## COLS DISTANCE
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortedcolsdist'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# apply the ordering(s) for this variant, then resize and show the first image
for ordering in ('cols_distance_top',):
    mypop.sort(ordering)
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
## ROWS DISTANCE + COLS FREQ
filtered_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered'
sorted_path = '/home/mfumagal/Data/ImaGene/Sorting_effect_Epoch1/mypop_majorminor_filtered_sortedrowsdistcolsfreq'
# start from the filtered object saved earlier
with open(filtered_path, 'rb') as fp:
    mypop = pickle.load(fp)
# rows first, then columns (order preserved), then resize and show the first image
for ordering in ('rows_distance_top', 'cols_freq'):
    mypop.sort(ordering)
mypop.resize((128, 128))
mypop.plot(0)
# convert, then apply the shared permutation
mypop.convert()
mypop.shuffle(shuffle_index)
# store this variant
with open(sorted_path, 'wb') as fp:
    pickle.dump(mypop, fp)

Set classes and targets.

In [None]:
# Operates on whichever `mypop` the most recently executed sorting cell left in memory.
print(mypop.classes)
# NOTE(review): target is reset to float32 zeros right before set_targets(sd=1);
# presumably set_targets rebuilds it from the descriptions — confirm in ImaGene.py.
mypop.target = np.zeros(len(mypop.data), dtype='float32')
mypop.set_targets(sd=1)
# Display the target of the first image as the cell output.
mypop.target[0]

Build, compile, train and test a network.

In [None]:
# Create an ImaNet instance from the current `mypop`; later cells read
# mynet.input_shape, mynet.output_shape and mynet.gene from it.
mynet = ImaNet(mypop)

In [None]:
# Build the network, e.g. for multiclassification:
# two Conv2D + MaxPool2D stages, dropout, flatten, then two dense layers.
mynet.net = models.Sequential()
# Only the first layer of a Sequential model needs input_shape.
mynet.net.add(layers.Conv2D(16, (3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001), input_shape=mynet.input_shape))
mynet.net.add(layers.MaxPool2D((2,2)))
mynet.net.add(layers.Conv2D(32, (3,3), activation='relu', padding='same', kernel_regularizer=regularizers.l2(0.001)))
mynet.net.add(layers.MaxPool2D((2,2)))
mynet.net.add(layers.Dropout(0.5))
mynet.net.add(layers.Flatten())
mynet.net.add(layers.Dense(64, activation = 'relu'))
# NOTE(review): the output layer has `output_shape-1` sigmoid units — confirm against
# ImaNet that this matches the targets produced by set_targets.
mynet.net.add(layers.Dense(mynet.output_shape-1, activation = 'sigmoid'))

In [None]:
# Print the layer-by-layer summary, then write a diagram of the network to disk.
mynet.net.summary()
net_diagram_path = 'Data/net.png'
# The relative 'Data' folder may not exist in the working directory; create it so
# writing the image does not fail. exist_ok=True keeps the cell re-runnable.
os.makedirs(os.path.dirname(net_diagram_path), exist_ok=True)
plot_model(mynet.net, to_file=net_diagram_path)

In [None]:
# Binary cross-entropy loss, RMSprop with learning rate 0.001, accuracy reported as 'acc'.
rmsprop = optimizers.RMSprop(lr=0.001)
mynet.net.compile(
    loss='binary_crossentropy',
    optimizer=rmsprop,
    metrics=['acc'],
)

In [None]:
# Split sizes: notraining[0] is the fraction of images withheld from training.
nr_total = mynet.gene.data.shape[0]
held_out_fraction = mynet.notraining[0]
nr_train = int(nr_total * (1 - held_out_fraction))
nr_test = int(nr_total) - nr_train
print(nr_train, nr_test)

In [None]:
# Train for 20 epochs with batches of 128.
mynet.train(epochs=20, batch_size=128)

In [None]:
# Plot the training history recorded by the previous cell.
# NOTE(review): the exact curves shown are defined in ImaGene.py — confirm there.
mynet.plot_train()

In [None]:
# Evaluate the trained network and print whatever scores ImaNet.test() returns.
scores = mynet.test()
print(scores)

In [None]:
# Predict the first image (sliced as 0:1 so the input stays a batch of one).
values = mynet.predict(gene=mypop.data[0:1])
# Show the prediction so it can be compared with the reference printed below.
print(values)
# Class with the largest target weight for the first image, then its description.
print(mypop.classes[np.argmax(mypop.target[0])])
print(mypop.description[0])