# Quick start for binary classification

Quick start for binary classification

In [None]:
# bash generate_data_binary.sh

In [None]:
import os
import gzip

import numpy as np
import scipy.stats

import skimage.transform
from keras import models, layers, activations, optimizers, regularizers
from keras.utils import plot_model
from keras.models import load_model

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import pymc3 # this will be removed
import pydot # optional

In [None]:
# Execute ImaGene.py inside this notebook's namespace (-i). The cells below use names
# that this notebook never defines itself (e.g. ImaFile) -- presumably they come from that script.
%run -i ImaGene.py

First iteration.

Read the simulations and store them in an object.

In [None]:
# Handle on the simulation folder; the keyword values are passed to ImaFile unchanged.
myfile = ImaFile(
    simulations_folder='/home/mfumagal/Data/ImaGene.binary/Simulations1.Epoch3',
    nr_samples=128,
    model_name='Marth-3epoch-CEU',
)

In [None]:
# Read the simulations referenced by myfile into mypop.
# NOTE(review): parameter_name and max_nrepl are interpreted by ImaGene.py -- presumably the
# target parameter and a cap on the number of replicates; confirm there.
mypop = myfile.read_simulations(parameter_name='selection_coeff_hetero', max_nrepl=2000)

Have a look at the data and the allele frequency for the selected allele.

In [None]:
# Inspect the freshly loaded object (presumably prints an overview -- see ImaGene.py).
mypop.summary()

In [None]:
# NOTE(review): the meaning of the second argument (0.5) is defined in ImaGene.py --
# presumably the relative position of the selected allele; confirm.
freqs = calculate_allele_frequency(mypop, 0.5)
# Scatter the computed frequencies against the targets stored in mypop.
plt.scatter(mypop.targets, freqs, marker='o')
plt.xlabel('Target')
plt.ylabel('Allele frequency')

Manipulate the object: apply major/minor polarisation, filter by frequency, sort rows and columns, resize, and convert to float.

In [None]:
# Preprocessing chain. No return value is used, so each call presumably modifies mypop
# in place; keep the order, since every step works on the result of the previous one.
mypop.majorminor()
mypop.filter_freq(0.01)
# Sort rows first, then columns.
mypop.sort('rows_freq')
mypop.sort('cols_freq')
mypop.resize((128, 128))
mypop.convert(verbose=True)

Plot one image per class as an illustration and check the new dimensions.

In [None]:
# For every class, print its label and plot the first image whose target equals it.
for sel in mypop.classes:
    print(sel)
    matches = np.where(mypop.targets == sel)[0]
    mypop.plot(matches[0])

# Afterwards, check the dimensions again.
mypop.summary()

Select only images corresponding to specified classes (e.g. 0 and 300) and check new dimensions.

In [None]:
# Restrict the classes of interest to 0 and 300, then collect the indices of the matching images.
mypop.classes = np.array([0,300])
classes_idx = get_index_classes(mypop.targets, mypop.classes)
# Last expression of the cell: displays how many indices were selected.
len(classes_idx)

In [None]:
# Keep only the images selected above and check the resulting object.
mypop.subset(classes_idx)
mypop.summary()

Randomly shuffle images.

In [None]:
# Shuffle by subsetting with the indices returned by get_index_random
# (presumably a random permutation of all images -- see ImaGene.py).
rnd_idx = get_index_random(mypop)
mypop.subset(rnd_idx)

Convert targets to the appropriate format for keras (vectorise them).

In [None]:
# Replace the targets with the output of to_binary(); the network defined further down
# ends in a single sigmoid unit trained with binary cross-entropy.
mypop.targets = to_binary(mypop.targets)

The data is ready to be used for the classification.
You can save it.

In [None]:
import _pickle as pickle

In [None]:
# to save, create folder
# makedirs also creates missing parent folders, and exist_ok=True lets this cell be
# re-run without raising FileExistsError once the folder is already there.
os.makedirs('/home/mfumagal/Data/ImaGene.binary/Images1.Epoch3', exist_ok=True)

In [None]:
# save
# Serialise the prepared object with pickle; the target folder must already exist.
with open('/home/mfumagal/Data/ImaGene.binary/Images1.Epoch3/mypop','wb') as fp:
    pickle.dump(mypop, fp)

In [None]:
# to load:
# Unpickling can execute arbitrary code, so only load files you created yourself.
with open('/home/mfumagal/Data/ImaGene.binary/Images1.Epoch3/mypop','rb') as fp:
    mypop = pickle.load(fp)

Build the model.

In [None]:
# Re-run ImaGene.py in the notebook namespace (-i), e.g. to pick up edits made to the script.
%run -i ImaGene.py

In [None]:
# Create the ImaNet wrapper; the Keras model is attached to it as mynet.model in the next cell.
mynet = ImaNet()

In [None]:
# Three identical Conv2D -> MaxPooling2D -> Dropout stages, followed by Flatten and a
# single sigmoid output unit. Only the first convolution declares the input shape.
conv_kwargs = dict(filters=32, kernel_size=(3,3), strides=(1,1), activation='relu', padding='valid')
stack = []
for stage in range(3):
    extra = {'input_shape': mypop.data.shape[1:4]} if stage == 0 else {}
    stack.append(layers.Conv2D(**conv_kwargs, **extra))
    stack.append(layers.MaxPooling2D(pool_size=(2,2)))
    stack.append(layers.Dropout(rate=0.5))
stack.append(layers.Flatten())
#layers.Dense(units=128, activation='relu'),
stack.append(layers.Dense(units=1, activation='sigmoid'))
mynet.model = models.Sequential(stack)

In [None]:
# Binary classification set-up: RMSprop optimiser, binary cross-entropy loss,
# accuracy reported as the metric.
mynet.model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

In [None]:
# Visualise the network (ImaNet helper; presumably writes an image of the architecture -- see ImaGene.py).
mynet.plot_net()

In [None]:
# Train for one epoch, holding out 10% of the data for validation.
# Keep the History object returned by fit(): a later cell reads `history.history`,
# which would otherwise raise NameError.
history = mynet.model.fit(mypop.data, mypop.targets, batch_size=64, epochs=1, verbose=1, validation_split=0.10)

In [None]:
# Start from an empty training-history dictionary.
mynet.history = {}

In [None]:
# Copy the metrics from the Keras History object (`history`, as returned by model.fit)
# into mynet.history.
mynet.history.update(history.history)

In [None]:
# Display the recorded training history.
mynet.history

In [None]:
# Save the Keras model to net.h5 in the current working directory.
mynet.model.save('net.h5')

In [None]:
# Plot the training progress (ImaNet helper; presumably based on the recorded history -- see ImaGene.py).
mynet.plot_train()

Iterate over all repetitions (index i) and all models (index e, if relevant).
The first repetition initialises the network.
The last repetition is used only for testing.

In [1]:
import os
import gzip

import numpy as np
import scipy.stats

import skimage.transform
from keras import models, layers, activations, optimizers, regularizers
from keras.utils import plot_model
from keras.models import load_model

import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import pymc3 # this will be removed
import pydot # optional

Using TensorFlow backend.


In [2]:
# Execute ImaGene.py inside this notebook's namespace (-i) so its definitions are available below.
%run -i ImaGene.py

In [4]:
import _pickle as pickle

In [None]:
import pathlib


%run -i ImaGene.py
# get_ipython().run_line_magic('run', '-i /rds/general/user/mfumagal/home/Software/ImaGene/ImaGene.py')

for s in [100, 200, 300, 400]:

    for m in ['None', 'Rows', 'Cols', 'RowsCols']:

        for e in [1, 2, 3]:

            folder = '/home/mfumagal/Data/ImaGene/Binary/Results/Epoch' + str(e) + '/S' + str(s) + '/' + str(m)
            print(folder)
            pathlib.Path(folder).mkdir(parents=True, exist_ok=True) 
                       
            i = 0
            while i < 10:

                i += 1
                print(str(s) + str(m) + str(e) + str(i))

                myfile = ImaFile(simulations_folder='/home/mfumagal/Data/ImaGene/Binary/Simulations' + str(i) 
                         + '.Epoch' + str(e), nr_samples=128, model_name='Marth-' + str(e) + 'epoch-CEU')
                mypop = myfile.read_simulations(parameter_name='selection_coeff_hetero', max_nrepl=20)
    
                mypop.majorminor()
                mypop.filter_freq(0.01)
            
                if m == 'Rows':
                    mypop.sort('rows_freq')
                if m == 'Cols':
                    mypop.sort('cols_freq')
                if m == 'RowsCols':
                    mypop.sort('rows_freq')
                    mypop.sort('cols_freq')
                
                mypop.resize((128, 128))
                mypop.convert()
    
                mypop.classes = np.array([0,int(s)])
                classes_idx = get_index_classes(mypop.targets, mypop.classes)
                mypop.subset(classes_idx)
    
                rnd_idx = get_index_random(mypop)
                mypop.subset(rnd_idx)
    
                mypop.targets = to_binary(mypop.targets)
    
                if i == 1:
                    mynet = ImaNet(name='CPx2')
                    mynet.model = models.Sequential([
                        layers.Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), activation='relu', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01), padding='valid', input_shape=mypop.data.shape[1:4]),
                        layers.MaxPooling2D(pool_size=(2,2)),
                        #layers.Dropout(rate=0.5),
                        layers.Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), activation='relu', kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01), padding='valid'),
                        layers.MaxPooling2D(pool_size=(2,2)),
                        #layers.Dropout(rate=0.5),
                        #layers.Conv2D(filters=32, kernel_size=(3,3), strides=(1,1), activation='relu', padding='valid'),
                        #layers.MaxPooling2D(pool_size=(2,2)),
                        #layers.Dropout(rate=0.5),
                        layers.Flatten(),
                        layers.Dense(units=64, activation='relu'),
                        layers.Dense(units=1, activation='sigmoid')])
                    mynet.model.compile(optimizer='rmsprop',
                        loss='binary_crossentropy',
                        metrics=['accuracy'])
                    mynet.plot_net(summary=True, file=folder + '/net.png')
                else:
                    model = load_model(folder + '/net.h5')
    
                if i < 10:
                    score = mynet.model.fit(mypop.data, mypop.targets, batch_size=32, epochs=1, verbose=0, validation_split=0.10)
                    mynet.update_scores(score)
                    mynet.model.save(folder + '/net.h5')
                else:
                    mynet.test = mynet.model.evaluate(mypop.data, mypop.targets, batch_size=None, verbose=0)
                    print(mynet.test)

            # save the latest data (testing data)
            with open(folder + '/mypop','wb') as fp:
                pickle.dump(mypop, fp)
            # save the latest network
            with open(folder + '/mynet','wb') as fp:
                pickle.dump(mynet, fp)
        
            del mypop
            del mynet


Read results.

In [None]:
# For each e, reload the pickled data and network stored directly under .../Results/Epoch<e>.
# NOTE(review): the grid-search cell above writes to .../Epoch<e>/S<s>/<m> instead --
# confirm that this is the intended location.
# mypop and mynet are overwritten on every pass, so only the e=3 objects remain afterwards.
# Unpickling can execute arbitrary code: only load files you created yourself.
for e in [1, 2, 3]:

    folder = '/home/mfumagal/Data/ImaGene/Binary/Results/Epoch' + str(e)
    
    with open(folder + '/mypop','rb') as fp:
        mypop = pickle.load(fp)

    with open(folder + '/mynet','rb') as fp:
        mynet = pickle.load(fp)
        
    

In [5]:
# Pick one configuration of the grid (e, s and m as used in the result folder layout).
e=3
s=300
m='Rows'

folder = '/home/mfumagal/Data/ImaGene/Binary/Results/Epoch' + str(e) + '/S' + str(s) + '/' + str(m)
    
# Unpickling can execute arbitrary code: only load files you created yourself.
with open(folder + '/mypop','rb') as fp:
    mypop = pickle.load(fp)

# Unpickling mynet rebuilds the Keras model; the recorded output below shows this step
# failing with a GPU out-of-memory error (ResourceExhaustedError).
with open(folder + '/mynet','rb') as fp:
    mynet = pickle.load(fp)  


ResourceExhaustedError: OOM when allocating tensor with shape[1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node dense_6/bias/Assign (defined at /home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:402)  = Assign[T=DT_FLOAT, _grappler_relax_allocator_constraints=true, use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](dense_6/bias, dense_6/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'dense_6/bias/Assign', defined at:
  File "/home/mfumagal/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/mfumagal/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/home/mfumagal/anaconda3/lib/python3.6/asyncio/base_events.py", line 438, in run_forever
    self._run_once()
  File "/home/mfumagal/anaconda3/lib/python3.6/asyncio/base_events.py", line 1451, in _run_once
    handle._run()
  File "/home/mfumagal/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1233, in inner
    self.run()
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tornado/gen.py", line 326, in wrapper
    yielded = next(result)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-17bdc61d63f2>", line 11, in <module>
    mynet = pickle.load(fp)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/network.py", line 1266, in __setstate__
    model = saving.unpickle_model(state)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/saving.py", line 435, in unpickle_model
    return _deserialize_model(f)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/saving.py", line 225, in _deserialize_model
    model = model_from_config(model_config, custom_objects=custom_objects)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/saving.py", line 458, in model_from_config
    return deserialize(config, custom_objects=custom_objects)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/layers/__init__.py", line 55, in deserialize
    printable_module_name='layer')
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/utils/generic_utils.py", line 145, in deserialize_keras_object
    list(custom_objects.items())))
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/sequential.py", line 301, in from_config
    model.add(layer)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/sequential.py", line 181, in add
    output_tensor = layer(self.outputs[0])
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/base_layer.py", line 431, in __call__
    self.build(unpack_singleton(input_shapes))
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/layers/core.py", line 872, in build
    constraint=self.bias_constraint)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/engine/base_layer.py", line 252, in add_weight
    constraint=constraint)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 402, in variable
    v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 183, in __call__
    return cls._variable_v1_call(*args, **kwargs)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 146, in _variable_v1_call
    aggregation=aggregation)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 125, in <lambda>
    previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py", line 2444, in default_variable_creator
    expected_shape=expected_shape, import_scope=import_scope)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 187, in __call__
    return super(VariableMetaclass, cls).__call__(*args, **kwargs)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 1329, in __init__
    constraint=constraint)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variables.py", line 1481, in _init_from_args
    validate_shape=validate_shape).op
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/state_ops.py", line 221, in assign
    validate_shape=validate_shape)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_state_ops.py", line 61, in assign
    use_locking=use_locking, name=name)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3274, in create_op
    op_def=op_def)
  File "/home/mfumagal/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[1] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node dense_6/bias/Assign (defined at /home/mfumagal/anaconda3/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:402)  = Assign[T=DT_FLOAT, _grappler_relax_allocator_constraints=true, use_locking=true, validate_shape=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](dense_6/bias, dense_6/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.



In [None]:
# mynet.test holds the result of model.evaluate() on the testing batch; as it is not the
# last expression of the cell, it is not displayed here.
mynet.test
# ImaNet plotting helpers (defined in ImaGene.py).
mynet.plot_train()
mynet.plot_cm(mypop)
#mynet.plot_scatter(mypop)

In [None]:
import shutil

# Collect the artefacts of the current run under the Epoch3 results folder.
# os.makedirs / shutil.move are used instead of shelling out to mkdir / mv, so that a
# failure raises instead of being silently ignored.
results_dir = '/home/mfumagal/Data/ImaGene/Binary/Results/Epoch3'
os.makedirs(results_dir, exist_ok=True)

for artefact in ('net.h5', 'net.png'):
    if os.path.exists(artefact):
        # Pass the full destination path so that an existing file is replaced, as `mv` does.
        shutil.move(artefact, os.path.join(results_dir, artefact))
    else:
        # Best effort, as before: a missing artefact does not stop the cell.
        print('skipping missing file: ' + artefact)

with open(os.path.join(results_dir, 'mypop'),'wb') as fp:
    pickle.dump(mypop, fp)

with open(os.path.join(results_dir, 'mynet'),'wb') as fp:
    pickle.dump(mynet, fp)

In [None]:
# Create the results folder (including parents) without spawning a shell; unlike
# os.system, a failure raises instead of being reported only through an ignored exit status.
os.makedirs('/home/mfumagal/Data/ImaGene/Binary/Results/Epoch3', exist_ok=True)

In [None]:
# Plot the training progress (ImaNet helper defined in ImaGene.py).
mynet.plot_train()

In [None]:
# ImaNet helper; presumably draws a confusion matrix for the data in mypop -- see ImaGene.py.
mynet.plot_cm(mypop)

In [None]:
# ImaNet helper; presumably a scatter plot of the results for mypop -- see ImaGene.py.
mynet.plot_scatter(mypop)

In [None]:
# Create the results folder for model index e.
e = 1
folder = '/home/mfumagal/Data/ImaGene/Binary/Results/Epoch' + str(e)
print(folder)
# os.makedirs replaces subprocess.call(['mkdir -p', folder], shell=True): `subprocess` is
# never imported in this notebook, and with shell=True only the first list element is run
# as the command, so the folder name never reached mkdir.
os.makedirs(folder, exist_ok=True)