# Final stage

In [None]:
!mkdir predictions_final

In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

from tensorflow.keras.losses import binary_crossentropy as bce
import tensorflow as tf
import numpy as np
from tensorflow.keras.models import load_model
import tcn
from utils import make_predictions_2strands

In [2]:
# Load one trained Keras model per transcription factor from final_models/,
# keyed by the factor's name.
# The loop variable is deliberately not called `tf`: that name is the
# `tensorflow` alias imported in the imports cell and must not be rebound
# to a string.
models = {}
for tf_name in ["GCM1", "MKX", "MSANTD1", "TPRX1", "ZFTA",
                "MYPOP", "CREB3L3", "FIZ1", "ZNF500", "ZNF780B",
                "ZNF831", "ZNF286B", "ZBTB47", "ZNF721", "SP140L",
                "USF3", "ZBED2", "MYF6", "SALL3", "CAMTA1",
                "ZNF367", "ZNF648", "ZNF518B", "ZBED5", "ZNF251",
                "ZNF493", "ZNF20", "LEUTX", "PRDM13", "ZNF395"]:
    # custom_objects maps the serialized name "TCN" to tcn.TCN so that Keras
    # can resolve it while deserializing the saved model.
    models[tf_name] = load_model(
        f"final_models/{tf_name}.keras",
        custom_objects={"TCN": tcn.TCN},
    )
    

### CHS

In [6]:
# Run each CHS transcription factor through its own model; every call is given
# predictions_final/CHS_<TF>.tsv as its output file name.
# The loop variable is `tf_name` rather than `tf` so that it does not rebind
# the `tensorflow` alias imported in the imports cell.
experiment = "CHS"
for tf_name in ["GCM1", "MKX", "MSANTD1", "TPRX1",
                "ZFTA", "MYPOP", "CREB3L3", "FIZ1",
                "ZNF500", "ZNF780B", "ZNF831", "ZNF286B",
                "ZBTB47", "ZNF721", "SP140L", "USF3",
                "ZBED2", "MYF6", "SALL3", "ZNF367",
                "ZNF648", "ZNF518B", "ZBED5", "ZNF251",
                "ZNF493", "ZNF20", "LEUTX", "PRDM13", "ZNF395"]:
    make_predictions_2strands(
        fout_name=f"predictions_final/{experiment}_{tf_name}.tsv",
        experiment=experiment,
        tags=[tf_name],
        columns=[0],
        model=models[tf_name],
        b_size=4096,
        # NOTE(review): presumably the number of rows in the CHS test set --
        # confirm against utils.make_predictions_2strands.
        total_lines=1735138,
        test_folder="test_final",
    )
    print(f"TF {tf_name} done")

TF GCM1 done
TF MKX done
TF MSANTD1 done
TF TPRX1 done
TF ZFTA done
TF MYPOP done
TF CREB3L3 done
TF FIZ1 done
TF ZNF500 done
TF ZNF780B done
TF ZNF831 done
TF ZNF286B done
TF ZBTB47 done
TF ZNF721 done
TF SP140L done
TF USF3 done
TF ZBED2 done
TF MYF6 done
TF SALL3 done
TF ZNF367 done
TF ZNF648 done
TF ZNF518B done
TF ZBED5 done
TF ZNF251 done
TF ZNF493 done
TF ZNF20 done
TF LEUTX done
TF PRDM13 done
TF ZNF395 done


In [7]:
%%bash
# Merge the per-TF CHS prediction files into one gzipped table:
# column 1 of CHS_GCM1.tsv first, then column 2 of every CHS*.tsv, in the
# order the shell expands the glob.
# Abort on the first failure so the redirects and the `rm` below can never run
# in the wrong directory or after an incomplete merge.
set -euo pipefail
cd predictions_final/

cut -f1 CHS_GCM1.tsv > CHS_labels.txt
for i in CHS*.tsv; do
  # Quote the file name so it survives word splitting and globbing.
  cut -f2 "$i" > "$i.cutted"
done

paste CHS_labels.txt CHS*.tsv.cutted > CHS_total.tsv
gzip -f CHS_total.tsv
# Remove the intermediates and the per-TF inputs; only CHS_total.tsv.gz stays.
rm CHS*.cutted CHS_labels.txt CHS*.tsv
cd ..

### GHTS

In [8]:
# Run each GHTS transcription factor through its own model; every call is given
# predictions_final/GHTS_<TF>.tsv as its output file name.
# The loop variable is `tf_name` rather than `tf` so that it does not rebind
# the `tensorflow` alias imported in the imports cell.
experiment = "GHTS"
for tf_name in ["GCM1", "MKX", "MSANTD1", "TPRX1",
                "ZFTA", "MYPOP", "CREB3L3", "FIZ1",
                "ZNF500", "ZNF780B", "ZNF831", "ZNF286B",
                "ZBTB47", "ZNF721", "SP140L", "USF3",
                "ZBED2", "MYF6", "SALL3", "CAMTA1",
                "ZNF367", "ZNF648", "ZNF518B", "ZBED5",
                "ZNF251", "ZNF493", "LEUTX", "PRDM13", "ZNF395"]:
    make_predictions_2strands(
        fout_name=f"predictions_final/{experiment}_{tf_name}.tsv",
        experiment=experiment,
        tags=[tf_name],
        columns=[0],
        model=models[tf_name],
        b_size=4096,
        # NOTE(review): presumably the number of rows in the GHTS test set --
        # confirm against utils.make_predictions_2strands.
        total_lines=522243,
        test_folder="test_final",
    )
    print(f"TF {tf_name} done")

TF GCM1 done
TF MKX done
TF MSANTD1 done
TF TPRX1 done
TF ZFTA done
TF MYPOP done
TF CREB3L3 done
TF FIZ1 done
TF ZNF500 done
TF ZNF780B done
TF ZNF831 done
TF ZNF286B done
TF ZBTB47 done
TF ZNF721 done
TF SP140L done
TF USF3 done
TF ZBED2 done
TF MYF6 done
TF SALL3 done
TF CAMTA1 done
TF ZNF367 done
TF ZNF648 done
TF ZNF518B done
TF ZBED5 done
TF ZNF251 done
TF ZNF493 done
TF LEUTX done
TF PRDM13 done
TF ZNF395 done


In [9]:
%%bash
# Merge the per-TF GHTS prediction files into one gzipped table:
# column 1 of GHTS_GCM1.tsv first, then column 2 of every GHTS*.tsv, in the
# order the shell expands the glob.
# Abort on the first failure so the redirects and the `rm` below can never run
# in the wrong directory or after an incomplete merge.
set -euo pipefail
cd predictions_final/

cut -f1 GHTS_GCM1.tsv > GHTS_labels.txt
for i in GHTS*.tsv; do
  # Quote the file name so it survives word splitting and globbing.
  cut -f2 "$i" > "$i.cutted"
done

paste GHTS_labels.txt GHTS*.tsv.cutted > GHTS_total.tsv
gzip -f GHTS_total.tsv
# Remove the intermediates and the per-TF inputs; only GHTS_total.tsv.gz stays.
rm GHTS*.cutted GHTS_labels.txt GHTS*.tsv
cd ..

### HTS

In [None]:
# Run each HTS transcription factor through its own model; every call is given
# predictions_final/HTS_<TF>.tsv as its output file name.
# The loop variable is `tf_name` rather than `tf` so that it does not rebind
# the `tensorflow` alias imported in the imports cell.
experiment = "HTS"
for tf_name in ["GCM1", "MKX", "MSANTD1", "TPRX1",
                "MYPOP", "CREB3L3", "FIZ1", "ZNF780B",
                "ZNF831", "ZNF286B", "ZBTB47", "ZNF721",
                "SP140L", "USF3", "ZBED2", "MYF6",
                "SALL3", "CAMTA1", "ZNF367", "ZNF648",
                "ZNF518B", "ZBED5", "ZNF251", "ZNF493",
                "ZNF20", "LEUTX", "PRDM13", "ZNF395"]:
    make_predictions_2strands(
        fout_name=f"predictions_final/{experiment}_{tf_name}.tsv",
        experiment=experiment,
        tags=[tf_name],
        columns=[0],
        model=models[tf_name],
        b_size=4096,
        # NOTE(review): half of 26668562 -- presumably the source file holds
        # two lines per record; confirm against the HTS test data.
        total_lines=26668562 // 2,
        test_folder="test_final",
    )
    print(f"TF {tf_name} done")

In [14]:
%%bash
# Merge the per-TF HTS prediction files into one gzipped table:
# column 1 of HTS_GCM1.tsv first, then column 2 of every HTS*.tsv, in the
# order the shell expands the glob.
# Abort on the first failure so the redirects and the `rm` below can never run
# in the wrong directory or after an incomplete merge.
set -euo pipefail
cd predictions_final/

cut -f1 HTS_GCM1.tsv > HTS_labels.txt
for i in HTS*.tsv; do
  # Quote the file name so it survives word splitting and globbing.
  cut -f2 "$i" > "$i.cutted"
done

paste HTS_labels.txt HTS*.tsv.cutted > HTS_total.tsv
gzip -f HTS_total.tsv
# Only the intermediates are removed here; the per-TF HTS*.tsv files are kept
# (their removal is intentionally left commented out).
rm HTS*.tsv.cutted HTS_labels.txt #HTS*.tsv
cd ..

### SMS

In [10]:
# Run each SMS transcription factor through its own model; every call is given
# predictions_final/SMS_<TF>.tsv as its output file name.
# The loop variable is `tf_name` rather than `tf` so that it does not rebind
# the `tensorflow` alias imported in the imports cell.
experiment = "SMS"
for tf_name in ["TPRX1", "USF3", "ZBED2", "CAMTA1",
                "ZNF367", "ZNF648", "ZBED5", "ZNF251",
                "ZNF493", "PRDM13", "ZNF395"]:
    make_predictions_2strands(
        fout_name=f"predictions_final/{experiment}_{tf_name}.tsv",
        experiment=experiment,
        tags=[tf_name],
        columns=[0],
        model=models[tf_name],
        # This experiment uses 8192 where the other experiments use 4096.
        b_size=8192,
        # NOTE(review): presumably the number of rows in the SMS test set --
        # confirm against utils.make_predictions_2strands.
        total_lines=1875093,
        test_folder="test_final",
    )
    print(f"TF {tf_name} done")

TF TPRX1 done
TF USF3 done
TF ZBED2 done
TF CAMTA1 done
TF ZNF367 done
TF ZNF648 done
TF ZBED5 done
TF ZNF251 done
TF ZNF493 done
TF PRDM13 done
TF ZNF395 done


In [11]:
%%bash
# Merge the per-TF SMS prediction files into one gzipped table:
# column 1 of SMS_TPRX1.tsv first, then column 2 of every SMS*.tsv, in the
# order the shell expands the glob.
# Abort on the first failure so the redirects and the `rm` below can never run
# in the wrong directory or after an incomplete merge.
set -euo pipefail
cd predictions_final/

cut -f1 SMS_TPRX1.tsv > SMS_labels.txt
for i in SMS*.tsv; do
  # Quote the file name so it survives word splitting and globbing.
  cut -f2 "$i" > "$i.cutted"
done

paste SMS_labels.txt SMS*.tsv.cutted > SMS_total.tsv
gzip -f SMS_total.tsv
# Remove the intermediates and the per-TF inputs; only SMS_total.tsv.gz stays.
rm SMS*.cutted SMS_labels.txt SMS*.tsv
cd ..

### PBM

In [4]:
# Run each PBM transcription factor through its own model; every call is given
# predictions_final/PBM_<TF>.tsv as its output file name.
# The loop variable is `tf_name` rather than `tf` so that it does not rebind
# the `tensorflow` alias imported in the imports cell.
experiment = "PBM"
for tf_name in ["GCM1", "MKX", "MSANTD1", "TPRX1",
                "ZFTA", "MYPOP", "SP140L", "USF3",
                "ZBED2", "MYF6", "ZBED5", "LEUTX"]:
    make_predictions_2strands(
        fout_name=f"predictions_final/{experiment}_{tf_name}.tsv",
        experiment=experiment,
        tags=[tf_name],
        columns=[0],
        model=models[tf_name],
        b_size=4096,
        # NOTE(review): presumably the number of rows in the PBM test set --
        # confirm against utils.make_predictions_2strands.
        total_lines=40329,
        test_folder="test_final",
    )
    print(f"TF {tf_name} done")

TF GCM1 done
TF MKX done
TF MSANTD1 done
TF TPRX1 done
TF ZFTA done
TF MYPOP done
TF SP140L done
TF USF3 done
TF ZBED2 done
TF MYF6 done
TF ZBED5 done
TF LEUTX done


In [5]:
%%bash
# Merge the per-TF PBM prediction files into one gzipped table:
# column 1 of PBM_GCM1.tsv first, then column 2 of every PBM*.tsv, in the
# order the shell expands the glob.
# Abort on the first failure so the redirects and the `rm` below can never run
# in the wrong directory or after an incomplete merge.
set -euo pipefail
cd predictions_final/

cut -f1 PBM_GCM1.tsv > PBM_labels.txt
for i in PBM*.tsv; do
  # Quote the file name so it survives word splitting and globbing.
  cut -f2 "$i" > "$i.cutted"
done

paste PBM_labels.txt PBM*.tsv.cutted > PBM_total.tsv
gzip -f PBM_total.tsv
# Remove the intermediates and the per-TF inputs; only PBM_total.tsv.gz stays.
rm PBM*.cutted PBM_labels.txt PBM*.tsv
cd ..