## Cluster snippets

Run an interactive CellProfiler session

    # Replace the loaded python module with the modules CellProfiler 3.0 is used with
    module unload python
    module load gcc/4.8.2 python/2.7.12 java/jdk/1.8.0/102 cellprofiler/3.0
    # Request an allocation on the current host (--nodelist=$(hostname)) and
    # launch CellProfiler inside it; the job name is prefixed with today's date
    salloc --partition=develop \
           --time=2:00:00 \
           --job-name="$(date +%F)_InterCellProf" \
           --nodes=1 \
           --mem=16000 \
           --nodelist=$(hostname) \
           srun cellprofiler

Run max projection job  

    # Run the max projection script as a single-task, single-CPU job;
    # stdout and stderr go to slog/, one file pair per job (%j)
    srun --time=12:00:00 \
         --job-name=MaxP_NKP2 \
         --partition=shortq \
         --output slog/%j.log \
         --error slog/%j.err \
         --cpus-per-task=1 \
         --mem-per-cpu=10000 \
         --ntasks=1 \
         --nodes=1 \
         python3 ../MaxProj.py
    
Run batch job

    # Submit batch number $i (set i beforehand, e.g. i=6).
    # The braces in ${i} are required: in "$i_%j" or "$i_CPProfilingYolla" the shell
    # would look up the (unset) variables "i_" / "i_CPProfilingYolla" and drop the index.
    sbatch --export=iter=$i --output /scratch/lab_menche/Images_Yolla/JobLogs/iter${i}_%j.log --job-name=${i}_CPProfilingYolla --error /scratch/lab_menche/Images_Yolla/JobLogs/iter${i}_%j.err runSingleJob.sh

    # coding: utf-8

    import os, subprocess

    rootFolder = 'Results/'

    # sbatch command used to resubmit one batch; {0} is replaced by the batch index.
    slurmTemplate = "sbatch --export=iter={0} --output /scratch/lab_menche/Images_Yolla/Plate2/JobLogs/%j_iter{0}.log --job-name={0}_CPProfilingYolla --error /scratch/lab_menche/Images_Yolla/Plate2/JobLogs/%j_iter{0}.err runSingleJob.sh"

    nbFailures = 0

    # Resubmit every batch whose result folder exists but is still empty.
    for folder in os.listdir(rootFolder):
        if 'batch' not in folder:
            continue
        if os.listdir(rootFolder+'/'+folder):
            # The folder already holds some output: nothing to resubmit.
            continue
        nbFailures += 1
        # The batch index is the second "_"-separated field of the folder name.
        batchIndex = int(folder.split("_")[1])
        subprocess.call(slurmTemplate.format(batchIndex), shell=True)

    # Submit batches 6 to 1019, one job per batch, pausing 3 seconds between submissions
    for i in {6..1019}; do
        sbatch --export=iter=$i \
               --output "/scratch/lab_menche/Images_Yolla/JobLogs/iter${i}_%j.log" \
               --job-name="${i}_CPProfilingYolla" \
               --error "/scratch/lab_menche/Images_Yolla/JobLogs/iter${i}_%j.err" \
               runSingleJob.sh
        sleep 3
    done

    import os
    from os import listdir
    import pandas as pd

    # Merge the NK2_Image.csv table of every batch folder found in the current
    # working directory into a single allImages.csv file.
    imageTables = []
    for entry in listdir():
        # Only directories can hold an ExportCPP folder; skipping plain files
        # (e.g. an allImages.csv written by a previous run) avoids a crash in listdir.
        if not os.path.isdir(entry):
            continue
        assert "ExportCPP" in listdir(entry), entry + " has no ExportCPP folder"
        assert "NK2_Image.csv" in listdir(entry+'/ExportCPP'), entry + "/ExportCPP has no NK2_Image.csv"
        imageTables.append(pd.read_csv(entry+'/ExportCPP/NK2_Image.csv'))

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame on
    # every call; concatenate all tables once instead. pd.concat rejects an
    # empty list, so fall back to an empty frame when no batch folder was found.
    allImages = pd.concat(imageTables) if imageTables else pd.DataFrame()

    allImages.to_csv("allImages.csv")

    # coding: utf-8

    import os, subprocess

    nbFailures = 0

    # Rerun job n
    def rerun(n, nbFailures):
        """Print the sbatch command that resubmits batch `n`.

        The actual submission is commented out, so this is a dry run.
        Returns `nbFailures` incremented by one.
        """
        template = 'sbatch --export=iter={0} --output /scratch/lab_menche/Images_Yolla/JobLogs/iter{0}_%j.log --job-name={0}_CPProfilingYolla --error /scratch/lab_menche/Images_Yolla/JobLogs/iter{0}_%j.err runSingleJob.sh'
        command = template.format(n)
        # Uncomment to actually submit the job instead of only printing it:
        # subprocess.call(command, shell=True)
        print(command)
        return nbFailures + 1

    rootFolder = 'Results/'

    # File that a batch must have produced to be considered successful.
    expectedCsv = os.path.join('ExportCPP', 'NK2_Image.csv')

    # Listing the root folder up front still fails loudly if it does not exist.
    folders = set(os.listdir(rootFolder))
    for ibatch in range(1, 1020):
        folder = 'batch_' + str(ibatch)
        # A batch failed when its folder is missing, empty, lacks the ExportCPP
        # subfolder, or lacks the image table. Testing for the file itself covers
        # all of these, including a missing ExportCPP, which would otherwise
        # raise FileNotFoundError when listed.
        if folder not in folders or not os.path.isfile(os.path.join(rootFolder, folder, expectedCsv)):
            nbFailures = rerun(ibatch, nbFailures)

    # Total number of batches that have to be rerun
    print(nbFailures)

In [None]:
library(RMariaDB)
library(ggplot2)

In [None]:
# Connect to the database
dbNetworks = dbConnect(MariaDB(), group="networks")
# NB: the connection settings are read from the "networks" group of the
# "~/.my.cnf" file, which uses the following format:
#   [networks]
#   host=#host
#   user=#username
#   password=#password
#   database=networks

In [None]:
# Run a query and return all the entries it matches.
#   querryString: the SQL statement, sent over the global `dbNetworks` connection.
#   Returns the value of dbFetch() for the complete result set.
getAllQuerry <- function(querryString){
    querry <- dbSendQuery(dbNetworks, querryString)
    # Clear the result set on exit, so that it is released even when dbFetch()
    # fails instead of staying open on the connection.
    on.exit(dbClearResult(querry), add = TRUE)
    dbFetch(querry)
}

In [None]:
# Define the query to run.
# An earlier query was assigned here and immediately overwritten, so it never
# ran; it is kept as a reference only:
#   SELECT networks.ppi_tsne_biopro_2D.*, Results_Funclusters.gene2clusterID_d8.clusterID
#   FROM networks.ppi_tsne_biopro_2D INNER JOIN Results_Funclusters.gene2clusterID_d8
#   ON networks.ppi_tsne_biopro_2D.entrezID = Results_Funclusters.gene2clusterID_d8.entrezID
querryString <- "SELECT * FROM ImageAnalysis_YollaSH.NK2T_Per_Image"

netw <- getAllQuerry(querryString) # Fetch the whole per-image table

In [None]:
# Size and column names of the table fetched from the database
dim(netw)
colnames(netw)

In [None]:
# Load the image table from the local CSV file
image_file_data <- read.csv(file = "NK2_Image.csv")

In [None]:
# Size and column names of the table read from the CSV file
dim(image_file_data)
colnames(image_file_data)

In [None]:
# Number of columns whose name contains "Granularity" in each table
sum(grepl("Granularity", names(netw)))
sum(grepl("Granularity", names(image_file_data)))

In [None]:
# Names of those columns in each table
grep("Granularity", names(netw), value = TRUE)
grep("Granularity", names(image_file_data), value = TRUE)

In [None]:
# Fetch the per-experiment table, then show its size and column names
querryString <- "SELECT * FROM ImageAnalysis_YollaSH.NK2T_Per_Experiment"
netw <- getAllQuerry(querryString)
dim(netw)
names(netw)

In [None]:
# Print the R version and the attached/loaded packages of this session
sessionInfo()