# Create training/test sets for leave-category-out cross-validation, and generate 'numseq'-formatted data (that the FastWY program can read)
- "category"基準での分割ではなく"bio_category"がPaired-List(手動定義)に該当するかどうか基準での分割をする

In [1]:
%matplotlib inline

# ---- Options ----

# Whitelist of categories: when this list is not empty, datasets are generated
# only for the categories named in it. Use `onlyList = []` to disable the whitelist.
onlyList = ["All"]

# Blacklist of categories: datasets for the categories named here are not generated.
skipList = list()

# How many FastWY programs can run in parallel.
# One program consumes approximately 8GB memory.
parallel_runs = 3

In [2]:
import pandas as pd
import numpy as np
from IPython.display import display
import sys
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import re #正規表現
import pickle
import warnings
import os
from os import path
import subprocess

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, f1_score, roc_curve, roc_auc_score, precision_score, recall_score, accuracy_score
from sklearn import svm

from multiprocessing import Process, Pool

from protein_utility_test import *
import protein_utility.parameters as parameters


# Random seed constant.
SEED = 1

# Apply seaborn's default plot styling.
sns.set()

# Display up to 100 columns when a DataFrame is rendered.
pd.set_option("display.max_columns", 100)

# CAUTION: from here on every warning is silently ignored.
warnings.filterwarnings("ignore")

# One-letter residue codes handed to aa_to_numseq(): the upper-case alphabet without "J".
# (An earlier version of this list additionally lacked "O".)
aa_list = list("ABCDEFGHIKLMNOPQRSTUVWXYZ")

# Category names processed by the notebook; the pseudo-category "All" comes last.
categoryList = [
    "Apple",
    "Bovine",
    "Buckwheat",
    "Carrot",
    "Chicken",
    "Corn",
    "Crab",
    "Kiwi",
    "Mustard",
    "Olive",
    "Oyster",
    "Peach",
    "Peanut",
    "Potato",
    "Rice",
    "Salmon",
    "Shrimp",
    "Soybean",
    "Tomato",
    "Wheat",
    "All",
]

# Category name -> species name; the lower-cased value is compared with the
# "bio_category" column of the data.
categoryDict = {
    "All": "All",
    "Apple": "Malus domestica",
    "Bovine": "Bos taurus",
    "Buckwheat": "Fagopyrum esculentum",
    "Carrot": "Daucus carota",
    "Chicken": "Gallus gallus",
    "Corn": "Zea mays",
    "Crab": "Scylla serrata",
    "Kiwi": "Actinidia deliciosa",
    "Mustard": "Sinapis alba",
    "Olive": "Olea europaea",
    "Oyster": "Crassostrea gigas",
    "Peach": "Prunus persica",
    "Peanut": "Arachis hypogaea",
    "Potato": "Solanum tuberosum",
    "Rice": "Oryza sativa",
    "Salmon": "Salmo salar",
    "Shrimp": "Penaeus monodon",
    "Soybean": "Glycine max",
    "Tomato": "Solanum lycopersicum",
    "Wheat": "Triticum aestivum",
}

In [3]:
# Select the experiment setting. The chosen flags determine the output
# directory and file-name suffix that Parameters reports when it is created.
params = parameters.Parameters(
    FOOD_WITH_MTEC_ORDER=True,
    Jan2021=True,
)

Parameters loaded.
FOOD_NAIVE          : False
FOOD_ORDER          : False
FOOD_ONLY           : False
NF_NA_ADD           : False
MTEC                : False
MTEC_ORDER          : False
FOOD_WITH_MTEC_NAIVE: False
FOOD_WITH_MTEC_ORDER: True
FOOD_ONLY_WITH_MTEC_ORDER: False

topdir              : food_with_mtec_order_Jan2021
fname_tail          : _food_with_mtec_order_Jan2021



# train, testの分割, numseq化train出力

In [4]:
# Where the sequence data is read from:
#   'divided'  : separate CSV files per data set under ../data/aaseq/
#   'supData1' : one combined CSV; rows with label 1 go to df_plus, rows with label -1 to df_minus
#DATA_SOURCE = 'divided'
DATA_SOURCE = 'supData1'

if DATA_SOURCE == 'supData1':
    df_full = pd.read_csv("../data/supData1_full.csv", index_col=0)
    allergen_rows = df_full["label"] == 1
    non_allergen_rows = df_full["label"] == -1
    df_plus = df_full[allergen_rows]
    df_minus = df_full[non_allergen_rows]
elif DATA_SOURCE == 'divided':
    # Allergen sequences
    df_plus = pd.read_csv("../data/aaseq/aa_COMPARE2020_Dec2020.csv", index_col="id")

    # Non-allergen sequences (all candidate sources are read; the flags below pick what is used)
    df_minus = pd.read_csv("../data/aaseq/aa_all_foods_Dec2020.csv", index_col = "id")
    df_mtec = pd.read_csv("../data/aaseq/aa_mtec_all.csv", index_col = "id")
    df_nfna = pd.read_csv("../data/aaseq/aa_nfna.csv", index_col = "id")
    df_tame2020 = pd.read_csv("../data/aaseq/aa_add_foods_Dec2020.csv", index_col = 0)
    # df_na = pd.read_csv("../data_aaseq/aa_na.csv", index_col = "id")

    # mTEC-only modes: the non-allergen set is replaced by the mTEC data.
    if params.MTEC or params.MTEC_ORDER:
        df_minus = df_mtec
        # Food data kept as the reference for the non-allergen test sets (to be refactored later).
        # NOTE(review): df_food_ref is assigned only here, i.e. only for the 'divided'
        # source in the mTEC-only modes - confirm that later cells never need it otherwise.
        df_food_ref = pd.read_csv("../data/aaseq/aa_all_foods_Dec2020.csv", index_col="id")

    # Food + mTEC modes: tag the mTEC rows and append them to the non-allergen set.
    if params.FOOD_WITH_MTEC_NAIVE or params.FOOD_WITH_MTEC_ORDER or params.FOOD_ONLY_WITH_MTEC_ORDER:
        df_mtec["bio_category"] = "mTEC"
        df_minus = pd.concat([df_minus, df_mtec], ignore_index = True)

    # Optionally append the "nfna" data set.
    if params.NF_NA_ADD:
        df_minus = pd.concat([df_minus, df_nfna], ignore_index = True)

    # Optionally append the additional food data set.
    if params.tame2020 or params.Dec2020 or params.Jan2021:
        df_minus = pd.concat([df_minus, df_tame2020], ignore_index = True)
else:
    raise RuntimeError('Undefined data source %s' % DATA_SOURCE)

display(df_plus, df_minus)

Unnamed: 0,sequence,bio_category,label
0,MKFLLVAALCALVAIGSCKPTREEIKTFEQFKKVFGKVYRNAEEEA...,oribatida,1
1,EIASQIAQEDQSTCEVSKGDFKTFDRMSFTCSFNKSCNVVVAQDCT...,gallus gallus,1
2,MSWQTYVDDHLMCEIEGNYLTSAAIIGQDGSIWAQSASFPQFKPEE...,actinidia deliciosa,1
3,MKCLLLALALTCGAQALIVTQTMKGLDIQKVAGTWYSLAMAASDIS...,bos taurus,1
4,MWFLALCLAMSLGWTGAEPHFQPRIIGGRECLKNSQPWQVAVYHNG...,carnivora,1
...,...,...,...
2243,SDKPQQLLEECRYLXRI,fagopyrum esculentum,1
2244,SIQKIHAREI,gallus gallus,1
2245,AIGDKPGPKITATYXXKWLEAKATFYGSNPRGAA,poales,1
2246,MKSTIVLLACFVGIAFSDDAANQLVDQVVDALKTQKGFDSMHVGKH...,oribatida,1


Unnamed: 0,sequence,bio_category,label
2248,MALKMVKGSIDRMFDKNLQDLVRGIRNHKEDEAKYISQCIDEIKQE...,bos taurus,-1
2249,MLWRLVQQWSVAVFLLSYSVPSCGRSVEELGRRLKRAVSEHQLLHD...,bos taurus,-1
2250,MHRGVGPAFQLSRKMAASGGEPQILVQYLVLRKDLTQPPFSWPAGA...,bos taurus,-1
2251,MASCVGSRTLSKDDVNYKMHFRMINEQQVEDITIDFFYRPHTITLL...,bos taurus,-1
2252,MHISVILFCALWSAVSAENSDDYELMYVNLDNEIDNGLHPTEDPTP...,bos taurus,-1
...,...,...,...
21149,STRTPEFLGLDNQCGVWA,solanum lycopersicum,-1
21150,MARALVQSTSIPSSVAGERTTKFNGSGKTKRAVTMLCNAQSSSLTL...,solanum lycopersicum,-1
21151,GTVVAPMVGLEVKVLVKDGEKVQEGQPVLVLEAMKMEHVVKAPANG...,solanum lycopersicum,-1
21152,MGEALKYSIMDSVRSVVFKESENLEGSCTKIEGYDFNKGVNYAELI...,solanum lycopersicum,-1


In [5]:
# Depending on the parameter flags, drop the records whose "bio_category"
# is not one of the paired (manually defined) species.

# Lower-cased species names taken from categoryDict.
paired_species = [species.lower() for species in categoryDict.values()]

# Allergen data
if params.FOOD_ONLY or params.FOOD_ONLY_WITH_MTEC_ORDER:
    plus_is_paired = df_plus["bio_category"].isin(paired_species)
    df_plus_new = df_plus[plus_is_paired]
    del_num = len(df_plus) - len(df_plus_new)  # number of rows removed
    df_plus = df_plus_new
    print("(allergen) not paired: {}".format(del_num))

# Non-allergen data
if params.FOOD_ONLY or params.FOOD_ORDER:
    minus_is_paired = df_minus["bio_category"].isin(paired_species)
    df_minus_new = df_minus[minus_is_paired]
    del_num = len(df_minus) - len(df_minus_new)  # number of rows removed
    df_minus = df_minus_new
    print("(non allergen) not paired: {}".format(del_num))

In [6]:
# For every selected category: split the data into train/test sets, save both,
# generate the numseq training file (double quotes stripped afterwards) and,
# in the "order" modes, the order file. A LaTeX table of the counts is built
# along the way and printed at the end.
print("sum = Allergen : NonAllergen\n")
# Backslashes are escaped explicitly: sequences such as "\h" or "\m" are invalid
# escape sequences and trigger a SyntaxWarning on recent Python versions.
print_for_tex = "\\hline\nFood & Train or Test & Allergen & NonAllergen & Total \\\\ \\hline\\hline\n"
for idx, category in enumerate(categoryList):
    if category in skipList:
        continue
    if len(onlyList) != 0 and category not in onlyList:
        continue
    print("----- {} -----".format(category))

    bio_category = categoryDict[category].lower()
    print("bio_category: {}".format(bio_category))

    # Training set: every record that does NOT belong to the held-out species.
    df_plus_train = df_plus[df_plus["bio_category"]!=bio_category]
    df_minus_train = df_minus[df_minus["bio_category"]!=bio_category]
    df_train = pd.concat([df_plus_train, df_minus_train], ignore_index=True)

    # Test set: the records of the held-out species.
    df_plus_test = df_plus[df_plus["bio_category"]==bio_category]
    df_minus_test = df_minus[df_minus["bio_category"]==bio_category]
    if params.MTEC or params.MTEC_ORDER:
        # Non-allergens are mTEC only in these modes, so the non-allergen test data
        # of the held-out species is taken from the food reference table instead.
        # NOTE(review): df_food_ref must have been defined by the data-loading cell.
        df_minus_test = df_food_ref[df_food_ref["bio_category"]==bio_category]
    df_test = pd.concat([df_plus_test, df_minus_test], ignore_index=True)

    # Resolve the training path once; calling def_aa twice printed the path twice.
    aa_train_fname = params.def_aa(category, train_test = "train")
    os.makedirs(path.dirname(aa_train_fname), exist_ok=True)
    df_train.to_csv(aa_train_fname)

    # numseq representation of the training sequences, space separated, no header/index.
    numseq_train_fname = params.def_numseq_train(category)
    df_train_numseq = aa_to_numseq(df_train, aa_list)
    df_train_numseq.to_csv(numseq_train_fname, header=None, index=None, sep=" ")
    replaceForFile(numseq_train_fname, "\"", "")

    df_test.to_csv(params.def_aa(category, train_test = "test"))

    # When the taxonomic order is taken into account, also write the order file.
    if params.FOOD_ORDER or params.MTEC_ORDER or params.FOOD_WITH_MTEC_ORDER or params.FOOD_ONLY_WITH_MTEC_ORDER:
        df_train["bio_category"].to_csv(params.def_order(category), header = False, index = False)

    print("train(not {0:<10}: {1:<5} = {2:<4} : {3}".format(category+")", df_train.shape[0], df_plus_train.shape[0], df_minus_train.shape[0]))
    print("test ({0:<14}: {1:<5} = {2:<4} : {3}\n".format(category+")", df_test.shape[0], df_plus_test.shape[0], df_minus_test.shape[0]))

    # Two table rows per category; categories at odd positions of categoryList are shaded.
    if idx % 2 == 1:
        print_for_tex += "\\rowcolor[gray]{0.9} "
    print_for_tex += " & train(not {0}) & {1} & {2} & {3} \\\\ %\\cline{{2-5}}\n".format(category, df_plus_train.shape[0], df_minus_train.shape[0], df_train.shape[0])
    if idx % 2 == 1:
        print_for_tex += "\\rowcolor[gray]{0.9} "
    print_for_tex += "\\multirow{{-2}}{{*}}{{{0}}} & test({0}) & {1} & {2} & {3} \\\\ \\hline\n".format(category, df_plus_test.shape[0], df_minus_test.shape[0], df_test.shape[0])
print(print_for_tex)

sum = Allergen : NonAllergen

----- All -----
bio_category: all
aa_train        : food_with_mtec_order_Jan2021/all/data/aa_all_food_with_mtec_order_Jan2021_train.csv
aa_train        : food_with_mtec_order_Jan2021/all/data/aa_all_food_with_mtec_order_Jan2021_train.csv
numseq_train    : food_with_mtec_order_Jan2021/all/data/numseq_all_food_with_mtec_order_Jan2021_train.csv
aa_test         : food_with_mtec_order_Jan2021/all/data/aa_all_food_with_mtec_order_Jan2021_test.csv
order           : food_with_mtec_order_Jan2021/all/data/order_all_food_with_mtec_order_Jan2021_train.csv
train(not All)      : 21154 = 2248 : 18906
test (All)          : 0     = 0    : 0

\hline
Food & Train or Test & Allergen & NonAllergen & Total \\ \hline\hline
 & train(not All) & 2248 & 18906 & 21154 \\ %\cline{2-5}
\multirow{-2}{*}{All} & test(All) & 0 & 0 & 0 \\ \hline



## 全データ用系列作成

In [7]:
# Build the training files from the complete data set: nothing is held out here.
category = "all"

df_train = pd.concat([df_plus, df_minus], ignore_index=True)

# Resolve the training path once; calling def_aa twice printed the path twice.
aa_train_fname = params.def_aa(category, train_test = "train")
os.makedirs(path.dirname(aa_train_fname), exist_ok=True)
df_train.to_csv(aa_train_fname)

# numseq representation of the sequences, space separated, no header/index;
# double quotes are stripped from the written file afterwards.
numseq_train_fname = params.def_numseq_train(category)
df_train_numseq = aa_to_numseq(df_train, aa_list)
df_train_numseq.to_csv(numseq_train_fname, header=None, index=None, sep=" ")
replaceForFile(numseq_train_fname, "\"", "")

# When the taxonomic order is taken into account, also write the order file.
if params.FOOD_ORDER or params.MTEC_ORDER or params.FOOD_WITH_MTEC_ORDER or params.FOOD_ONLY_WITH_MTEC_ORDER:
    df_train["bio_category"].to_csv(params.def_order(category), header = False, index = False)

# The label used to read "train(not all)", which was misleading because no
# category is excluded in this cell. The column alignment is unchanged.
print("train({0:<14}: {1:<5} = {2:<4} : {3}".format(category+")", df_train.shape[0], df_plus.shape[0], df_minus.shape[0]))

aa_train        : food_with_mtec_order_Jan2021/all/data/aa_all_food_with_mtec_order_Jan2021_train.csv
aa_train        : food_with_mtec_order_Jan2021/all/data/aa_all_food_with_mtec_order_Jan2021_train.csv
numseq_train    : food_with_mtec_order_Jan2021/all/data/numseq_all_food_with_mtec_order_Jan2021_train.csv
order           : food_with_mtec_order_Jan2021/all/data/order_all_food_with_mtec_order_Jan2021_train.csv
train(not all)      : 21154 = 2248 : 18906


# FastWY実行

In [8]:
# Values substituted into the "-L" / "-R" options of the generated FastWY commands.
# With NF_NA_ADD a smaller L is used (36000 was a previously tried value).
L = 500 if params.NF_NA_ADD else 1800
R = 10000
# NOTE: M is not referenced by the command templates in this notebook section;
# they hard-code "-M 6".
M = 6

params.print_all()

FOOD_NAIVE          : False
FOOD_ORDER          : False
FOOD_ONLY           : False
NF_NA_ADD           : False
MTEC                : False
MTEC_ORDER          : False
FOOD_WITH_MTEC_NAIVE: False
FOOD_WITH_MTEC_ORDER: True
FOOD_ONLY_WITH_MTEC_ORDER: False

topdir              : food_with_mtec_order_Jan2021
fname_tail          : _food_with_mtec_order_Jan2021



In [9]:
# Generate '02_run_fastwy.sh', the shell script that launches the FastWY training runs.

# Experiment variants per parameter mode, as (op_tail, options, enabled) tuples:
#   op_tail : value assigned to params.op_tail before the paths are resolved
#   options : variant-specific FastWY options; "{L}" and "{order}" are filled in later
#   enabled : only enabled variants are written to the script
# The modes are listed in priority order: the first mode whose flag is set on `params`
# is used. Disabled variants are kept for reference, and params.op_tail is still set
# for them so that its final value is the same as when every variant was spelled out.
FASTWY_VARIANTS = [
    ("FOOD_NAIVE", [
        ("C1L1800R10k", "-C 1 -L {L}", False),                          # with CloSpan, variable length, two-sided
        ("C1Z1L1800R10k", "-C 1 -Z 1 -L {L}", True),                    # +' vs -
    ]),
    ("FOOD_ORDER", [
        # with CloSpan
        ("C1GT1L1800R10k", "-C 1 -G {order} -T 1 -L {L}", True),        # variable length, two-sided
        ("C1S1GL1800R10k", "-C 1 -S 1 -G {order} -L {L}", False),       # variable length, one-sided
        ("C1M6GL6R10k", "-C 1 -M 6 -G {order} -L 6", False),            # fixed length 6, two-sided
        ("C1M6S1GL6R10k", "-C 1 -M 6 -S 1 -G {order} -L 6", False),     # fixed length 6, one-sided
        ("C1Ga001L1800R10k", "-C 1 -G {order} -a 0.01 -L {L}", False),  # alpha = 0.01
        ("C1Ga01L1800R10k", "-C 1 -G {order} -a 0.1 -L {L}", False),    # alpha = 0.1
        ("C1Ga02L1800R10k", "-C 1 -G {order} -a 0.2 -L {L}", False),    # alpha = 0.2
        ("C1GK3L1800R10k", "-C 1 -G {order} -K 3 -L {L}", False),       # K = 3
    ]),
    ("FOOD_ONLY", [
        # +' vs -, with CloSpan
        ("C1Z1L1800R10k", "-C 1 -Z 1 -L {L}", True),                    # variable length, two-sided
        ("C1Z1S1L1800R10k", "-C 1 -Z 1 -S 1 -L {L}", False),            # variable length, one-sided
        ("C1Z1M6L6R10k", "-C 1 -Z 1 -M 6 -L 6", False),                 # fixed length 6, two-sided
        ("C1Z1M6S1L6R10k", "-C 1 -Z 1 -M 6 -S 1 -L 6", False),          # fixed length 6, one-sided
        ("C1a001L1800R10k", "-C 1 -a 0.01 -L {L}", False),              # alpha = 0.01
        ("C1a01L1800R10k", "-C 1 -a 0.1 -L {L}", False),                # alpha = 0.1
        ("C1a02L1800R10k", "-C 1 -a 0.2 -L {L}", False),                # alpha = 0.2
    ]),
    ("MTEC_ORDER", [
        # with CloSpan
        ("C1GL1800R10k", "-C 1 -G {order} -L {L}", True),               # variable length, two-sided
        ("C1S1GL1800R10k", "-C 1 -S 1 -G {order} -L {L}", True),        # variable length, one-sided
        ("C1M6GL6R10k", "-C 1 -M 6 -G {order} -L 6", True),             # fixed length 6, two-sided
        ("C1M6S1GL6R10k", "-C 1 -M 6 -S 1 -G {order} -L 6", True),      # fixed length 6, one-sided
    ]),
    # No variants are defined for this mode: the two that were tried earlier
    # ("C1L1800R10k" and "C1Z1L1800R10k") had been commented out entirely.
    ("FOOD_WITH_MTEC_NAIVE", []),
    ("FOOD_WITH_MTEC_ORDER", [
        # with CloSpan
        ("C1GT1L1800R10k", "-C 1 -G {order} -T 1 -L {L}", True),                  # variable length, two-sided
        ("C1S1GL1800R10k", "-C 1 -S 1 -G {order} -L {L}", False),                 # variable length, one-sided
        ("C1M6GL6R10k", "-C 1 -M 6 -G {order} -L 6", False),                      # fixed length 6, two-sided
        ("C1M6S1GL6R10k", "-C 1 -M 6 -S 1 -G {order} -L 6", False),               # fixed length 6, one-sided
        ("C1Ga001T1L1800R10k", "-C 1 -G {order} -a 0.01 -T 1 -L {L}", True),      # alpha = 0.01
        ("C1Ga01T1L1800R10k", "-C 1 -G {order} -a 0.1 -T 1 -L {L}", False),       # alpha = 0.1
        ("C1GT1r10L1800R10k", "-C 1 -G {order} -T 1 -r 10 -L {L}", False),        # variable length, two-sided, r10
        ("C1GT1L1800R10kdebug", "-C 1 -G {order} -T 1 -L {L}", False),            # temporary debug run (only to keep its files separate)
    ]),
    ("FOOD_ONLY_WITH_MTEC_ORDER", [
        # with CloSpan
        ("C1GT1L1800R10k", "-C 1 -G {order} -L {L} -T 1", True),        # variable length, two-sided
        ("C1Ga001L1800R10k", "-C 1 -G {order} -a 0.01 -L {L}", False),  # alpha = 0.01
        ("C1Ga01L1800R10k", "-C 1 -G {order} -a 0.1 -L {L}", False),    # alpha = 0.1
    ]),
]


def build_fastwy_command(options, category):
    """Return the shell command line for one FastWY run.

    `params.op_tail` must already be set for the variant, because the result and
    log paths are resolved through `params` at call time.

    Args:
        options: Variant-specific option string; may contain the "{L}" and
            "{order}" placeholders.
        category: Lower-cased category name passed to the path builders of `params`.

    Returns:
        The complete "nohup ../train ... &" command as a string.
    """
    fields = dict(
        L = L,
        R = R,
        A = params.def_result_all(category, disp=False),
        numseq = params.def_numseq_train(category, disp=False),
        result = params.def_result(category, disp=False),
        output = params.def_output(category, disp=False),
    )
    # The order file is only looked up for the variants that actually use it.
    if "{order}" in options:
        fields["order"] = params.def_order(category, disp=False)
    template = "nohup ../train " + options + " -R {R} -F {result} -A {A} {numseq} > {output} &"
    return template.format(**fields)


commands = []

for category in categoryList:
    if category in skipList:
        continue
    if len(onlyList) != 0 and category not in onlyList:
        continue
    category = category.lower()

    # 2021.6.30 by Hanada: Create the directory for the log file beforehand
    params.op_tail = "temp" # 'def_output' requires 'op_tail' not to be empty
    os.makedirs(path.dirname(params.def_output(category, disp=False)), exist_ok=True)

    # Pick the variants of the first mode whose flag is set (none if no flag matches).
    variants = next((mode_variants for flag, mode_variants in FASTWY_VARIANTS if getattr(params, flag)), [])
    for op_tail, options, enabled in variants:
        params.op_tail = op_tail
        if enabled:
            commands.append(build_fastwy_command(options, category))
    print()

# Fail before touching the script file: the batching below divides by parallel_runs.
if parallel_runs < 1:
    raise ValueError("parallel_runs must be a positive integer, got %r" % (parallel_runs,))

# Background jobs are started in batches of `parallel_runs`; a "wait" after each
# batch (and after the last command) blocks until the running jobs have finished.
# The context manager closes the script file even if a write fails.
with open('02_run_fastwy.sh', 'w') as f_script:
    f_script.write("#!/bin/sh\n")
    for i, cmd in enumerate(commands):
        f_script.write(cmd+"\n")
        if i % parallel_runs == parallel_runs - 1 or i == len(commands) - 1:
            f_script.write("wait\n")
print("%d experiments are written to the script file." % len(commands))


2 experiments are written to the script file.
