In [1]:
import csv
import pandas as pd
import os
import numpy as np
import random
import json
from sklearn.model_selection import KFold

random.seed(10)

In [2]:
#all用
def make_table_art(art_img_dir,art_json_dir):

    # nat_list = ["Austenite","CFRP","Isotropic"]
    nat_list = ["Austenite"]

    source_path =[]
    xmin =[]
    xmax =[]
    ymin =[]
    ymax =[]
    label =[]

    for nat in nat_list:
        for folder in os.listdir(path=art_json_dir+nat):
            json_file = json.load(open(art_json_dir+ nat + "/" + folder))
            wave_end = int(json_file["wave_end"])
            for wavetime in range(wave_end+1):
                assets = json_file["assets"][wavetime]
                source_path.append(art_img_dir + nat + "/" + folder.replace(".json","") + "/" + assets["time"]+".png")
                label.append("Non" if assets["cav"] == 0 else "Crack") #Non, Crack
                xmin.append(assets["bbox"]["xmin"])
                xmax.append(assets["bbox"]["xmax"])
                ymin.append(assets["bbox"]["ymin"])
                ymax.append(assets["bbox"]["ymax"])
    all = np.array([source_path,xmin,xmax,ymin,ymax,label]).T
    columns = ["path","x_min","x_max","y_min","y_max","label"]
    table = pd.DataFrame(all,columns=columns)
    table["x_min"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["x_max"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["y_min"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["y_max"][table.index[table['label'] == "Non"].tolist()] = np.nan

    return table
####
### normal用
def make_index(all_num,df,split_num): #2:1:2
    x = np.array([[x] for x in range(all_num)])
    y = np.array([x for x in range(all_num)])
    ss = KFold(n_splits=split_num, random_state=10,shuffle=True)
    for i,(train_index, test_index) in enumerate(ss.split(x, y)):
        tra_num = len(train_index)
        train_index = train_index.tolist()
        random.shuffle(train_index)
        add_test_num = int(tra_num/2)
        add_test = train_index[:add_test_num]
        val_num = int(tra_num/8)
        valid_index = train_index[add_test_num:val_num+add_test_num]
        train_index = train_index[val_num+add_test_num:]
        test_index = test_index.tolist()
        test_index[len(test_index):len(test_index)] = add_test
        
        all_idx = np.array(["111" for _ in range(all_num)])
        for j in train_index:
            all_idx[j] = "tra"
        for j in valid_index:
            all_idx[j] = "val"
        for j in test_index:
            all_idx[j] = "tst"
        df["sp"+str(i+1)] = all_idx
    return df

def make_table(img_dir,json_dir,df):
    source_path =[]
    xmin =[]
    xmax =[]
    ymin =[]
    ymax =[]
    label =[]

    for folder in df["center"]:
        json_file = json.load(open(json_dir+folder + ".json"))
        wave_end = int(json_file["wave_end"])
        for wavetime in range(wave_end+1):
            assets = json_file["assets"][wavetime]
            if 0 <= int(assets["time"]) <= 40:
                continue
            source_path.append(img_dir + folder + "/" + assets["time"]+".png")
            label.append("Non" if assets["cav"] == 0 else "Crack") #Non, Crack
            xmin.append(assets["bbox"]["xmin"])
            xmax.append(assets["bbox"]["xmax"])
            ymin.append(assets["bbox"]["ymin"])
            ymax.append(assets["bbox"]["ymax"])
    all = np.array([source_path,xmin,xmax,ymin,ymax,label]).T
    columns = ["path","x_min","x_max","y_min","y_max","label"]
    table = pd.DataFrame(all,columns=columns)
    table["x_min"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["x_max"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["y_min"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["y_max"][table.index[table['label'] == "Non"].tolist()] = np.nan
    return table
###
### pretrain 用
def make_artindex(all_num,df,split_num): #3:1:1
    x = np.array([[x] for x in range(all_num)])
    y = np.array([x for x in range(all_num)])
    ss = KFold(n_splits=split_num, random_state=10,shuffle=True)
    for i,(train_index, test_index) in enumerate(ss.split(x, y)):
        tra_num = len(train_index)
        train_index = train_index.tolist()
        random.shuffle(train_index)
        # add_test_num = int(tra_num/4)
        add_test_num = 0
        add_test = train_index[:add_test_num]
        val_num = int(tra_num/4)
        valid_index = train_index[add_test_num:val_num+add_test_num]
        train_index = train_index[val_num+add_test_num:]
        test_index = test_index.tolist()
        test_index[len(test_index):len(test_index)] = add_test
        
        all_idx = np.array(["111" for _ in range(all_num)])
        for j in train_index:
            all_idx[j] = "tra"
        for j in valid_index:
            all_idx[j] = "val"
        for j in test_index:
            all_idx[j] = "tst"
        df["sp"+str(i+1)] = all_idx
    return df

def get_art(img_dir,json_dir,split_num):
 
    # nat_list = ["Austenite","CFRP","Isotropic"]
    nat_list = ["Isotropic"]

    folder_list = []

    for nat in nat_list:
        folder = os.listdir(path= img_dir + nat)
        folder = [img_dir + nat + "/"+ x.split(".")[0] for x in folder]
        folder_list[len(folder_list):len(folder_list)] = folder

    df = pd.DataFrame(np.array([folder_list]).T,columns=["path"])     
    split = make_artindex(len(df),df,split_num)
    for i in range(1,6):
        sp = "sp"+str(i)
        df_tra = df[df[sp] == "tra"] 
        df_tra = df_tra[["path"]]   


        df_val = df[df[sp] == "val"] 
        df_val = df_val[["path"]]


        df_tst = df[df[sp] == "tst"]
        df_tst = df_tst[["path"]]

        train = make_arttable(img_dir,json_dir,df_tra)
        valid = make_arttable(img_dir,json_dir,df_val)
        test = make_arttable(img_dir,json_dir,df_tst)
        
        train.to_csv(sp + "_arttrain.csv",index=False)
        valid.to_csv(sp + "_artvalid.csv",index=False)
        test.to_csv(sp + "_arttest.csv",index=False)
        
    print(len(train))
    print(len(valid))
    print(len(test))
    return make_index(len(df),df,split_num)

def make_arttable(img_dir,json_dir,df):
    source_path =[]
    xmin =[]
    xmax =[]
    ymin =[]
    ymax =[]
    label =[]

    for folder in df["path"]:
        json_file = json.load(open(folder.replace("img","json") + ".json"))
        wave_end = int(json_file["wave_end"])
        for wavetime in range(wave_end+1):
            assets = json_file["assets"][wavetime]
            source_path.append(folder + "/" + assets["time"]+".png")
            label.append("Non" if assets["cav"] == 0 else "Crack") #Non, Crack
            xmin.append(assets["bbox"]["xmin"])
            xmax.append(assets["bbox"]["xmax"])
            ymin.append(assets["bbox"]["ymin"])
            ymax.append(assets["bbox"]["ymax"])
    all = np.array([source_path,xmin,xmax,ymin,ymax,label]).T
    columns = ["path","x_min","x_max","y_min","y_max","label"]
    table = pd.DataFrame(all,columns=columns)
    table["x_min"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["x_max"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["y_min"][table.index[table['label'] == "Non"].tolist()] = np.nan
    table["y_max"][table.index[table['label'] == "Non"].tolist()] = np.nan
    return table

####


In [3]:
list_not15 = []
list_15 = []

for x in range(1,50):
    
    if int(x/10)==0:
        list_15.append("0"+str(x)+"_15") 
    else:
        list_15.append(str(x)+"_15")
            
for y in range(11,19):
    for x in range(4,47,2):
        if y==15:
            continue
        
        if int(x/10)==0:
            list_not15.append("0"+str(x)+"_"+str(y)) 
        else:
            list_not15.append(str(x)+"_"+str(y))
            
list_not15[88:88] = list_15
folders = list_not15.copy()
df = pd.DataFrame(np.array([folders]).T,columns=["center"]) 

# bin_list = ["46_11","30_12","28_15","24_18","32_17","40_17","16_18","26_18","20_18","30_18","40_18","46_18","28_18"] #除くものを指定
# bin_list = ["42_16","28_18","11_15","36_14","14_15","46_12","44_16","30_14","16_13","04_13","26_14","22_17","38_13","24_18","42_14","15_15"]
bin_list = []
# for x in range(1,11):
    
#     if int(x/10)==0:
#         bin_list.append("0"+str(x)+"_15") 
#     else:
#         bin_list.append(str(x)+"_15")
        
# for x in range(36,50):
    
#     if int(x/10)==0:
#         bin_list.append("0"+str(x)+"_15")
#     else:
#         bin_list.append(str(x)+"_15")

# for x in range(4,19):
    
#     if int(x/10)==0:
#         bin_list.append("0"+str(x)+"_17") 
#     else:
#         bin_list.append(str(x)+"_17")

# for x in range(1,48):
    
#     if int(x/10)==0:
#         bin_list.append("0"+str(x)+"_18") 
#     else:
#         bin_list.append(str(x)+"_18")
        
# for x in range(1,48):
    
#     if int(x/10)==0:
#         bin_list.append("0"+str(x)+"_12") 
#     else:
#         bin_list.append(str(x)+"_12")

# for x in range(1,48):
    
#     if int(x/10)==0:
#         bin_list.append("0"+str(x)+"_11") 
#     else:
#         bin_list.append(str(x)+"_11")

for bin in bin_list:
    df = df[df.center != bin]

sp_num = 5 #split数

df = make_index(len(df),df,sp_num)

df.to_csv("all.csv",index=False)

art_json_dir = "/workspace/data/Artificialdata/crop_json/"
art_img_dir = "/workspace/data/Artificialdata/crop_img/"
sp=5
df_art = get_art(art_img_dir,art_json_dir,sp)
df_art


14223
4741
4741


Unnamed: 0,path,sp1,sp2,sp3,sp4,sp5
0,/workspace/data/Artificialdata/crop_img/Isotro...,tra,tra,tra,tst,tst
1,/workspace/data/Artificialdata/crop_img/Isotro...,tst,tst,tst,tst,tra
2,/workspace/data/Artificialdata/crop_img/Isotro...,tst,tst,tra,tra,val
3,/workspace/data/Artificialdata/crop_img/Isotro...,tst,tst,val,tra,tst
4,/workspace/data/Artificialdata/crop_img/Isotro...,val,tra,tst,tra,tra
5,/workspace/data/Artificialdata/crop_img/Isotro...,tst,tst,tst,tst,val
6,/workspace/data/Artificialdata/crop_img/Isotro...,tra,tst,tra,tst,tst
7,/workspace/data/Artificialdata/crop_img/Isotro...,tra,tst,tra,tst,tra
8,/workspace/data/Artificialdata/crop_img/Isotro...,tra,val,tst,tra,tst
9,/workspace/data/Artificialdata/crop_img/Isotro...,tst,tst,tst,val,tst


In [4]:
target_dir = "."
real_json_dir = "/workspace/data/Realdata/crop_json/"
real_img_dir = "/workspace/data/Realdata/crop_img/"
art_json_dir = "/workspace/data/Artificialdata/crop_json/"
art_img_dir = "/workspace/data/Artificialdata/crop_img/"
art_train = make_table_art(art_img_dir,art_json_dir)

artdataflag = ""
for i in range(1,6):
    sp = "sp"+str(i)
    df_tra = df[df[sp] == "tra"] 
    df_tra = df_tra[["center"]]   


    df_val = df[df[sp] == "val"] 
    df_val = df_val[["center"]]


    df_tst = df[df[sp] == "tst"]
    df_tst = df_tst[["center"]]


    train = make_table(real_img_dir,real_json_dir,df_tra)
    valid = make_table(real_img_dir,real_json_dir,df_val)
    test = make_table(real_img_dir,real_json_dir,df_tst)
    
    # train.to_csv("/workspace/src/config/" + sp + "_train.csv",index=False)
    # valid.to_csv("/workspace/src/config/" + sp + "_valid.csv",index=False)
    # test.to_csv("/workspace/src/config/" + sp + "_test.csv",index=False)
    # pd.concat([train,art_train]).to_csv("/workspace/src/config/" + sp + "_arttrain.csv",index=False)
    
    train.to_csv(sp + "_train.csv",index=False)
    valid.to_csv(sp + "_valid.csv",index=False)
    test.to_csv(sp + "_test.csv",index=False)
    pd.concat([train,art_train]).to_csv(sp + "_alltrain.csv",index=False)

print(len(train))
print(len(valid))
print(len(test))

5766
1860
11253


In [5]:
# 人工

target_file = "./sp1_arttrain.csv"
art_domain1 = pd.read_csv(target_file)
target_file = "./sp1_artvalid.csv"
art_domain2 = pd.read_csv(target_file)
target_file = "./sp1_arttest.csv"
art_domain3 = pd.read_csv(target_file)

art_domain = pd.concat([art_domain1,art_domain2,art_domain3])
art_domain = art_domain.reset_index()

indices = art_domain.index
new_filename = []
for i in indices:
    if 0<= i <=9:
        new_filename.append("0000" + str(i) + ".png")
    elif 10<= i <=99:
        new_filename.append("000" + str(i) + ".png")
    elif 100<= i <=999:
        new_filename.append("00" + str(i) + ".png")
    elif 1000<= i <=9999:
        new_filename.append("0" + str(i) + ".png")
    else:
        new_filename.append(str(i) + ".png")

art_domain["newfilename"] = new_filename
# 実
target_file = "./sp1_train.csv"
real_domain1 = pd.read_csv(target_file)
target_file = "./sp1_valid.csv"
real_domain2 = pd.read_csv(target_file)
target_file = "./sp1_test.csv"
real_domain3 = pd.read_csv(target_file)

real_domain = pd.concat([real_domain1,real_domain2,real_domain3])
real_domain = real_domain.reset_index()
indices = real_domain.index
new_filename = []
for i in indices:
    if 0<= i <=9:
        new_filename.append("0000" + str(i) + ".png")
    elif 10<= i <=99:
        new_filename.append("000" + str(i) + ".png")
    elif 100<= i <=999:
        new_filename.append("00" + str(i) + ".png")
    elif 1000<= i <=9999:
        new_filename.append("0" + str(i) + ".png")
    else:
        new_filename.append(str(i) + ".png")

real_domain["newfilename"] = new_filename



In [6]:
import shutil
import os

rootpath = "/workspace/src/datasets/luvt"
if not os.path.exists(rootpath):
    os.mkdir(rootpath)
if not os.path.exists(rootpath+"/trainA"):
    os.mkdir(rootpath+"/trainA")
if not os.path.exists(rootpath+"/trainB"):
    os.mkdir(rootpath+"/trainB")    

origin_trainA = art_domain["path"].to_list()
target_trainA = art_domain["newfilename"].to_list()

for src,copy in zip(origin_trainA,target_trainA):
    shutil.copyfile(src,"/workspace/src/datasets/luvt/trainA/"+copy)

origin_trainB = real_domain["path"].to_list()
target_trainB = real_domain["newfilename"].to_list()

for src,copy in zip(origin_trainB,target_trainB):
    shutil.copyfile(src,"/workspace/src/datasets/luvt/trainB/"+copy)

In [7]:
art_domain.to_csv("newfilename.csv",index=False)
print(art_domain)

       index                                               path  x_min  x_max  \
0          0  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
1          1  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
2          2  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
3          3  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
4          4  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
...      ...                                                ...    ...    ...   
23700   4736  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
23701   4737  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
23702   4738  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
23703   4739  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   
23704   4740  /workspace/data/Artificialdata/crop_img/Isotro...    NaN    NaN   

       y_min  y_max label n

In [8]:
print(real_domain)

       index                                             path  x_min  x_max  \
0          0  /workspace/data/Realdata/crop_img/04_11/041.png    NaN    NaN   
1          1  /workspace/data/Realdata/crop_img/04_11/042.png    NaN    NaN   
2          2  /workspace/data/Realdata/crop_img/04_11/043.png    NaN    NaN   
3          3  /workspace/data/Realdata/crop_img/04_11/044.png    NaN    NaN   
4          4  /workspace/data/Realdata/crop_img/04_11/045.png    NaN    NaN   
...      ...                                              ...    ...    ...   
18874  11341  /workspace/data/Realdata/crop_img/42_18/129.png    NaN    NaN   
18875  11342  /workspace/data/Realdata/crop_img/42_18/130.png    NaN    NaN   
18876  11343  /workspace/data/Realdata/crop_img/42_18/131.png    NaN    NaN   
18877  11344  /workspace/data/Realdata/crop_img/42_18/132.png    NaN    NaN   
18878  11345  /workspace/data/Realdata/crop_img/42_18/133.png    NaN    NaN   

       y_min  y_max label newfilename  
0        Na