In [1]:
using DelimitedFiles, DataFrames, CSV, Random
using Plots

In [2]:
# Data directory, resolved against the current working directory. The trailing
# "/" is intentional: paths elsewhere in this notebook are formed by plain string
# concatenation onto PATH (e.g. PATH * "train.csv").
PATH = string(pwd(), "/Results/Data/")
# Names of all entries currently present in the data directory.
filenames = readdir(PATH);

In [5]:
# Number of files expected in each of the three families below.
s = 24000;
# Split the directory listing into families by the substring found in each name.
ChemFieldFiles = filter(contains("Chemical"), filenames)
SecretionFiles = filter(contains("Secretion"), filenames)
UptakeFiles = filter(contains("Uptake"), filenames);

In [10]:
# Sanity check: every family must contain exactly `s` distinct file names.
for files in (ChemFieldFiles, SecretionFiles, UptakeFiles)
    @assert length(unique(files)) == s
end

In [37]:
# Random, disjoint split of the zero-based sample indices 0:s-1.
# Training: the first 16000 entries of a random permutation of 1:s, shifted so
# that the indices start at 0.
trIdx = randperm(s)[1:16000] .- 1;
# Held-out indices: everything in 0:s-1 that was not picked for training.
idx = setdiff(0:s-1, trIdx);
# A randomly chosen half of the held-out indices becomes the test set; the
# remaining half becomes the validation set.
teIdx = shuffle(idx)[1:Int(length(idx) / 2)];
valIdx = setdiff(idx, teIdx);
# Verify the individual sizes, then that the three sets together hold `s`
# entries with no index appearing twice.
@assert length(trIdx) == 16000
@assert length(teIdx) == 4000
@assert length(valIdx) == 4000
@assert length(vcat(trIdx, teIdx, valIdx)) == s
@assert length(unique(vcat(trIdx, teIdx, valIdx))) == s

In [38]:
# One manifest table per split. Each row names the three files that share a
# sample index: "<Kind><index>.dat" for Kind in Chemical, Uptake, Secretion.
trCSV = DataFrame(
    Chemical = string.("Chemical", trIdx, ".dat"),
    Uptake = string.("Uptake", trIdx, ".dat"),
    Secretion = string.("Secretion", trIdx, ".dat"),
);
teCSV = DataFrame(
    Chemical = string.("Chemical", teIdx, ".dat"),
    Uptake = string.("Uptake", teIdx, ".dat"),
    Secretion = string.("Secretion", teIdx, ".dat"),
);
valCSV = DataFrame(
    Chemical = string.("Chemical", valIdx, ".dat"),
    Uptake = string.("Uptake", valIdx, ".dat"),
    Secretion = string.("Secretion", valIdx, ".dat"),
);

In [39]:
# Persist each split's manifest as a CSV file inside the data directory.
for (name, table) in (("train.csv", trCSV), ("test.csv", teCSV), ("val.csv", valCSV))
    CSV.write(PATH * name, table)
end

In [40]:
# Create the three destination folders; mkpath leaves an existing directory
# untouched, so no separate existence check is needed.
for dir in ("train", "test", "val")
    mkpath(PATH * dir)
end
# Move every file named in a split's manifest (columns 1 to 3 of each row) from
# the data directory into that split's folder: train first, then test, then val.
for (dir, table, members) in
    (("train/", trCSV, trIdx), ("test/", teCSV, teIdx), ("val/", valCSV, valIdx))
    for i in eachindex(members), j in 1:3
        mv(PATH * table[i, j], PATH * dir * table[i, j])
    end
end