In [11]:
import os
import math
import configparser
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import tensorflow as tf
from tensorflow import keras
import py
import mylib
import cv2 as cv
import pytesseract
from tqdm import tqdm
from typing import Optional, List, Dict, Set, Tuple
from scml.nlp import strip_punctuation, to_ascii_str

In [12]:
# Notebook-wide pandas settings.
#os.environ["OMP_THREAD_LIMIT"] = "1"
# Treat +/-inf as missing values in pandas NA handling.
# NOTE(review): this option is deprecated in recent pandas releases — confirm the pinned pandas version.
pd.set_option("use_inf_as_na", True)
# Effectively disable truncation of columns, rows and cell contents when frames are displayed.
pd.set_option("display.max_columns", 9999)
pd.set_option("display.max_rows", 9999)
pd.set_option('max_colwidth', 9999)

In [13]:
# Load the training metadata and derive the columns used by the rest of the notebook.
train = pd.read_csv("input/train.csv", engine="c", low_memory=False)
# Per-row target computed by the project helper (presumably the ground-truth match list — confirm in mylib).
train["target"] = mylib.target_label(train)
train["image_path"] = "input/train_images/" + train["image"]
# Posting ids in row order; later cells translate row positions back to ids through this list.
posting_ids = train["posting_id"].tolist()
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34250 entries, 0 to 34249
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   posting_id   34250 non-null  object
 1   image        34250 non-null  object
 2   image_phash  34250 non-null  object
 3   title        34250 non-null  object
 4   label_group  34250 non-null  int64 
 5   target       34250 non-null  object
 6   image_path   34250 non-null  object
dtypes: int64(1), object(6)
memory usage: 1.8+ MB


In [14]:
# Load the saved model and keep only the path from its first input to the
# "embedding_output" layer, so that predict() returns embedding vectors.
model_dir = "models/eb0_arc_20210508_0000"
m0 = keras.models.load_model(f"{model_dir}/trial_0/model.h5")
# m0.input[0] selects the first of the saved model's inputs
# (presumably the image input; any other inputs are dropped here — TODO confirm).
m0 = keras.models.Model(inputs=m0.input[0], outputs=m0.get_layer("embedding_output").output)
m0.summary()

Model: "model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     [(None, 224, 224, 3)]     0         
_________________________________________________________________
efficientnetb0 (Functional)  (None, 1280)              4049571   
_________________________________________________________________
layer_normalization_1 (Layer (None, 1280)              2560      
_________________________________________________________________
dense_1 (Dense)              (None, 1280)              1639680   
_________________________________________________________________
embedding_output (LayerNorma (None, 1280)              2560      
Total params: 5,694,371
Trainable params: 1,644,800
Non-trainable params: 4,049,571
_________________________________________________________________


In [15]:
# Read the input resolution configured for the chosen backbone from app.ini.
MODEL = 'efficientnetb0'
CONF = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open; a missing app.ini
# therefore surfaces as a KeyError on the section lookup below.
CONF.read("app.ini")
resolution = int(CONF[MODEL]["resolution"])
print(f"resolution={resolution}")

resolution=224


In [16]:
# Stream the training images through the embedding model, one prediction row per image.
idg = keras.preprocessing.image.ImageDataGenerator(
    #shear_range=0.2,
    #zoom_range=0.2,
    #horizontal_flip=True,
    # NOTE(review): pixels are scaled to [0, 1] here — confirm this matches the
    # preprocessing the model was trained with.
    rescale=1./255,
    data_format="channels_last",
    dtype=np.float32
)
data = idg.flow_from_dataframe(
    dataframe=train,
    x_col="image",
    y_col="label_group",
    directory="input/train_images",
    target_size=(resolution, resolution),
    color_mode="rgb",
    batch_size=1024,
    # Keep batches in dataframe order: a later cell maps prediction row positions
    # back to posting ids by position.
    shuffle=False,
    class_mode="raw",
    interpolation="nearest",
)
y0 = m0.predict(data, verbose=1)
#y1 = m1.predict(data, verbose=1)
#y2 = m2.predict(data, verbose=1)
#y3 = m3.predict(data, verbose=1)
#y4 = m4.predict(data, verbose=1)
#assert y0.shape == y1.shape == y2.shape == y3.shape == y4.shape
print(f"y0.shape={y0.shape}")

Found 34250 validated image filenames.
y0.shape=(34250, 1280)


In [17]:
#res = []
#for i in range(len(y0)):
    #a = np.vstack((y0[i], y1[i], y2[i], y3[i], y4[i]))
    #a = np.vstack((y0[i], y1[i]))
    #m = np.mean(a, axis=0)
    #res.append(m)
#em = np.array(res, dtype=np.float32)
#assert y0.shape == em.shape
#print(f"em.shape={em.shape}")

In [18]:
# Use the single model's predictions, cast to float32, as the image embedding matrix.
em = y0.astype(np.float32)
print(f"em.shape={em.shape}")

em.shape=(34250, 1280)


In [51]:
%%time
# Image matching: for every embedding, collect the posting ids of its nearest
# neighbours whose euclidean distance does not exceed `threshold`.
threshold = 0.0001
nn = NearestNeighbors(
    n_neighbors=min(49, len(posting_ids) - 1), metric="euclidean"
)
nn.fit(em)
# Called without a query, so each row's neighbours come from the fitted data itself.
distances, indices = nn.kneighbors()
res: List[List[str]] = []
for row_distances, row_indices in zip(distances, indices):
    matched = []
    for dist, neighbour in zip(row_distances, row_indices):
        # Stop at the first neighbour beyond the threshold; the rest of the row is skipped.
        if dist > threshold:
            break
        matched.append(posting_ids[neighbour])
    res.append(matched)
# Positional assignment: row k of `res` belongs to row k of `train`.
train["image_matches"] = res

CPU times: user 1min 31s, sys: 22.9 s, total: 1min 54s
Wall time: 53.8 s


th=.25, f1=.586
th=.30, f1=.586
th=.35, f1=.587
th=.40, f1=.583

In [33]:
%%time
# Per-row matches produced by the project's phash helper.
# NOTE(review): the meaning and scale of `threshold` are defined in mylib — confirm there.
train["phash_matches"] = mylib.phash_matches(train, threshold=0.3)

CPU times: user 31.3 s, sys: 16.2 s, total: 47.6 s
Wall time: 48.4 s


In [34]:
%%time
# mylib.preprocess("title") returns the callable that is applied to each row (axis=1)
# to build the preprocessed title column.
train["title_p"] = train.apply(mylib.preprocess("title"), axis=1)

CPU times: user 34.6 s, sys: 219 ms, total: 34.8 s
Wall time: 35.3 s


In [35]:
%%time
# Title matching with a locally stored sentence-transformers model, via the project helper.
# The commented-out names are alternative models that can be swapped in.
st_name = "stsb-distilbert-base"
#st_name = "paraphrase-distilroberta-base-v1"
#st_name = "paraphrase-xlm-r-multilingual-v1"
train["title_matches"] = mylib.sbert_matches(
    model_path=f"pretrained/sentence-transformers/{st_name}",
    sentences=train["title_p"].tolist(),
    # Must be in the same order as `sentences`; both come from `train` in row order.
    posting_ids=posting_ids,
    # NOTE(review): threshold semantics are defined in mylib.sbert_matches — confirm there.
    threshold=0.5
)

CPU times: user 1h 22min 21s, sys: 48.5 s, total: 1h 23min 10s
Wall time: 20min 36s


In [36]:
def erode_dilate(img):
    """Erode and then dilate `img`, one iteration each, with a 2x2 all-ones kernel.

    Returns the processed image; the input array is passed to OpenCV unchanged.
    """
    structuring_element = np.ones((2, 2), np.uint8)
    eroded = cv.erode(img, structuring_element, iterations=1)
    return cv.dilate(eroded, structuring_element, iterations=1)


def image_to_text(img_path, mode: str, timeout: float, neighbours: int=41, psm: int=3) -> Optional[str]:
    """Extract text from an image with Tesseract after adaptive thresholding.

    The image is read as grayscale, median-blurred, thresholded (normal and/or
    inverted, depending on `mode`), cleaned with `erode_dilate` and passed to
    `pytesseract.image_to_string`. The recognised text is converted to ASCII,
    stripped of punctuation and returned as whitespace-normalised tokens.

    Args:
        img_path: path of the image file to read.
        mode: "binary" (normal threshold), "inverted" (inverted threshold) or
            "binary_inverted" (both passes; normal-pass tokens come first).
            Any other value runs no pass and therefore yields None.
        timeout: per-pass OCR timeout forwarded to pytesseract.
        neighbours: neighbourhood (block) size forwarded to cv.adaptiveThreshold.
        psm: Tesseract page segmentation mode, forwarded as "--psm".

    Returns:
        The space-joined tokens, or None when no OCR pass produced a result
        (every attempted pass failed or timed out, or `mode` selected no pass).

    Raises:
        OSError: if the image cannot be read.
    """
    config = f"--psm {psm}"
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread reports an unreadable or missing file by returning None;
        # fail here with a clear message instead of an obscure error further down.
        raise OSError(f"cannot read image: {img_path}")
    #img = cv.resize(img, None, fx=0.5, fy=0.5, interpolation=cv.INTER_AREA)
    img = cv.medianBlur(img, 3)

    # Threshold types to run, in output order (normal pass before inverted pass).
    threshold_types = []
    if mode == "binary_inverted" or mode == "binary":
        threshold_types.append(cv.THRESH_BINARY)
    if mode == "binary_inverted" or mode == "inverted":
        threshold_types.append(cv.THRESH_BINARY_INV)

    tokens = []
    any_result = False
    for threshold_type in threshold_types:
        th = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, threshold_type, neighbours, 2)
        th = erode_dilate(th)
        try:
            raw = pytesseract.image_to_string(th, timeout=timeout, config=config)
        except Exception:
            # Best effort: a timed-out or failed pass contributes nothing.
            # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
            continue
        if raw is None:
            continue
        any_result = True
        tokens += strip_punctuation(to_ascii_str(raw)).split()

    if not any_result:
        return None
    return " ".join(tokens)

In [37]:
# Toggle for the (slow) OCR branch; the OCR-dependent cells below are guarded by it.
OCR = False
if OCR:
    res = []
    # Counts images for which image_to_text returned None (timeout or any other OCR failure).
    n_timeout = 0
    # Iterate the column rather than itertuples(): the column has a length,
    # so tqdm can show the total and an ETA, and no row tuples are built.
    for img_path in tqdm(train["image_path"]):
        s = image_to_text(img_path, mode="inverted", timeout=0.4, neighbours=41, psm=11)
        if s is None:
            s = ""
            n_timeout += 1
        res.append(s)
    print(f"n_timeout={n_timeout}")

In [38]:
if OCR:
    # `res` holds the OCR text per row, in `train` row order, from the OCR loop cell.
    train["itext"] = res
    train["text"] = train["title"] + " " + train["itext"]
    cols = ["text", "itext", "title"]
    # A bare expression inside an `if` body is not auto-displayed by Jupyter,
    # so the preview has to be shown explicitly.
    display(train[cols].head())

In [39]:
%%time
# Only when OCR is enabled: preprocess the combined "text" column row by row
# with the callable returned by mylib.preprocess("text").
if OCR:
    train["text_p"] = train.apply(mylib.preprocess("text"), axis=1)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 11.7 µs


In [40]:
# Only when OCR is enabled: match on the preprocessed text column using the same
# helper and settings as the title-matching cell.
if OCR:
    st_name = "stsb-distilbert-base"
    #st_name = "paraphrase-distilroberta-base-v1"
    #st_name = "paraphrase-xlm-r-multilingual-v1"
    train["text_matches"] = mylib.sbert_matches(
        model_path=f"pretrained/sentence-transformers/{st_name}",
        sentences=train["text_p"].tolist(),
        # Must be in the same order as `sentences`.
        posting_ids=posting_ids,
        threshold=0.5
    )

In [52]:
# Match-list columns that are merged into the final prediction; the OCR-based
# column only exists when the OCR branch ran.
fs = ["phash_matches", "title_matches", "image_matches"] + (["text_matches"] if OCR else [])
combine_row = mylib.combine_as_list(fs)
train["matches"] = train.apply(combine_row, axis=1)
score_row = mylib.metric_per_row("matches")
train["f1"] = train.apply(score_row, axis=1)
print(f"Combined score={train['f1'].mean():.3f}")

Combined score=0.515


In [50]:
# Hand-maintained experiment log: one record per configuration that was tried.
# Every record starts from the same template (all settings unset except the
# phash/title thresholds shared by every run) and overrides what that run changed.
_template = dict.fromkeys(
    [
        "score",
        "phash_threshold",
        "title_threshold",
        "image_threshold",
        "text_threshold",
        "ocr_threshold",
        "ocr_timeout",
        "ocr_neighbours",
        "ocr_psm",
    ]
)
_template.update(phash_threshold=0.3, title_threshold=0.5)
# Settings common to the two OCR runs.
_ocr_run = {"text_threshold": 0.5, "ocr_timeout": 0.4, "ocr_neighbours": 41, "ocr_psm": 11}

res = [
    {**_template, "score": 0.514, "image_threshold": 0.001},
    {**_template, "score": 0.498, "image_threshold": 0.01},
    {**_template, "score": 0.136, "image_threshold": 0.05},
    {**_template, **_ocr_run, "score": 0.674, "ocr_threshold": "inverted"},
    {**_template, **_ocr_run, "score": 0.674, "ocr_threshold": "binary"},
    {**_template, "score": 0.674},
]
# Best score first; transposed so each run reads as one column.
df = pd.DataFrame.from_records(res).sort_values("score", ascending=False, ignore_index=True)
df.T.head(30)

Unnamed: 0,0,1,2,3,4,5
score,0.674,0.674,0.674,0.514,0.498,0.136
phash_threshold,0.3,0.3,0.3,0.3,0.3,0.3
title_threshold,0.5,0.5,0.5,0.5,0.5,0.5
image_threshold,,,,0.001,0.01,0.05
text_threshold,0.5,0.5,,,,
ocr_threshold,inverted,binary,,,,
ocr_timeout,0.4,0.4,,,,
ocr_neighbours,41.0,41.0,,,,
ocr_psm,11.0,11.0,,,,


In [43]:
# Preview the per-row score next to the target, the combined matches and each
# individual match column listed in `fs`.
cols = ["f1", "target", "matches"] + fs
train[cols].head(30)

Unnamed: 0,f1,target,matches,phash_matches,title_matches,image_matches
0,0.038462,"[train_3497907844, train_4175229751, train_2930186018, train_515008716, train_1354360830, train_516663932, train_2677100375, train_3480877149, train_2041937727, train_1699906038, train_3534764813, train_664339299, train_1296711926, train_1591104350, train_2918032299, train_2531777612, train_3731734583, train_1086903685, train_658862317, train_2123729460, train_2708192355, train_3454652975, train_1913449144, train_1540761220, train_3195325438, train_365461299, train_4060693827, train_2664816648, train_3332324456, train_1850201761, train_3956550694, train_1799291694, train_3178715139, train_2230152472, train_3945800827, train_2789820394, train_4003658555, train_240158006, train_2497676427, train_1943472851, train_711042017, train_2146279151, train_2813067780, train_846192553, train_627475146, train_3366673512, train_4199111972, train_2114123891, train_4178955354, train_112182868, train_3668806308]",[train_112182868],[],[],[]
1,0.038462,"[train_4206743389, train_984412308, train_121852154, train_762535630, train_3517115705, train_2262735115, train_3755300225, train_368430234, train_2317288464, train_3208392688, train_1458933053, train_4282331229, train_3949355563, train_1086352645, train_1446216827, train_2642455336, train_3838978688, train_98545496, train_3569591480, train_797513730, train_863149549, train_1580073987, train_2081303877, train_3194609880, train_678936454, train_2677777576, train_864736904, train_2184916710, train_1551941123, train_902429172, train_3338833167, train_1079287407, train_1966427691, train_2974564568, train_225670337, train_1905044128, train_1806160391, train_592604757, train_2128389901, train_4028145462, train_2333705544, train_686541160, train_2710828031, train_3340371184, train_1724556465, train_3003421228, train_1166656904, train_150004523, train_2057764463, train_1273401808, train_219281992]",[train_2333705544],[],[],[]
2,0.038462,"[train_4184037897, train_1010868925, train_1561375840, train_2963630570, train_2382946865, train_2486671168, train_61811259, train_2358518833, train_1521931249, train_408229003, train_543954089, train_3486600899, train_417261045, train_2344885693, train_226112794, train_1423119969, train_1374193074, train_880338666, train_210204308, train_2073151758, train_303073611, train_1750065022, train_59393500, train_2864046583, train_2566709185, train_3307737696, train_47863261, train_11694834, train_3434987130, train_1000804730, train_1500350068, train_1641622956, train_3173013958, train_784571098, train_2074205316, train_1302902041, train_1785829508, train_1714224702, train_2919386341, train_3897008118, train_64918479, train_843663648, train_477561587, train_1643485252, train_889614566, train_118604281, train_1157582002, train_1710200794, train_3099122287, train_1431505272, train_2767483557]",[train_3486600899],[],[],[]
3,0.038462,"[train_3497907844, train_4175229751, train_2930186018, train_515008716, train_1354360830, train_516663932, train_2677100375, train_3480877149, train_2041937727, train_1699906038, train_3534764813, train_664339299, train_1296711926, train_1591104350, train_2918032299, train_2531777612, train_3731734583, train_1086903685, train_658862317, train_2123729460, train_2708192355, train_3454652975, train_1913449144, train_1540761220, train_3195325438, train_365461299, train_4060693827, train_2664816648, train_3332324456, train_1850201761, train_3956550694, train_1799291694, train_3178715139, train_2230152472, train_3945800827, train_2789820394, train_4003658555, train_240158006, train_2497676427, train_1943472851, train_711042017, train_2146279151, train_2813067780, train_846192553, train_627475146, train_3366673512, train_4199111972, train_2114123891, train_4178955354, train_112182868, train_3668806308]",[train_3178715139],[],[],[]
4,0.038462,"[train_4184037897, train_1010868925, train_1561375840, train_2963630570, train_2382946865, train_2486671168, train_61811259, train_2358518833, train_1521931249, train_408229003, train_543954089, train_3486600899, train_417261045, train_2344885693, train_226112794, train_1423119969, train_1374193074, train_880338666, train_210204308, train_2073151758, train_303073611, train_1750065022, train_59393500, train_2864046583, train_2566709185, train_3307737696, train_47863261, train_11694834, train_3434987130, train_1000804730, train_1500350068, train_1641622956, train_3173013958, train_784571098, train_2074205316, train_1302902041, train_1785829508, train_1714224702, train_2919386341, train_3897008118, train_64918479, train_843663648, train_477561587, train_1643485252, train_889614566, train_118604281, train_1157582002, train_1710200794, train_3099122287, train_1431505272, train_2767483557]",[train_2486671168],[],[],[]
5,0.038462,"[train_3497907844, train_4175229751, train_2930186018, train_515008716, train_1354360830, train_516663932, train_2677100375, train_3480877149, train_2041937727, train_1699906038, train_3534764813, train_664339299, train_1296711926, train_1591104350, train_2918032299, train_2531777612, train_3731734583, train_1086903685, train_658862317, train_2123729460, train_2708192355, train_3454652975, train_1913449144, train_1540761220, train_3195325438, train_365461299, train_4060693827, train_2664816648, train_3332324456, train_1850201761, train_3956550694, train_1799291694, train_3178715139, train_2230152472, train_3945800827, train_2789820394, train_4003658555, train_240158006, train_2497676427, train_1943472851, train_711042017, train_2146279151, train_2813067780, train_846192553, train_627475146, train_3366673512, train_4199111972, train_2114123891, train_4178955354, train_112182868, train_3668806308]",[train_2114123891],[],[],[]
6,0.038462,"[train_4184037897, train_1010868925, train_1561375840, train_2963630570, train_2382946865, train_2486671168, train_61811259, train_2358518833, train_1521931249, train_408229003, train_543954089, train_3486600899, train_417261045, train_2344885693, train_226112794, train_1423119969, train_1374193074, train_880338666, train_210204308, train_2073151758, train_303073611, train_1750065022, train_59393500, train_2864046583, train_2566709185, train_3307737696, train_47863261, train_11694834, train_3434987130, train_1000804730, train_1500350068, train_1641622956, train_3173013958, train_784571098, train_2074205316, train_1302902041, train_1785829508, train_1714224702, train_2919386341, train_3897008118, train_64918479, train_843663648, train_477561587, train_1643485252, train_889614566, train_118604281, train_1157582002, train_1710200794, train_3099122287, train_1431505272, train_2767483557]",[train_1010868925],[],[],[]
7,0.038462,"[train_4184037897, train_1010868925, train_1561375840, train_2963630570, train_2382946865, train_2486671168, train_61811259, train_2358518833, train_1521931249, train_408229003, train_543954089, train_3486600899, train_417261045, train_2344885693, train_226112794, train_1423119969, train_1374193074, train_880338666, train_210204308, train_2073151758, train_303073611, train_1750065022, train_59393500, train_2864046583, train_2566709185, train_3307737696, train_47863261, train_11694834, train_3434987130, train_1000804730, train_1500350068, train_1641622956, train_3173013958, train_784571098, train_2074205316, train_1302902041, train_1785829508, train_1714224702, train_2919386341, train_3897008118, train_64918479, train_843663648, train_477561587, train_1643485252, train_889614566, train_118604281, train_1157582002, train_1710200794, train_3099122287, train_1431505272, train_2767483557]",[train_2358518833],[],[],[]
8,0.038462,"[train_3497907844, train_4175229751, train_2930186018, train_515008716, train_1354360830, train_516663932, train_2677100375, train_3480877149, train_2041937727, train_1699906038, train_3534764813, train_664339299, train_1296711926, train_1591104350, train_2918032299, train_2531777612, train_3731734583, train_1086903685, train_658862317, train_2123729460, train_2708192355, train_3454652975, train_1913449144, train_1540761220, train_3195325438, train_365461299, train_4060693827, train_2664816648, train_3332324456, train_1850201761, train_3956550694, train_1799291694, train_3178715139, train_2230152472, train_3945800827, train_2789820394, train_4003658555, train_240158006, train_2497676427, train_1943472851, train_711042017, train_2146279151, train_2813067780, train_846192553, train_627475146, train_3366673512, train_4199111972, train_2114123891, train_4178955354, train_112182868, train_3668806308]",[train_2789820394],[],[],[]
9,0.038462,"[train_3497907844, train_4175229751, train_2930186018, train_515008716, train_1354360830, train_516663932, train_2677100375, train_3480877149, train_2041937727, train_1699906038, train_3534764813, train_664339299, train_1296711926, train_1591104350, train_2918032299, train_2531777612, train_3731734583, train_1086903685, train_658862317, train_2123729460, train_2708192355, train_3454652975, train_1913449144, train_1540761220, train_3195325438, train_365461299, train_4060693827, train_2664816648, train_3332324456, train_1850201761, train_3956550694, train_1799291694, train_3178715139, train_2230152472, train_3945800827, train_2789820394, train_4003658555, train_240158006, train_2497676427, train_1943472851, train_711042017, train_2146279151, train_2813067780, train_846192553, train_627475146, train_3366673512, train_4199111972, train_2114123891, train_4178955354, train_112182868, train_3668806308]",[train_2708192355],[],[],[]


In [44]:
# Inspect the worst-scoring rows through a sorted copy instead of reordering
# `train` in place: `posting_ids`, the embedding matrix and the positional column
# assignments in earlier cells all assume the original row order, so an in-place
# sort silently misaligns them when those cells are re-run.
train_by_f1 = train.sort_values("f1", ascending=True, ignore_index=True)
train_by_f1[cols].head()

Unnamed: 0,f1,target,matches,phash_matches,title_matches,image_matches
0,0.024096,"[train_4206743389, train_984412308, train_121852154, train_762535630, train_3517115705, train_2262735115, train_3755300225, train_368430234, train_2317288464, train_3208392688, train_1458933053, train_4282331229, train_3949355563, train_1086352645, train_1446216827, train_2642455336, train_3838978688, train_98545496, train_3569591480, train_797513730, train_863149549, train_1580073987, train_2081303877, train_3194609880, train_678936454, train_2677777576, train_864736904, train_2184916710, train_1551941123, train_902429172, train_3338833167, train_1079287407, train_1966427691, train_2974564568, train_225670337, train_1905044128, train_1806160391, train_592604757, train_2128389901, train_4028145462, train_2333705544, train_686541160, train_2710828031, train_3340371184, train_1724556465, train_3003421228, train_1166656904, train_150004523, train_2057764463, train_1273401808, train_219281992]","[train_2264749547, train_4018291087, train_3717579619, train_1460066202, train_3353232992, train_3990138687, train_610451951, train_2666290091, train_2695036247, train_2718842453, train_516309314, train_4248334667, train_1344984885, train_1819066328, train_1447215678, train_57158484, train_3225495514, train_686441719, train_2589489734, train_1785659805, train_3302866803, train_2959620192, train_313149015, train_3425409472, train_4153355828, train_1477692249, train_4238545699, train_1893838090, train_1318281105, train_984412308, train_540806590, train_46242622]",[],"[train_3353232992, train_4153355828, train_2264749547, train_3225495514, train_46242622, train_4018291087, train_540806590, train_313149015, train_4238545699, train_57158484, train_3425409472, train_4248334667, train_1460066202, train_2666290091, train_1447215678, train_2718842453, train_1477692249, train_610451951, train_1344984885, train_1893838090, train_516309314, train_1318281105, train_3990138687, train_686441719, train_2959620192, train_3302866803, train_3717579619, train_2589489734, 
train_2695036247, train_1819066328, train_1785659805]",[]
1,0.024096,"[train_4206743389, train_984412308, train_121852154, train_762535630, train_3517115705, train_2262735115, train_3755300225, train_368430234, train_2317288464, train_3208392688, train_1458933053, train_4282331229, train_3949355563, train_1086352645, train_1446216827, train_2642455336, train_3838978688, train_98545496, train_3569591480, train_797513730, train_863149549, train_1580073987, train_2081303877, train_3194609880, train_678936454, train_2677777576, train_864736904, train_2184916710, train_1551941123, train_902429172, train_3338833167, train_1079287407, train_1966427691, train_2974564568, train_225670337, train_1905044128, train_1806160391, train_592604757, train_2128389901, train_4028145462, train_2333705544, train_686541160, train_2710828031, train_3340371184, train_1724556465, train_3003421228, train_1166656904, train_150004523, train_2057764463, train_1273401808, train_219281992]","[train_2264749547, train_4018291087, train_3717579619, train_1460066202, train_3353232992, train_3990138687, train_610451951, train_2666290091, train_2695036247, train_2718842453, train_516309314, train_4248334667, train_1344984885, train_1819066328, train_1447215678, train_1352413166, train_57158484, train_3225495514, train_686441719, train_2589489734, train_1785659805, train_3302866803, train_3340371184, train_2959620192, train_313149015, train_3425409472, train_1477692249, train_4238545699, train_1893838090, train_1318281105, train_540806590, train_46242622]",[],"[train_3353232992, train_1352413166, train_2264749547, train_3225495514, train_46242622, train_4018291087, train_540806590, train_313149015, train_4238545699, train_57158484, train_3425409472, train_4248334667, train_1460066202, train_2666290091, train_1447215678, train_2718842453, train_1477692249, train_610451951, train_1344984885, train_1893838090, train_516309314, train_1318281105, train_3990138687, train_686441719, train_2959620192, train_3302866803, train_3717579619, train_2589489734, 
train_2695036247, train_1819066328, train_1785659805]",[]
2,0.024691,"[train_4206743389, train_984412308, train_121852154, train_762535630, train_3517115705, train_2262735115, train_3755300225, train_368430234, train_2317288464, train_3208392688, train_1458933053, train_4282331229, train_3949355563, train_1086352645, train_1446216827, train_2642455336, train_3838978688, train_98545496, train_3569591480, train_797513730, train_863149549, train_1580073987, train_2081303877, train_3194609880, train_678936454, train_2677777576, train_864736904, train_2184916710, train_1551941123, train_902429172, train_3338833167, train_1079287407, train_1966427691, train_2974564568, train_225670337, train_1905044128, train_1806160391, train_592604757, train_2128389901, train_4028145462, train_2333705544, train_686541160, train_2710828031, train_3340371184, train_1724556465, train_3003421228, train_1166656904, train_150004523, train_2057764463, train_1273401808, train_219281992]","[train_1447215678, train_4018291087, train_1352413166, train_3717579619, train_57158484, train_3225495514, train_686441719, train_1460066202, train_3353232992, train_2589489734, train_1785659805, train_2959620192, train_313149015, train_2348197741, train_3425409472, train_610451951, train_4153355828, train_2666290091, train_2695036247, train_2718842453, train_1477692249, train_1893838090, train_4238545699, train_516309314, train_1318281105, train_4248334667, train_4206743389, train_1819066328, train_540806590, train_46242622]",[],"[train_3717579619, train_313149015, train_1893838090, train_686441719, train_1447215678, train_4248334667, train_3425409472, train_2666290091, train_1318281105, train_4238545699, train_46242622, train_516309314, train_1460066202, train_2959620192, train_3225495514, train_2718842453, train_540806590, train_4018291087, train_57158484, train_3353232992, train_4153355828, train_1352413166, train_1477692249, train_610451951, train_2348197741, train_2695036247, train_1785659805, train_2589489734, train_1819066328]",[]
3,0.024691,"[train_4206743389, train_984412308, train_121852154, train_762535630, train_3517115705, train_2262735115, train_3755300225, train_368430234, train_2317288464, train_3208392688, train_1458933053, train_4282331229, train_3949355563, train_1086352645, train_1446216827, train_2642455336, train_3838978688, train_98545496, train_3569591480, train_797513730, train_863149549, train_1580073987, train_2081303877, train_3194609880, train_678936454, train_2677777576, train_864736904, train_2184916710, train_1551941123, train_902429172, train_3338833167, train_1079287407, train_1966427691, train_2974564568, train_225670337, train_1905044128, train_1806160391, train_592604757, train_2128389901, train_4028145462, train_2333705544, train_686541160, train_2710828031, train_3340371184, train_1724556465, train_3003421228, train_1166656904, train_150004523, train_2057764463, train_1273401808, train_219281992]","[train_1447215678, train_426994012, train_2264749547, train_1352413166, train_3717579619, train_57158484, train_3225495514, train_686441719, train_1460066202, train_3353232992, train_2589489734, train_1344984885, train_1806160391, train_1785659805, train_2959620192, train_2348197741, train_313149015, train_3425409472, train_4153355828, train_2695036247, train_2666290091, train_2718842453, train_1477692249, train_1893838090, train_516309314, train_1318281105, train_4248334667, train_1819066328, train_540806590, train_46242622]",[],"[train_540806590, train_57158484, train_1477692249, train_516309314, train_2718842453, train_1318281105, train_2959620192, train_2589489734, train_1785659805, train_1819066328, train_2695036247, train_46242622, train_1352413166, train_4153355828, train_3353232992, train_2666290091, train_2348197741, train_1460066202, train_686441719, train_1893838090, train_313149015, train_1344984885, train_3425409472, train_4248334667, train_1447215678, train_2264749547, train_3717579619, train_3225495514]",[train_426994012]
4,0.025,"[train_4206743389, train_984412308, train_121852154, train_762535630, train_3517115705, train_2262735115, train_3755300225, train_368430234, train_2317288464, train_3208392688, train_1458933053, train_4282331229, train_3949355563, train_1086352645, train_1446216827, train_2642455336, train_3838978688, train_98545496, train_3569591480, train_797513730, train_863149549, train_1580073987, train_2081303877, train_3194609880, train_678936454, train_2677777576, train_864736904, train_2184916710, train_1551941123, train_902429172, train_3338833167, train_1079287407, train_1966427691, train_2974564568, train_225670337, train_1905044128, train_1806160391, train_592604757, train_2128389901, train_4028145462, train_2333705544, train_686541160, train_2710828031, train_3340371184, train_1724556465, train_3003421228, train_1166656904, train_150004523, train_2057764463, train_1273401808, train_219281992]","[train_1447215678, train_2264749547, train_4018291087, train_1352413166, train_225670337, train_3717579619, train_3225495514, train_686441719, train_1460066202, train_3353232992, train_2589489734, train_1344984885, train_1785659805, train_2959620192, train_313149015, train_2348197741, train_3425409472, train_4153355828, train_2666290091, train_2695036247, train_2718842453, train_1477692249, train_1893838090, train_516309314, train_1318281105, train_4248334667, train_1819066328, train_540806590, train_46242622]",[],"[train_540806590, train_4018291087, train_1477692249, train_516309314, train_1318281105, train_2959620192, train_2718842453, train_46242622, train_1460066202, train_2666290091, train_3353232992, train_4153355828, train_1352413166, train_1819066328, train_1785659805, train_2589489734, train_2695036247, train_1893838090, train_313149015, train_686441719, train_2348197741, train_1344984885, train_3425409472, train_4248334667, train_2264749547, train_1447215678, train_3717579619, train_3225495514]",[]
