In [1]:
import os
import math
import configparser
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import tensorflow as tf
from tensorflow import keras
import py
import mylib
import cv2 as cv
import pytesseract
from tqdm import tqdm
from typing import Optional, List, Dict, Set, Tuple
from scml.nlp import strip_punctuation, to_ascii_str

In [2]:
# --- Pipeline switches: each flag guards the cells of one matcher below ---
IMAGE = True   # image-embedding nearest-neighbour matches ("image_matches")
TITLE = True   # sentence-embedding matches on titles ("title_matches")
PHASH = True   # perceptual-hash matches ("phash_matches")
OCR = False    # OCR text extraction and matching ("text_matches")
MODEL = 'efficientnetb3'  # name of the app.ini section read below

# NOTE(review): "use_inf_as_na" is deprecated in newer pandas releases —
# confirm the pinned pandas version still accepts it.
pd.set_option("use_inf_as_na", True)
# Show wide frames and long list-valued cells without truncation.
pd.set_option("display.max_columns", 9999)
pd.set_option("display.max_rows", 9999)
pd.set_option('max_colwidth', 9999)
#os.environ["OMP_THREAD_LIMIT"] = "1"

CONF = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed. Fail here with a clear message instead of hitting an
# opaque KeyError on CONF[MODEL] below.
if not CONF.read("app.ini"):
    raise FileNotFoundError("app.ini could not be read from the working directory")
resolution = int(CONF[MODEL]["resolution"])
print(f"resolution={resolution}")

resolution=300


In [3]:
# Load the training metadata (one row per posting).
train = pd.read_csv("input/train.csv", engine="c", low_memory=False)
# Ground-truth column produced by mylib.target_label — presumably the list of
# posting_ids sharing the row's label_group; TODO confirm against mylib.
train["target"] = mylib.target_label(train)
# Path of each image relative to the notebook's working directory.
train["image_path"] = "input/train_images/" + train["image"]
# Row-ordered posting ids; later cells map positional indices back to ids with it.
posting_ids = train["posting_id"].tolist()
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34250 entries, 0 to 34249
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   posting_id   34250 non-null  object
 1   image        34250 non-null  object
 2   image_phash  34250 non-null  object
 3   title        34250 non-null  object
 4   label_group  34250 non-null  int64 
 5   target       34250 non-null  object
 6   image_path   34250 non-null  object
dtypes: int64(1), object(6)
memory usage: 1.8+ MB


In [4]:
%%time
# required for post-processing
# "title_p" feeds both the imap extraction loop and the title sentence matcher
# in the cells below. mylib.preprocess("title") returns the per-row callable
# that train.apply invokes (axis=1).
train["title_p"] = train.apply(mylib.preprocess("title"), axis=1)

CPU times: user 35.7 s, sys: 188 ms, total: 35.9 s
Wall time: 36 s


In [5]:
# posting_id -> mylib.extract(preprocessed title); handed to
# mylib.combine_as_list as `imap` in the Result section.
imap = {}
for row in tqdm(train.itertuples()):
    imap[row.posting_id] = mylib.extract(row.title_p)

34250it [02:07, 268.18it/s]


# PHash
th=.25, f1=.586 | th=.30, f1=.586 | th=.35, f1=.587 | th=.40, f1=.583

In [6]:
%%time
# Perceptual-hash matcher; runs only when the PHASH switch is on.
# threshold=0.3 sits in the plateau of the th/f1 sweep noted under the
# "PHash" heading above.
if PHASH:
    train["phash_matches"] = mylib.phash_matches(train, threshold=0.3)

CPU times: user 57.7 s, sys: 53.6 s, total: 1min 51s
Wall time: 55.6 s


# Title

In [7]:
%%time
if TITLE:
    # Sentence-transformer checkpoint directory name under
    # pretrained/sentence-transformers/. Disabled alternatives:
    #st_name = "paraphrase-distilroberta-base-v1"
    #st_name = "paraphrase-xlm-r-multilingual-v1"
    st_name = "stsb-distilbert-base"
    title_sentences = train["title_p"].tolist()
    train["title_matches"] = mylib.sbert_matches(
        model_path=f"pretrained/sentence-transformers/{st_name}",
        sentences=title_sentences,
        posting_ids=posting_ids,
        threshold=0.5,
    )

CPU times: user 22min 14s, sys: 34.2 s, total: 22min 48s
Wall time: 4min 30s


# Image 

In [8]:
if IMAGE:
    model_dir = "models/eb3_arc_20210509_1400"
    # Load the saved trial-0 model, then re-wire it so that it maps its first
    # input straight to the "embedding_output" layer.
    trained = keras.models.load_model(f"{model_dir}/trial_0/model.h5")
    embedding_tensor = trained.get_layer("embedding_output").output
    m0 = keras.models.Model(inputs=trained.input[0], outputs=embedding_tensor)
    m0.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     [(None, 300, 300, 3)]     0         
_________________________________________________________________
efficientnetb3 (Functional)  (None, 1536)              10783535  
_________________________________________________________________
layer_normalization_1 (Layer (None, 1536)              3072      
_________________________________________________________________
dense_1 (Dense)              (None, 1536)              2360832   
_________________________________________________________________
embedding_output (LayerNorma (None, 1536)              3072      
Total params: 13,150,511
Trainable params: 2,366,976
Non-trainable params: 10,783,535
_________________________________________________________________


In [9]:
# Compute one embedding per training image with the truncated model m0.
if IMAGE:
    # NOTE(review): rescale=1./255 must match the preprocessing used when the
    # model was trained — confirm against the training notebook.
    idg = keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        data_format="channels_last",
        dtype=np.float32
    )
    # shuffle=False so predictions come back in dataframe row order; the
    # nearest-neighbour cell indexes posting_ids by embedding position.
    # y_col is supplied for the generator API — presumably unused by predict().
    data = idg.flow_from_dataframe(
        dataframe=train,
        x_col="image",
        y_col="label_group",
        directory="input/train_images",
        target_size=(resolution, resolution),
        color_mode="rgb",
        batch_size=1024,
        shuffle=False,
        class_mode="raw",
        interpolation="nearest",
    )
    y0 = m0.predict(data, verbose=1)
    # Disabled: predictions from additional models (m1..m4) for an ensemble.
    #y1 = m1.predict(data, verbose=1)
    #y2 = m2.predict(data, verbose=1)
    #y3 = m3.predict(data, verbose=1)
    #y4 = m4.predict(data, verbose=1)
    #assert y0.shape == y1.shape == y2.shape == y3.shape == y4.shape
    #print(f"y0.shape={y0.shape}")
    # em is the embedding matrix consumed by the nearest-neighbour cell.
    em = y0.astype(np.float32)
    print(f"em.shape={em.shape}")

Found 34250 validated image filenames.
em.shape=(34250, 1536)


In [10]:
#res = []
#for i in range(len(y0)):
    #a = np.vstack((y0[i], y1[i], y2[i], y3[i], y4[i]))
    #a = np.vstack((y0[i], y1[i]))
    #m = np.mean(a, axis=0)
    #res.append(m)
#em = np.array(res, dtype=np.float32)
#assert y0.shape == em.shape
#print(f"em.shape={em.shape}")

In [31]:
%%time
if IMAGE:
    # Maximum euclidean distance at which a neighbour counts as a match.
    threshold = 1e-3
    nn = NearestNeighbors(
        n_neighbors=min(49, len(posting_ids) - 1), metric="euclidean", n_jobs=-1
    )
    nn.fit(em)
    # Called without a query matrix, so the fitted points themselves are
    # queried (per scikit-learn each point is then excluded from its own
    # neighbour list — TODO confirm for the pinned version).
    distances, indices = nn.kneighbors()
    res: List[List[str]] = []
    for row_distances, row_indices in zip(distances, indices):
        matched: List[str] = []
        for distance, index in zip(row_distances, row_indices):
            # Stop at the first neighbour beyond the threshold; this assumes
            # neighbours arrive sorted by increasing distance.
            if distance > threshold:
                break
            matched.append(posting_ids[index])
        res.append(matched)
    train["image_matches"] = res

CPU times: user 2min 28s, sys: 26.8 s, total: 2min 55s
Wall time: 57.5 s


# OCR

In [32]:
def erode_dilate(img):
    """Apply one erosion pass followed by one dilation pass.

    Both passes use the same 2x2 all-ones uint8 kernel. Returns the output
    of the dilation.
    """
    structuring = np.ones((2, 2), np.uint8)
    eroded = cv.erode(img, structuring, iterations=1)
    return cv.dilate(eroded, structuring, iterations=1)


def image_to_text(img_path, mode: str, timeout: float, neighbours: int=41, psm: int=3) -> Optional[str]:
    """Run tesseract OCR on a thresholded grayscale copy of an image.

    The image is read as grayscale, median-blurred (kernel size 3), then for
    each thresholding pass selected by ``mode`` it is adaptively thresholded,
    cleaned with ``erode_dilate`` and handed to ``pytesseract.image_to_string``.

    Args:
        img_path: path of the image file to read.
        mode: ``"binary"`` (THRESH_BINARY pass), ``"inverted"``
            (THRESH_BINARY_INV pass) or ``"binary_inverted"`` (both, binary
            first). Any other value selects no pass.
        timeout: per-call timeout forwarded to pytesseract.
        neighbours: block size forwarded to ``cv.adaptiveThreshold``.
        psm: tesseract page segmentation mode, passed as ``--psm``.

    Returns:
        The whitespace-joined tokens of every pass that produced text, after
        ``to_ascii_str`` and ``strip_punctuation``; ``None`` when no pass
        produced a result (every OCR call failed or no pass was selected).

    Raises:
        OSError: if ``cv.imread`` could not load ``img_path``.
    """
    config = f"--psm {psm}"
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread reports failure by returning None; without this check the
        # error would surface later as an opaque OpenCV assertion in medianBlur.
        raise OSError(f"could not read image: {img_path!r}")
    #img = cv.resize(img, None, fx=0.5, fy=0.5, interpolation=cv.INTER_AREA)
    img = cv.medianBlur(img, 3)

    # Threshold types to run, in the order their text is concatenated.
    threshold_types = []
    if mode == "binary_inverted" or mode == "binary":
        threshold_types.append(cv.THRESH_BINARY)
    if mode == "binary_inverted" or mode == "inverted":
        threshold_types.append(cv.THRESH_BINARY_INV)

    texts = []
    for threshold_type in threshold_types:
        th = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, threshold_type, neighbours, 2)
        th = erode_dilate(th)
        try:
            texts.append(pytesseract.image_to_string(th, timeout=timeout, config=config))
        except Exception:
            # Best effort: an OCR failure (for example pytesseract's timeout
            # error) only drops this pass. Catching Exception rather than
            # everything lets KeyboardInterrupt / SystemExit stop a long loop.
            continue

    if not texts:
        return None
    tokens = []
    for text in texts:
        tokens += strip_punctuation(to_ascii_str(text)).split()
    return " ".join(tokens)

In [33]:
if OCR:
    # OCR text per row, in train row order; "" stands in for a failed OCR call.
    res = []
    n_timeout = 0
    for row in tqdm(train.itertuples()):
        text = image_to_text(row.image_path, mode="inverted", timeout=0.4, neighbours=41, psm=11)
        if text is None:
            n_timeout += 1
            text = ""
        res.append(text)
    print(f"n_timeout={n_timeout}")

In [34]:
if OCR:
    # "itext" is the OCR output from the previous cell; "text" joins the
    # listing title and the OCR text with a single space.
    train["itext"] = res
    train["text"] = train["title"] + " " + train["itext"]
    cols = ["text", "itext", "title"]
    # A bare expression nested in an `if` is not the cell's last top-level
    # expression, so the notebook would not render it. Call display()
    # explicitly (IPython makes it available in notebook kernels).
    display(train[cols].head())

In [35]:
%%time
# Same preprocessing as for titles, applied to the combined title + OCR text.
# "text_p" is the input of the OCR sentence-matching cell below.
if OCR:
    train["text_p"] = train.apply(mylib.preprocess("text"), axis=1)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 10.7 µs


In [36]:
if OCR:
    # Sentence-transformer checkpoint directory name under
    # pretrained/sentence-transformers/. Disabled alternatives:
    #st_name = "paraphrase-distilroberta-base-v1"
    #st_name = "paraphrase-xlm-r-multilingual-v1"
    st_name = "stsb-distilbert-base"
    text_sentences = train["text_p"].tolist()
    train["text_matches"] = mylib.sbert_matches(
        model_path=f"pretrained/sentence-transformers/{st_name}",
        sentences=text_sentences,
        posting_ids=posting_ids,
        threshold=0.5,
    )

# Result

In [37]:
# Match columns to combine, in fixed order, kept only when their switch is on.
enabled_matchers = (
    (IMAGE, "image_matches"),
    (TITLE, "title_matches"),
    (PHASH, "phash_matches"),
    (OCR, "text_matches"),
)
fs = [column for flag, column in enabled_matchers if flag]

# Merge the per-matcher lists into one "matches" list per row.
combine_row = mylib.combine_as_list(
    fs,
    imap=imap,
    brand_threshold=0.5,
    measurement_threshold=0.5,
)
train["matches"] = train.apply(combine_row, axis=1)

# Row-wise metric on "matches", stored as "f1"; report its mean.
score_row = mylib.metric_per_row("matches")
train["f1"] = train.apply(score_row, axis=1)
print(f"Combined score={train.f1.mean():.3f}")

Combined score=0.644


In [38]:
# Hand-recorded log of earlier runs: one record per run, holding the combined
# score and the settings used. Settings a run did not use stay None.
# Fix: the key previously spelled "brand_theshold" is now "brand_threshold",
# so the DataFrame column carries the intended name.
# NOTE(review): "ocr_threshold" holds strings ("inverted" / "binary") that look
# like the `mode` argument of image_to_text — confirm before renaming.
_RUN_DEFAULTS = {
    "score": None,
    "phash_threshold": None,
    "title_threshold": None,
    "image_threshold": None,
    "image_pretrained": None,
    "brand_threshold": None,
    "measurement_threshold": None,
    "text_threshold": None,
    "ocr_threshold": None,
    "ocr_timeout": None,
    "ocr_neighbours": None,
    "ocr_psm": None,
}


def _run(score, **settings):
    """Return one result record: the defaults overlaid with ``settings``.

    Raises KeyError for a setting name that is not in ``_RUN_DEFAULTS`` so a
    misspelt key cannot silently create a new column.
    """
    unknown = sorted(set(settings) - set(_RUN_DEFAULTS))
    if unknown:
        raise KeyError(f"unknown result setting(s): {unknown}")
    return {**_RUN_DEFAULTS, "score": score, **settings}


# Records stay in the order they were originally written.
res = [
    _run(0.654, phash_threshold=0.3, title_threshold=0.5, image_threshold=1e-6,
         image_pretrained="enb3", brand_threshold=0.3, measurement_threshold=0.3),
    _run(0.654, phash_threshold=0.3, title_threshold=0.5, image_threshold=1e-5,
         image_pretrained="enb3", brand_threshold=0.3, measurement_threshold=0.3),
    _run(0.654, phash_threshold=0.3, title_threshold=0.5, image_threshold=1e-4,
         image_pretrained="enb3", brand_threshold=0.3, measurement_threshold=0.3),
    _run(0.645, phash_threshold=0.3, title_threshold=0.5, image_threshold=1e-3,
         image_pretrained="enb3", brand_threshold=0.3, measurement_threshold=0.3),
    _run(0.656, phash_threshold=0.3, title_threshold=0.5, image_threshold=5e-3,
         image_pretrained="enb3"),
    _run(0.522, image_threshold=5e-3, image_pretrained="enb3"),
    _run(0.473, image_threshold=0.01, image_pretrained="enb3"),
    _run(0.502, image_threshold=1e-3, image_pretrained="enb3"),
    _run(0.651, phash_threshold=0.2, title_threshold=0.5, image_threshold=1e-4,
         image_pretrained="enb3"),
    _run(0.654, phash_threshold=0.2, title_threshold=0.5, image_threshold=1e-5,
         image_pretrained="enb3"),
    _run(0.658, phash_threshold=0.3, title_threshold=0.5, image_threshold=1e-5,
         image_pretrained="enb3"),
    _run(0.656, phash_threshold=0.3, title_threshold=0.5, image_threshold=1e-4,
         image_pretrained="enb3"),
    _run(0.562, phash_threshold=0.3, title_threshold=0.5, image_threshold=0.001,
         image_pretrained="enb3"),
    _run(0.514, phash_threshold=0.3, title_threshold=0.5, image_threshold=0.001,
         image_pretrained="enb0"),
    _run(0.498, phash_threshold=0.3, title_threshold=0.5, image_threshold=0.01,
         image_pretrained="enb0"),
    _run(0.136, phash_threshold=0.3, title_threshold=0.5, image_threshold=0.05,
         image_pretrained="enb0"),
    _run(0.674, phash_threshold=0.3, title_threshold=0.5, text_threshold=0.5,
         ocr_threshold="inverted", ocr_timeout=0.4, ocr_neighbours=41, ocr_psm=11),
    _run(0.674, phash_threshold=0.3, title_threshold=0.5, text_threshold=0.5,
         ocr_threshold="binary", ocr_timeout=0.4, ocr_neighbours=41, ocr_psm=11),
    _run(0.674, phash_threshold=0.3, title_threshold=0.5),
]

# Best score first; transposed so that each run reads as one column.
df = pd.DataFrame.from_records(res).sort_values(
    "score", ascending=False, ignore_index=True
)
df.T.head(30)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18
score,0.674,0.674,0.674,0.658,0.656,0.656,0.654,0.654,0.654,0.654,0.651,0.645,0.562,0.522,0.514,0.502,0.498,0.473,0.136
phash_threshold,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.3,0.2,0.2,0.3,0.3,,0.3,,0.3,,0.3
title_threshold,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,,0.5,,0.5,,0.5
image_threshold,,,,0.00001,0.005,0.0001,0.00001,0.000001,0.0001,0.00001,0.0001,0.001,0.001,0.005,0.001,0.001,0.01,0.01,0.05
image_pretrained,,,,enb3,enb3,enb3,enb3,enb3,enb3,enb3,enb3,enb3,enb3,enb3,enb0,enb3,enb0,enb3,enb0
brand_theshold,,,,,,,0.3,0.3,0.3,,,0.3,,,,,,,
measurement_threshold,,,,,,,0.3,0.3,0.3,,,0.3,,,,,,,
text_threshold,,0.5,0.5,,,,,,,,,,,,,,,,
ocr_threshold,,binary,inverted,,,,,,,,,,,,,,,,
ocr_timeout,,0.4,0.4,,,,,,,,,,,,,,,,


In [39]:
# Show the per-row score next to the ground truth, the combined prediction
# and each enabled matcher's own list (fs comes from the combine cell above).
cols = ["f1", "target", "matches"] + fs
train[cols].head(30)

Unnamed: 0,f1,target,matches,image_matches,title_matches,phash_matches
0,1.0,"[train_129225211, train_2278313361]","[train_129225211, train_2278313361]",[],[train_2278313361],[]
1,1.0,"[train_3386243561, train_3423213080]","[train_3423213080, train_3386243561]",[],[train_3423213080],[]
2,1.0,"[train_2288590299, train_3803689425]","[train_2288590299, train_3803689425]",[],[train_3803689425],[]
3,0.333333,"[train_2406599165, train_3342059966]","[train_2406599165, train_3576714541, train_1744956981, train_3526771004]",[],"[train_1744956981, train_3576714541, train_3526771004]",[]
4,1.0,"[train_3369186413, train_921438619]","[train_921438619, train_3369186413]",[],[train_921438619],[]
5,0.5,"[train_2464356923, train_2753295474, train_305884580]",[train_2464356923],[],[],[train_2753295474]
6,0.444444,"[train_1802986387, train_1396161074, train_713073906, train_1275191373, train_2490201622, train_2411544001, train_1859060005]","[train_1396161074, train_1802986387]",[],[train_1396161074],[]
7,0.666667,"[train_1806152124, train_3227306976]",[train_1806152124],[],[],[]
8,0.5,"[train_86570404, train_2837452969, train_77364776]",[train_86570404],[],[],[]
9,0.666667,"[train_831680791, train_3031035861]",[train_831680791],[],[],[]


In [40]:
# Worst-scoring rows first, for error analysis.
# Note: this rebinds `df`, which previously held the results table.
df = train.sort_values("f1", ascending=True, ignore_index=True)
df[cols].head()

Unnamed: 0,f1,target,matches,image_matches,title_matches,phash_matches
0,0.026667,"[train_2743440279, train_377301035, train_3156194756, train_2319893226, train_1498884418, train_3872555759, train_983071994, train_285976786, train_481517549, train_3781067628, train_2760540023, train_1675156782, train_3721412582, train_2251689922, train_4036033948, train_2122350750, train_789986060, train_2919110849, train_538463136, train_321020794, train_2632984003, train_241440525, train_4285122625, train_98619170, train_673681421]","[train_2815610753, train_3770397438, train_2672786546, train_493482917, train_194536717, train_3189871060, train_4284769420, train_2047191940, train_1040521384, train_48285579, train_4152112601, train_969367345, train_3231189905, train_3100676375, train_2680308038, train_2743440279, train_1985214769, train_445817771, train_1843989717, train_3949833931, train_3162954286, train_1757679059, train_1909100939, train_1890023758, train_1461409280, train_2172282321, train_4024729812, train_3986138123, train_932644594, train_3730694224, train_2759622847, train_2883389512, train_3500331962, train_1161311757, train_3091874860, train_1315934554, train_2540368161, train_3874667288, train_1824611732, train_1164484477, train_3011723573, train_1037865617, train_2518862130, train_1434682905, train_4112263956, train_366219921, train_4149928554, train_2997354311, train_3684396189, train_3764873619]","[train_2759622847, train_969367345, train_4024729812, train_1985214769, train_3500331962, train_1843989717, train_4149928554, train_3100676375, train_3874667288, train_2672786546, train_3011723573, train_2815610753, train_3189871060, train_1037865617, train_3770397438, train_48285579, train_4284769420, train_1757679059, train_2540368161, train_1434682905, train_445817771, train_1040521384, train_2680308038, train_1161311757, train_493482917, train_4112263956, train_2997354311, train_2883389512, train_1890023758, train_1824611732, train_3091874860, train_1909100939, train_3986138123, train_932644594, train_3949833931, train_366219921, 
train_3764873619, train_3231189905, train_3162954286, train_2518862130, train_4152112601, train_3684396189, train_1164484477, train_1461409280, train_2172282321, train_1315934554, train_2047191940, train_194536717, train_3730694224]",[],[]
1,0.027397,"[train_4148281123, train_2109174539, train_283631508, train_4294325542, train_2856557965, train_1112045438, train_2582841861, train_4189323816, train_3639024230, train_59789283, train_4202230454, train_280888942, train_1578471936, train_1026809962, train_1255441640]","[train_2608772223, train_2713478960, train_4064455035, train_488600623, train_1157207566, train_4162431143, train_2655218618, train_4110457830, train_3417411762, train_2986571983, train_3877179943, train_2526779027, train_1586609700, train_2490728331, train_1205995076, train_2081102137, train_4171932335, train_2055595206, train_396434532, train_949854060, train_1724760722, train_2840010156, train_2568497922, train_4032813586, train_1662537134, train_3918597348, train_2044493665, train_4294325542, train_20723777, train_424048545, train_3175853833, train_3333914170, train_2796548237, train_1478931043, train_1404157115, train_3509671394, train_677908857, train_548892835, train_3692728330, train_1643562750, train_82243744, train_888082882, train_1160450240, train_3374438773, train_707196409, train_1337901649, train_1701305917, train_1090204965, train_808634018, train_2438146037, train_1949346618, train_979063662, train_2460467218, train_2215208747, train_89647500, train_88994643, train_880215134, train_4224487314]","[train_880215134, train_2490728331, train_488600623, train_1662537134, train_4110457830, train_3333914170, train_2568497922, train_1478931043, train_3417411762, train_2055595206, train_1160450240, train_2713478960, train_2215208747, train_396434532, train_2044493665, train_2986571983, train_949854060, train_424048545, train_2526779027, train_2438146037, train_2840010156, train_1586609700, train_2081102137, train_707196409, train_2796548237, train_979063662, train_1404157115, train_4171932335, train_2655218618, train_1205995076, train_4162431143, train_20723777, train_1949346618, train_3175853833, train_4224487314, train_1337901649, 
train_4064455035]",[train_3877179943],"[train_2460467218, train_888082882, train_1643562750, train_3509671394, train_1724760722, train_88994643, train_3374438773, train_1090204965, train_1701305917, train_3692728330, train_548892835, train_1157207566, train_2608772223, train_808634018, train_89647500, train_677908857, train_4032813586, train_3918597348, train_82243744]"
2,0.027778,"[train_3883860001, train_3330610755]","[train_1673464025, train_4014822321, train_4203244639, train_3811570286, train_3975724135, train_3330610755, train_2470897215, train_274322148, train_1165116650, train_493255960, train_3414954481, train_1783496192, train_3114280436, train_4154363656, train_1048853262, train_3857398510, train_4162349436, train_4057450652, train_1724760722, train_3652618812, train_1880996073, train_1026142696, train_1983528486, train_1455468961, train_4032813586, train_2072099738, train_3320065150, train_545577069, train_4269751718, train_3509671394, train_754099419, train_677908857, train_2034763060, train_3816837053, train_548892835, train_901288632, train_3032730238, train_1580750435, train_2481279023, train_3426503436, train_1299268426, train_1660747540, train_55159423, train_1643562750, train_3374438773, train_3630484623, train_2789795007, train_1701305917, train_548982238, train_2493407517, train_2606026595, train_1971526036, train_1601385496, train_622757487, train_1482993046, train_1646573458, train_383219687, train_3145079693, train_3835291007, train_1283623292, train_3677721055, train_928541043, train_2128418514, train_2051118753, train_285828586, train_89647500, train_3908367902, train_3240884969, train_1090204965, train_3074559172]","[train_901288632, train_285828586, train_4154363656, train_1026142696, train_1299268426, train_3240884969, train_3677721055, train_2051118753, train_493255960, train_3908367902, train_3652618812, train_3811570286, train_622757487, train_3630484623, train_1783496192, train_2072099738, train_3032730238, train_3816837053, train_4014822321, train_2493407517, train_1482993046, train_3975724135, train_3114280436, train_3857398510, train_2481279023, train_1048853262, train_1455468961, train_545577069, train_4057450652, train_1880996073, train_754099419, train_3426503436, train_1983528486, train_1580750435, train_3074559172, train_2128418514, train_274322148, train_3414954481, train_2789795007, 
train_1971526036, train_2606026595, train_3835291007, train_1673464025, train_1646573458, train_3145079693, train_4162349436, train_1283623292, train_1660747540, train_55159423]",[train_3320065150],"[train_3374438773, train_677908857, train_1724760722, train_1601385496, train_2034763060, train_2470897215, train_1701305917, train_548892835, train_4032813586, train_383219687, train_89647500, train_4203244639, train_3509671394, train_1090204965, train_548982238, train_4269751718, train_1643562750, train_928541043, train_1165116650]"
3,0.028986,"[train_3730238611, train_3972088306, train_2302471727, train_1951510014, train_3792816944, train_1724394898, train_1201541529, train_1839693367, train_3569100320, train_732582216, train_3355832936, train_3029410027, train_2594763545, train_829141579, train_495638834, train_975561537, train_1110783679, train_2422727042]","[train_1886476415, train_4153223724, train_493482917, train_2924796930, train_194536717, train_1258915345, train_2597285700, train_1456539795, train_4152112601, train_1002628427, train_1472649548, train_4128551344, train_2361997442, train_745074374, train_2783074831, train_4032813586, train_3942479788, train_4231509941, train_1255733781, train_1461409280, train_932644594, train_1890023758, train_3955123018, train_1702027645, train_3516250866, train_3509671394, train_677908857, train_413612040, train_2573060829, train_3569100320, train_3919702208, train_378911549, train_1161311757, train_353691984, train_1701305917, train_3081508651, train_2849895384, train_2620814242, train_1824611732, train_2452897945, train_1412085886, train_3079449343, train_1072756022, train_1435161567, train_558827082, train_768264269, train_3234392334, train_1713565479, train_422235145, train_2942491561, train_239849978]","[train_2783074831, train_1002628427, train_2620814242, train_1886476415, train_745074374, train_2573060829, train_3942479788, train_4231509941, train_2361997442, train_1435161567, train_768264269, train_932644594, train_378911549, train_239849978, train_3081508651, train_1472649548, train_3234392334, train_1824611732, train_422235145, train_353691984, train_1456539795, train_4032813586, train_677908857, train_2452897945, train_3509671394, train_1701305917, train_1255733781, train_2849895384, train_493482917, train_1072756022, train_1461409280, train_194536717, train_3516250866, train_1412085886, train_4152112601, train_558827082, train_3079449343, train_1258915345, train_1890023758, train_3919702208, train_2942491561, train_3955123018, 
train_1713565479, train_1161311757, train_2597285700, train_1702027645, train_4153223724, train_2924796930, train_4128551344]",[],[train_413612040]
4,0.032258,"[train_908687404, train_1963882714, train_2485628928, train_1255622678, train_540475989, train_1714924923, train_510683765, train_4145091985, train_1632311532, train_3575257305, train_4111091628, train_1086014309]","[train_1878848141, train_4014822321, train_4146813717, train_1653596628, train_1378164451, train_430222341, train_2534415584, train_2514556108, train_1753496181, train_3916960828, train_1822237971, train_869737991, train_3795529388, train_119103124, train_3334114492, train_1445525920, train_1983528486, train_1829436365, train_2072099738, train_3528783070, train_545577069, train_1041715554, train_3459074757, train_3821081767, train_63196596, train_3147258181, train_1643520212, train_3904614457, train_275292465, train_2853695827, train_2630784639, train_3273040224, train_2048556998, train_1034182272, train_1898686038, train_3191385878, train_149240701, train_1482993046, train_1646573458, train_2006253338, train_3835291007, train_2188846652, train_3677721055, train_44709323, train_3074559172, train_839534994, train_1714924923, train_1952616317, train_4151017426, train_3087233573]","[train_1653596628, train_3334114492, train_3916960828, train_1898686038, train_3273040224, train_3087233573, train_1878848141, train_1753496181, train_2048556998, train_3147258181, train_3191385878, train_4146813717, train_1822237971, train_2514556108, train_545577069, train_4151017426, train_1829436365, train_63196596, train_2630784639, train_839534994, train_1643520212, train_149240701, train_119103124, train_3795529388, train_3821081767, train_1952616317, train_3528783070, train_430222341, train_2072099738, train_1041715554, train_2006253338, train_3459074757, train_2188846652, train_1482993046, train_2853695827, train_1445525920, train_869737991, train_3677721055, train_2534415584, train_1378164451, train_1034182272, train_3904614457, train_44709323, train_4014822321, train_3074559172, train_1983528486, train_1646573458, train_3835291007, train_275292465]",[],[]
