In [1]:
import os
import math
import configparser
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
import tensorflow as tf
from tensorflow import keras
import py
import mylib
import cv2 as cv
import pytesseract
from tqdm import tqdm
from typing import Optional, List, Dict, Set, Tuple
from scml.nlp import strip_punctuation, to_ascii_str

In [102]:
# Feature switches: each flag enables one matcher and its column in the
# final combination step.
IMAGE = True
TITLE = True
PHASH = True
OCR = False
# Section name looked up in app.ini for model-specific settings.
MODEL = 'efficientnetb3'
pd.set_option("use_inf_as_na", True)
pd.set_option("display.max_columns", 9999)
pd.set_option("display.max_rows", 9999)
pd.set_option('max_colwidth', 9999)
#os.environ["OMP_THREAD_LIMIT"] = "1"
CONF = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it did parse, so a missing app.ini would otherwise only show up as a
# confusing KeyError on CONF[MODEL] below. Fail fast with a clear message.
if not CONF.read("app.ini"):
    raise FileNotFoundError(
        f"app.ini not found or unreadable; it must define a [{MODEL}] section with a 'resolution' key"
    )
# Input image side length (pixels) used when resizing images for the model.
resolution = int(CONF[MODEL]["resolution"])
print(f"resolution={resolution}")

resolution=300


In [50]:
# Read the training postings, then attach the ground-truth match list and the
# on-disk image path for every row.
train = pd.read_csv("input/train.csv", low_memory=False, engine="c")
train["target"] = mylib.target_label(train)
train["image_path"] = "input/train_images/" + train["image"]
# Row-aligned posting ids; later cells use them to turn row indices into ids.
posting_ids = train.posting_id.tolist()
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34250 entries, 0 to 34249
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   posting_id   34250 non-null  object
 1   image        34250 non-null  object
 2   image_phash  34250 non-null  object
 3   title        34250 non-null  object
 4   label_group  34250 non-null  int64 
 5   target       34250 non-null  object
 6   image_path   34250 non-null  object
dtypes: int64(1), object(6)
memory usage: 1.8+ MB


In [52]:
%%time
# The preprocessed title is needed by the post-processing (imap / combine) steps.
preprocess_title = mylib.preprocess("title")
train["title_p"] = train.apply(preprocess_title, axis=1)

CPU times: user 33.2 s, sys: 31.2 ms, total: 33.3 s
Wall time: 33.3 s


In [53]:
# Map every posting id to whatever mylib.extract derives from its
# preprocessed title; consumed later by the combine step.
imap = {}
for row in tqdm(train.itertuples()):
    imap[row.posting_id] = mylib.extract(row.title_p)

34250it [02:01, 281.74it/s]


# PHash
F1 by PHash threshold (th): th=.25, f1=.586 | th=.30, f1=.586 | th=.35, f1=.587 | th=.40, f1=.583

In [104]:
%%time
if PHASH:
    # Perceptual-hash matcher; 0.3 is the threshold recorded in the results table.
    phash_matches = mylib.phash_matches(train, threshold=0.3)
    train["phash_matches"] = phash_matches

CPU times: user 28.2 s, sys: 15.1 s, total: 43.4 s
Wall time: 43.5 s


# Title

In [105]:
%%time
if TITLE:
    # Sentence-transformer checkpoint used for title similarity.
    # Alternatives that were tried:
    #st_name = "paraphrase-distilroberta-base-v1"
    #st_name = "paraphrase-xlm-r-multilingual-v1"
    st_name = "stsb-distilbert-base"
    title_sentences = train["title_p"].tolist()
    train["title_matches"] = mylib.sbert_matches(
        model_path=f"pretrained/sentence-transformers/{st_name}",
        sentences=title_sentences,
        posting_ids=posting_ids,
        threshold=0.5,
    )

CPU times: user 26min 27s, sys: 1min 19s, total: 27min 47s
Wall time: 5min 36s


# Image 

In [4]:
if IMAGE:
    model_dir = "models/eb3_arc_20210509_0000"
    m0 = keras.models.load_model(f"{model_dir}/trial_0/model.h5")
    # Cut the trained model down to an embedding extractor: keep only its first
    # input and stop at the "embedding_output" layer.
    image_input = m0.input[0]
    embedding_output = m0.get_layer("embedding_output").output
    m0 = keras.models.Model(inputs=image_input, outputs=embedding_output)
    m0.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image_input (InputLayer)     [(None, 300, 300, 3)]     0         
_________________________________________________________________
efficientnetb3 (Functional)  (None, 1536)              10783535  
_________________________________________________________________
layer_normalization_1 (Layer (None, 1536)              3072      
_________________________________________________________________
dense_1 (Dense)              (None, 1536)              2360832   
_________________________________________________________________
embedding_output (LayerNorma (None, 1536)              3072      
Total params: 13,150,511
Trainable params: 2,366,976
Non-trainable params: 10,783,535
_________________________________________________________________


In [5]:
if IMAGE:
    # No augmentation: the generator only multiplies pixel values by 1/255.
    generator_options = dict(
        rescale=1./255,
        data_format="channels_last",
        dtype=np.float32,
    )
    idg = keras.preprocessing.image.ImageDataGenerator(**generator_options)
    # shuffle=False keeps predictions in the same row order as `train`.
    data = idg.flow_from_dataframe(
        dataframe=train,
        directory="input/train_images",
        x_col="image",
        y_col="label_group",
        class_mode="raw",
        target_size=(resolution, resolution),
        color_mode="rgb",
        interpolation="nearest",
        batch_size=8000,
        shuffle=False,
    )
    y0 = m0.predict(data, verbose=1)
    # Extra ensemble members, currently disabled:
    #y1 = m1.predict(data, verbose=1)
    #y2 = m2.predict(data, verbose=1)
    #y3 = m3.predict(data, verbose=1)
    #y4 = m4.predict(data, verbose=1)
    #assert y0.shape == y1.shape == y2.shape == y3.shape == y4.shape
    #print(f"y0.shape={y0.shape}")
    # Single-model embedding matrix used by the nearest-neighbour search.
    em = y0.astype(np.float32)
    print(f"em.shape={em.shape}")

Found 34250 validated image filenames.
y0.shape=(34250, 1536)


In [6]:
# Disabled experiment: element-wise mean of the embeddings from several models
# (y0..y4), which would replace the single-model `em` built in the cell above.
#res = []
#for i in range(len(y0)):
    #a = np.vstack((y0[i], y1[i], y2[i], y3[i], y4[i]))
    #a = np.vstack((y0[i], y1[i]))
    #m = np.mean(a, axis=0)
    #res.append(m)
#em = np.array(res, dtype=np.float32)
#assert y0.shape == em.shape
#print(f"em.shape={em.shape}")

In [103]:
%%time
if IMAGE:
    # Maximum euclidean distance between embeddings for two postings to match.
    threshold = 5e-3
    nn = NearestNeighbors(
        n_neighbors=min(49, len(posting_ids) - 1),
        metric="euclidean",
        n_jobs=-1,
    )
    nn.fit(em)
    # Called without arguments, kneighbors() queries the fitted points themselves.
    distances, indices = nn.kneighbors()
    res: List[List[str]] = []
    for row_distances, row_indices in zip(distances, indices):
        row_matches: List[str] = []
        for distance, neighbour in zip(row_distances, row_indices):
            # Stop at the first neighbour beyond the threshold; the rest are skipped.
            if distance > threshold:
                break
            row_matches.append(posting_ids[neighbour])
        res.append(row_matches)
    train["image_matches"] = res

CPU times: user 2min 29s, sys: 27.1 s, total: 2min 56s
Wall time: 56.6 s


# OCR

In [106]:
def erode_dilate(img):
    """Erode then dilate `img` once each with the same 2x2 kernel of ones.

    Returns the processed image; the input array is not reassigned in place
    by this function.
    """
    structuring_element = np.ones((2, 2), np.uint8)
    eroded = cv.erode(img, structuring_element, iterations=1)
    return cv.dilate(eroded, structuring_element, iterations=1)


def _ocr_pass(img, threshold_type, neighbours, timeout, config):
    """Threshold `img`, clean it with erode_dilate, and OCR it once.

    Returns the raw Tesseract string, or None when the OCR call fails or
    times out (best effort: one bad image must not abort a long batch).
    """
    th = cv.adaptiveThreshold(img, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, threshold_type, neighbours, 2)
    th = erode_dilate(th)
    try:
        return pytesseract.image_to_string(th, timeout=timeout, config=config)
    except Exception:
        # Deliberately not a bare `except:` so that KeyboardInterrupt and
        # SystemExit still stop the notebook.
        return None


def image_to_text(img_path, mode: str, timeout: float, neighbours: int=41, psm: int=3) -> Optional[str]:
    """Extract text from an image with Tesseract after adaptive thresholding.

    The image is read as grayscale, median-blurred, thresholded, and passed
    through erode_dilate before OCR. The recognised text is converted with
    to_ascii_str, stripped of punctuation, and whitespace-normalised.

    Args:
        img_path: Path of the image file to read.
        mode: "binary" (THRESH_BINARY pass), "inverted" (THRESH_BINARY_INV
            pass), or "binary_inverted" (both passes, binary tokens first).
        timeout: Timeout forwarded to pytesseract.image_to_string.
        neighbours: Block size forwarded to cv.adaptiveThreshold (OpenCV
            requires an odd value greater than 1).
        psm: Tesseract page segmentation mode, passed as "--psm <psm>".

    Returns:
        The space-joined tokens, or None if every requested OCR pass failed
        or timed out.

    Raises:
        ValueError: If `mode` is not one of the supported values.
        OSError: If the image cannot be read.
    """
    if mode not in ("binary", "inverted", "binary_inverted"):
        # Previously an unknown mode silently returned None, which callers
        # could not tell apart from an OCR timeout.
        raise ValueError(f"unsupported mode: {mode!r}")
    config = f"--psm {psm}"
    img = cv.imread(img_path, cv.IMREAD_GRAYSCALE)
    if img is None:
        # cv.imread reports failure by returning None rather than raising.
        raise OSError(f"cannot read image: {img_path!r}")
    #img = cv.resize(img, None, fx=0.5, fy=0.5, interpolation=cv.INTER_AREA)
    img = cv.medianBlur(img, 3)
    s1, s2 = None, None
    if mode in ("binary_inverted", "binary"):
        s1 = _ocr_pass(img, cv.THRESH_BINARY, neighbours, timeout, config)
    if mode in ("binary_inverted", "inverted"):
        s2 = _ocr_pass(img, cv.THRESH_BINARY_INV, neighbours, timeout, config)
    if s1 is None and s2 is None:
        return None
    tokens = []
    for raw in (s1, s2):
        if raw is not None:
            tokens += strip_punctuation(to_ascii_str(raw)).split()
    return " ".join(tokens)

In [107]:
if OCR:
    # OCR every training image; images without a result contribute an empty
    # string and are tallied in n_timeout.
    res = []
    n_timeout = 0
    for row in tqdm(train.itertuples()):
        text = image_to_text(row.image_path, mode="inverted", timeout=0.4, neighbours=41, psm=11)
        if text is None:
            n_timeout += 1
            text = ""
        res.append(text)
    print(f"n_timeout={n_timeout}")

In [108]:
if OCR:
    # `res` holds the per-row OCR strings produced by the OCR loop cell.
    train["itext"] = res
    # Combined text = listing title followed by the text read from the image.
    train["text"] = train["title"] + " " + train["itext"]
    cols = ["text", "itext", "title"]
    # A bare expression nested inside `if` is not auto-displayed by Jupyter,
    # so the preview must be shown explicitly.
    display(train[cols].head())

In [109]:
%%time
if OCR:
    # Same row-wise preprocessing as for titles, applied to title + OCR text.
    preprocess_text = mylib.preprocess("text")
    train["text_p"] = train.apply(preprocess_text, axis=1)

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 11 µs


In [110]:
if OCR:
    # Sentence-transformer checkpoint used for title + OCR text similarity.
    # Alternatives that were tried:
    #st_name = "paraphrase-distilroberta-base-v1"
    #st_name = "paraphrase-xlm-r-multilingual-v1"
    st_name = "stsb-distilbert-base"
    text_sentences = train["text_p"].tolist()
    train["text_matches"] = mylib.sbert_matches(
        model_path=f"pretrained/sentence-transformers/{st_name}",
        sentences=text_sentences,
        posting_ids=posting_ids,
        threshold=0.5,
    )

# Result

In [111]:
# Match columns to combine, in fixed order, one per enabled matcher.
fs = [
    column
    for enabled, column in (
        (IMAGE, "image_matches"),
        (TITLE, "title_matches"),
        (PHASH, "phash_matches"),
        (OCR, "text_matches"),
    )
    if enabled
]
combine_row = mylib.combine_as_list(
    fs,
    imap=imap,
    brand_threshold=0.0001,
)
train["matches"] = train.apply(combine_row, axis=1)
# Per-row F1 of the combined matches; the mean is the headline score.
train["f1"] = train.apply(mylib.metric_per_row("matches"), axis=1)
print(f"Combined score={train['f1'].mean():.3f}")

Combined score=0.656


In [115]:
# Hand-maintained log of past runs: one row per experiment, with None for any
# setting that was not used in that run.
_RUN_FIELDS = (
    "score",
    "phash_threshold",
    "title_threshold",
    "image_threshold",
    "image_pretrained",
    "text_threshold",
    "ocr_threshold",
    "ocr_timeout",
    "ocr_neighbours",
    "ocr_psm",
)
_RUN_ROWS = [
    # score, phash, title, image, pretrained, text, ocr mode, ocr timeout, ocr neighbours, ocr psm
    (0.656, 0.3, 0.5, 5e-3, "enb3", None, None, None, None, None),
    (0.522, None, None, 5e-3, "enb3", None, None, None, None, None),
    (0.473, None, None, 0.01, "enb3", None, None, None, None, None),
    (0.502, None, None, 1e-3, "enb3", None, None, None, None, None),
    (0.651, 0.2, 0.5, 1e-4, "enb3", None, None, None, None, None),
    (0.654, 0.2, 0.5, 1e-5, "enb3", None, None, None, None, None),
    (0.658, 0.3, 0.5, 1e-5, "enb3", None, None, None, None, None),
    (0.656, 0.3, 0.5, 1e-4, "enb3", None, None, None, None, None),
    (0.562, 0.3, 0.5, 0.001, "enb3", None, None, None, None, None),
    (0.514, 0.3, 0.5, 0.001, "enb0", None, None, None, None, None),
    (0.498, 0.3, 0.5, 0.01, "enb0", None, None, None, None, None),
    (0.136, 0.3, 0.5, 0.05, "enb0", None, None, None, None, None),
    (0.674, 0.3, 0.5, None, None, 0.5, "inverted", 0.4, 41, 11),
    (0.674, 0.3, 0.5, None, None, 0.5, "binary", 0.4, 41, 11),
    (0.674, 0.3, 0.5, None, None, None, None, None, None, None),
]
res = [dict(zip(_RUN_FIELDS, row)) for row in _RUN_ROWS]
df = pd.DataFrame.from_records(res)
# Best score first; transposed so each run reads as a column.
df = df.sort_values("score", ascending=False, ignore_index=True)
df.T.head(30)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
score,0.674,0.674,0.674,0.658,0.656,0.656,0.654,0.651,0.562,0.522,0.514,0.502,0.498,0.473,0.136
phash_threshold,0.3,0.3,0.3,0.3,0.3,0.3,0.2,0.2,0.3,,0.3,,0.3,,0.3
title_threshold,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,0.5,,0.5,,0.5,,0.5
image_threshold,,,,0.00001,0.005,0.0001,0.00001,0.0001,0.001,0.005,0.001,0.001,0.01,0.01,0.05
image_pretrained,,,,enb3,enb3,enb3,enb3,enb3,enb3,enb3,enb0,enb3,enb0,enb3,enb0
text_threshold,0.5,0.5,,,,,,,,,,,,,
ocr_threshold,inverted,binary,,,,,,,,,,,,,
ocr_timeout,0.4,0.4,,,,,,,,,,,,,
ocr_neighbours,41.0,41.0,,,,,,,,,,,,,
ocr_psm,11.0,11.0,,,,,,,,,,,,,


In [113]:
# Score, ground truth and combined matches, followed by each matcher's own column.
cols = ["f1", "target", "matches", *fs]
train.loc[:, cols].head(30)

Unnamed: 0,f1,target,matches,image_matches,title_matches,phash_matches
0,1.0,"[train_129225211, train_2278313361]","[train_129225211, train_2278313361]",[],[train_2278313361],[]
1,1.0,"[train_3386243561, train_3423213080]","[train_3386243561, train_3423213080]",[],[train_3423213080],[]
2,1.0,"[train_2288590299, train_3803689425]","[train_3803689425, train_2288590299]",[],[train_3803689425],[]
3,0.333333,"[train_2406599165, train_3342059966]","[train_1744956981, train_2406599165, train_3526771004, train_3576714541]",[],"[train_1744956981, train_3576714541, train_3526771004]",[]
4,1.0,"[train_3369186413, train_921438619]","[train_921438619, train_3369186413]",[],[train_921438619],[]
5,0.8,"[train_2464356923, train_2753295474, train_305884580]","[train_2464356923, train_2753295474]",[],[],[train_2753295474]
6,0.444444,"[train_1802986387, train_1396161074, train_713073906, train_1275191373, train_2490201622, train_2411544001, train_1859060005]","[train_1802986387, train_1396161074]",[],[train_1396161074],[]
7,0.666667,"[train_1806152124, train_3227306976]",[train_1806152124],[],[],[]
8,0.5,"[train_86570404, train_2837452969, train_77364776]",[train_86570404],[],[],[]
9,0.666667,"[train_831680791, train_3031035861]",[train_831680791],[],[],[]


In [114]:
# Lowest-F1 rows first, for error analysis.
df = train.sort_values(by="f1", ignore_index=True)
df.loc[:, cols].head()

Unnamed: 0,f1,target,matches,image_matches,title_matches,phash_matches
0,0.024691,"[train_653606694, train_3694367120, train_1676438253, train_723113580, train_718395879, train_3839423258, train_2983368523, train_527397407, train_2599912967, train_1525971777]","[train_3529044589, train_2237957231, train_3146856007, train_3712296585, train_4231654590, train_783949322, train_1203349175, train_1029471304, train_563561141, train_946746566, train_3215748240, train_3742101210, train_2051118753, train_784255673, train_752725135, train_3942479788, train_2756143895, train_1248209230, train_655833377, train_415825584, train_2080010196, train_288413860, train_3123468195, train_547195405, train_2095256261, train_3642455607, train_954707076, train_209942350, train_2446942306, train_2200603553, train_2361997442, train_1073270861, train_3079456842, train_326059127, train_2692569829, train_1496458006, train_1676438253, train_1904352281, train_1807756594, train_4203591002, train_2900656427, train_4071624010, train_3926545112, train_1604307785, train_4227945312, train_1484961804, train_3186793774, train_1949346618, train_767102330, train_2585583630, train_2044493665, train_3998359010, train_2072099738, train_979063662, train_1950488887, train_3347240993, train_1506002495, train_1096240097, train_2334402973, train_3375267005, train_808634018, train_3011723573, train_3857398510, train_1461409280, train_1948840703, train_4064455035, train_839588818, train_639259659, train_285071767, train_2940792503, train_1751994187]","[train_4227945312, train_4071624010, train_3079456842, train_1807756594, train_2940792503, train_3712296585, train_2585583630, train_3926545112, train_285071767, train_3215748240, train_4203591002, train_3998359010, train_808634018, train_1751994187, train_1484961804, train_3529044589, train_3742101210, train_3347240993, train_547195405, train_3146856007, train_954707076, train_4231654590, train_946746566, train_2900656427, train_783949322, train_1948840703, train_1496458006, train_839588818, train_3375267005, train_3642455607, 
train_2692569829, train_979063662, train_326059127, train_3186793774, train_2756143895, train_1096240097, train_784255673, train_1949346618, train_4064455035, train_2334402973, train_752725135, train_655833377, train_1203349175, train_1904352281, train_2080010196, train_767102330, train_2237957231, train_1950488887, train_2200603553]",[],"[train_3079456842, train_3011723573, train_1248209230, train_1461409280, train_3857398510, train_2072099738, train_3942479788, train_639259659, train_288413860, train_2080010196, train_209942350, train_1029471304, train_1073270861, train_1506002495, train_2446942306, train_2361997442, train_563561141, train_2044493665, train_2095256261, train_1604307785, train_2051118753, train_415825584, train_3123468195]"
1,0.026316,"[train_653794355, train_2692569829, train_1258915345, train_2446942306]","[train_3529044589, train_3146856007, train_3712296585, train_4231654590, train_745074374, train_2081102137, train_946746566, train_3215748240, train_2036043800, train_3742101210, train_3942479788, train_2756143895, train_655833377, train_1833257672, train_2772640946, train_2080010196, train_2046266547, train_3224735141, train_547195405, train_2573060829, train_4153223724, train_954707076, train_2740619290, train_2215208747, train_2200603553, train_4128551344, train_3500331962, train_3079456842, train_2361997442, train_2692569829, train_3096365585, train_1482993046, train_2580172640, train_1676438253, train_2930414767, train_3334561888, train_1904352281, train_1807756594, train_2900656427, train_2717736891, train_4071624010, train_3926545112, train_2343771203, train_239849978, train_4227945312, train_1484961804, train_3147258181, train_1949346618, train_2585583630, train_3447753287, train_1143576193, train_328133185, train_3998359010, train_979063662, train_424048545, train_3347240993, train_422235145, train_2334402973, train_808634018, train_2713478960, train_949854060, train_3902313622, train_1948840703, train_4064455035, train_839588818, train_2047191940, train_285071767, train_378911549, train_1586609700, train_1642454560, train_2048556998, train_1751994187]","[train_2900656427, train_946746566, train_3529044589, train_4227945312, train_4071624010, train_954707076, train_1807756594, train_1676438253, train_4231654590, train_1949346618, train_2772640946, train_979063662, train_2081102137, train_3998359010, train_2585583630, train_4064455035, train_839588818, train_2215208747, train_1484961804, train_808634018, train_424048545, train_1751994187, train_655833377, train_2756143895, train_1586609700, train_3079456842, train_2713478960, train_3926545112, train_949854060, train_1143576193, train_3334561888, train_2080010196, train_1948840703, train_1904352281, train_2334402973, 
train_3215748240, train_328133185, train_3447753287, train_2046266547, train_2200603553, train_285071767, train_3224735141, train_3096365585, train_3347240993, train_3742101210, train_547195405, train_3146856007, train_3902313622, train_3712296585]",[],"[train_4153223724, train_2343771203, train_422235145, train_4128551344, train_3500331962, train_745074374, train_2573060829, train_2047191940, train_1833257672, train_3147258181, train_2361997442, train_378911549, train_1482993046, train_2036043800, train_2048556998, train_3942479788, train_2580172640, train_2930414767, train_2740619290, train_1642454560, train_239849978, train_2717736891]"
2,0.027027,"[train_3213892962, train_4113561720, train_3295787991, train_2633847312, train_3478645172, train_1862165351, train_878591984, train_975024728, train_2041221256, train_3931655891, train_1819136823, train_1833762547, train_3408111079, train_2114671117, train_4239246783, train_1648147422, train_2749665224, train_900993758, train_1874877324, train_3593794428, train_3591837251, train_3434132357, train_2032985524, train_2533634158]","[train_3730694224, train_1941027655, train_524624312, train_900993758, train_1757385143, train_295591376, train_4281317659, train_2573807077, train_2124123795, train_445128381, train_3734061671, train_187062043, train_1248209230, train_4269204505, train_3594148706, train_2518862130, train_3949833931, train_2928059404, train_3210516128, train_693293648, train_2737521623, train_2968757035, train_2996249881, train_810268867, train_1088209506, train_246633348, train_1158422043, train_1805853097, train_3771945381, train_503667939, train_2166321815, train_3325571658, train_1119934345, train_45599683, train_1021380081, train_1935811539, train_2004769517, train_3652054905, train_3632221444, train_2188932227, train_2762069909, train_2231906914, train_3300322305, train_2296885666, train_3114989385, train_4278577450, train_1941882643, train_2095066661, train_1898331644, train_388479397]","[train_3210516128, train_1941882643, train_503667939, train_4278577450, train_1119934345, train_4281317659, train_2762069909, train_2996249881, train_3734061671, train_3325571658, train_1935811539, train_1021380081, train_4269204505, train_3632221444, train_3594148706, train_3300322305, train_2124123795, train_3771945381, train_2968757035, train_187062043, train_1088209506, train_1248209230, train_810268867, train_246633348, train_3652054905, train_2737521623, train_45599683, train_2573807077, train_1805853097, train_388479397, train_1158422043, train_3114989385, train_295591376, train_1941027655, train_3949833931, train_1757385143, train_524624312, 
train_2231906914, train_2296885666, train_2166321815, train_693293648, train_2928059404, train_445128381, train_3730694224, train_2095066661, train_1898331644, train_2004769517, train_2188932227, train_2518862130]",[],[]
3,0.027027,"[train_3079456842, train_3459472836]","[train_3529044589, train_1029471304, train_3712296585, train_4231654590, train_1203349175, train_2081102137, train_563561141, train_946746566, train_3215748240, train_2051118753, train_784255673, train_504592404, train_3942479788, train_2756143895, train_1248209230, train_80072737, train_2772640946, train_415825584, train_288413860, train_2080010196, train_3123468195, train_2095256261, train_954707076, train_209942350, train_24385055, train_2446942306, train_2361997442, train_1073270861, train_3264535764, train_1764746826, train_3459621696, train_3079456842, train_2692569829, train_3096365585, train_1676438253, train_111376792, train_1807756594, train_4203591002, train_2928591493, train_2900656427, train_2500863221, train_4071624010, train_3926545112, train_3223148626, train_1604307785, train_4227945312, train_3186793774, train_3401246194, train_767102330, train_1484961804, train_2585583630, train_2052876872, train_2044493665, train_3998359010, train_2072099738, train_424048545, train_1950488887, train_1506002495, train_1096240097, train_2334402973, train_1186336765, train_2713478960, train_808634018, train_3011723573, train_3902313622, train_1948840703, train_3857398510, train_1461409280, train_639259659, train_285071767, train_2940792503, train_1751994187]","[train_3712296585, train_1676438253, train_4071624010, train_4203591002, train_285071767, train_3529044589, train_1807756594, train_3186793774, train_4227945312, train_111376792, train_3926545112, train_1751994187, train_1096240097, train_2713478960, train_2081102137, train_2928591493, train_2334402973, train_1948840703, train_784255673, train_3902313622, train_504592404, train_808634018, train_424048545, train_1186336765, train_3401246194, train_767102330, train_2585583630, train_3459621696, train_1764746826, train_2692569829, train_4231654590, train_24385055, train_3215748240, train_1950488887, train_954707076, train_1203349175, train_2940792503, 
train_2772640946, train_80072737, train_3223148626, train_3998359010, train_1484961804, train_946746566, train_2900656427, train_3096365585, train_2756143895, train_2052876872, train_2500863221, train_3264535764]",[],"[train_1676438253, train_3011723573, train_1248209230, train_1461409280, train_3857398510, train_2072099738, train_3942479788, train_639259659, train_288413860, train_2080010196, train_209942350, train_1029471304, train_1073270861, train_1506002495, train_2446942306, train_2361997442, train_563561141, train_2044493665, train_2095256261, train_1604307785, train_2051118753, train_415825584, train_3123468195]"
4,0.027778,"[train_3883860001, train_3330610755]","[train_1455468961, train_622757487, train_149240701, train_3145079693, train_493255960, train_754099419, train_2493407517, train_3414954481, train_1090204965, train_1299268426, train_1880996073, train_2051118753, train_2606026595, train_2470897215, train_3630484623, train_3908367902, train_3374438773, train_3835291007, train_89647500, train_444299226, train_3677721055, train_548982238, train_285828586, train_3320065150, train_2616510041, train_1601385496, train_4241764579, train_1482993046, train_1165116650, train_3878285093, train_4203244639, train_3074559172, train_2034763060, train_3509671394, train_2653319241, train_3007328370, train_4269751718, train_901288632, train_1283623292, train_1048853262, train_540840396, train_4057450652, train_383219687, train_3652618812, train_1646573458, train_1701305917, train_2072099738, train_63196596, train_4032813586, train_1983528486, train_3240884969, train_1843113760, train_3330610755, train_3412316361, train_1724760722, train_3975724135, train_1026142696, train_639259659, train_3114280436, train_3816837053, train_3032730238, train_928541043, train_1643562750, train_677908857, train_4154363656, train_548892835, train_4014822321, train_545577069, train_2789795007, train_3811570286]","[train_285828586, train_901288632, train_2493407517, train_3652618812, train_444299226, train_3816837053, train_3811570286, train_1299268426, train_4014822321, train_1482993046, train_2051118753, train_1455468961, train_3878285093, train_4154363656, train_1983528486, train_3630484623, train_493255960, train_1026142696, train_3032730238, train_1646573458, train_3835291007, train_3677721055, train_622757487, train_1048853262, train_2072099738, train_3414954481, train_2789795007, train_639259659, train_754099419, train_1880996073, train_4057450652, train_545577069, train_3908367902, train_2606026595, train_540840396, train_63196596, train_3114280436, train_3145079693, train_1843113760, 
train_3074559172, train_3240884969, train_3412316361, train_4241764579, train_149240701, train_3975724135, train_2653319241, train_3007328370, train_2616510041, train_1283623292]",[train_3320065150],"[train_3374438773, train_677908857, train_1724760722, train_1601385496, train_2034763060, train_2470897215, train_1701305917, train_548892835, train_4032813586, train_383219687, train_89647500, train_4203244639, train_3509671394, train_1090204965, train_548982238, train_4269751718, train_1643562750, train_928541043, train_1165116650]"
