In [5]:
import os, sys
import numpy as np
import pandas as pd

# Put the notebook's working directory on the import path (presumably so the
# `from dataset import *` below resolves). The membership check keeps
# re-running this cell from adding the same entry twice.
module_path = os.path.abspath(os.curdir)
if module_path not in sys.path:
    sys.path.append(module_path)
    
from dataset import *

In [18]:
# Registry of the dataset loaders that go into the summary table, keyed by the
# loader's own identifier. Insertion order is the row order of the table.
# Each value is later called with no arguments and then `.load()`-ed.
dataset_small = dict(
    deepmatcher_structured_amazon_google=deepmatcher_structured_amazon_google,
    deepmatcher_structured_dblp_acm=deepmatcher_structured_dblp_acm,
    deepmatcher_structured_dblp_google_scholar=deepmatcher_structured_dblp_google_scholar,
    deepmatcher_textual_abt_buy=deepmatcher_textual_abt_buy,
    deepmatcher_structured_walmart_amazon=deepmatcher_structured_walmart_amazon,
    deepmatcher_structured_itunes_amazon=deepmatcher_structured_itunes_amazon,
)

# Opening part of the LaTeX table: environment, column spec and the header row
# (Dataset | Train | Test | Val | Total, each split header being a two-line
# nested tabular "<Split> / [Pos/Tot]").
#
# Written as a raw string so the LaTeX reads exactly as it is emitted and no
# backslash is interpreted by Python. The previous non-raw form relied on the
# invalid escape "\ " (a SyntaxWarning on recent Pythons) and, at the end of
# the header row, on a trailing space to stop the backslash from becoming a
# line continuation. The trailing spaces on some lines are kept so the printed
# text stays byte-identical.
start = r"""
\begin{table}[h]
    \centering
    \begin{tabular}{l|c|c|c|c} 
    \hline 
        \textbf{Dataset} & \begin{tabular}[c]{@{}l@{}}\textbf{Train}\\ {[}Pos/Tot{]}\end{tabular} & \begin{tabular}[c]{@{}l@{}}\textbf{Test}\\ {[}Pos/Tot{]}\end{tabular} & \begin{tabular}[c]{@{}l@{}}\textbf{Val}\\ {[}Pos/Tot{]}\end{tabular} & \begin{tabular}[c]{@{}l@{}}\textbf{Total}\\ {[}Pos/Tot{]}\end{tabular} \\ 
    \hline
"""
# Closing part of the LaTeX table: final rule, end of the tabular, caption,
# label and end of the table environment.
#
# Written as a raw string so every backslash is literal. The previous form
# escaped most commands as "\\..." but left the last line as "\end{table}",
# which only worked because "\e" is an unrecognised escape that Python passes
# through (with a warning on recent versions). The trailing spaces on some
# lines are kept so the printed text stays byte-identical.
end = r"""
    \hline 
    \end{tabular} 
    \caption{Number of positive examples over total examples in the dataset-splits.} 
    \label{tab:pos_rate} 
\end{table}"""

In [19]:
%%capture
table = ''
for dataset in dataset_small:
    d = dataset_small[dataset]()
    d.load()
    train, test, val = d.matches_train, d.matches_test, d.matches_val
    train_1, test_1, val_1 = train[train.matching==True], test[test.matching==True], val[val.matching==True]

    table += (f'{d.name.split("/")[-1]} & '
    f'{len(train_1)}/{len(train)} & '
    f'{len(test_1)}/{len(test)} & '
    f'{len(val_1)}/{len(val)} & '
    f'{len(train_1) + len(test_1) + len(val_1)}/{len(train) + len(test) + len(val)}\\\ \hline \n')

In [20]:
# Emit the complete LaTeX table (header, generated rows, footer) as plain text.
# print() is used instead of a bare expression so backslashes and newlines are
# shown literally rather than as an escaped repr.
print(start, table, end, sep='')


\begin{table}[h]
    \centering
    \begin{tabular}{l|c|c|c|c} 
    \hline 
        \textbf{Dataset} & \begin{tabular}[c]{@{}l@{}}\textbf{Train}\\ {[}Pos/Tot{]}\end{tabular} & \begin{tabular}[c]{@{}l@{}}\textbf{Test}\\ {[}Pos/Tot{]}\end{tabular} & \begin{tabular}[c]{@{}l@{}}\textbf{Val}\\ {[}Pos/Tot{]}\end{tabular} & \begin{tabular}[c]{@{}l@{}}\textbf{Total}\\ {[}Pos/Tot{]}\end{tabular} \\ 
    \hline
Amazon-Google & 699/6874 & 234/2293 & 234/2293 & 1167/11460\\ \hline 
DBLP-ACM & 1332/7417 & 444/2473 & 444/2473 & 2220/12363\\ \hline 
DBLP-GoogleScholar & 3207/17223 & 1070/5742 & 1070/5742 & 5347/28707\\ \hline 
Abt-Buy & 616/5743 & 206/1916 & 206/1916 & 1028/9575\\ \hline 
Walmart-Amazon & 576/6144 & 193/2049 & 193/2049 & 962/10242\\ \hline 
iTunes-Amazon & 78/321 & 27/109 & 27/109 & 132/539\\ \hline 

    \hline 
    \end{tabular} 
    \caption{Number of positive examples over total examples in the dataset-splits.} 
    \label{tab:pos_rate} 
\end{table}
