In [None]:
import os
from collections import defaultdict
from datetime import datetime

import numpy as np
import pandas as pd

In [None]:
# Collect every date recorded for each (subject, relation, object) triple.
# Each input line is expected to hold four tab-separated fields:
# subject, relation, object and a date formatted as YYYY-MM-DD.
dct = defaultdict(list)

with open('./data/triplets.tsv', 'r') as fh:
    for line in fh:
        line = line.rstrip('\n')
        # A blank line (typically a trailing newline at the end of the file)
        # carries no triple; unpacking it below would raise ValueError.
        if not line.strip():
            continue
        sub, rel, obj, edat = line.split('\t')
        triple = (sub, rel, obj)
        dct[triple].append(datetime.strptime(edat, '%Y-%m-%d').date())

# Earliest date seen for each distinct triple.
min_edat = {key: min(dates) for key, dates in dct.items()}

In [None]:
# One row per distinct triple: the three tuple components of each `min_edat`
# key become the index levels (named here), and the earliest date becomes
# the `edat` column once the index is flattened into regular columns.
df = (
    pd.Series(min_edat)
    .rename_axis(['subject', 'relation', 'object'])
    .reset_index(name='edat')
)

In [None]:
# Cutoff dates for the temporal split (both bounds are inclusive on the
# earlier side):
#   train: edat <= TH1
#   valid: TH1 < edat <= TH2
#   test : edat > TH2
TH1 = datetime(2012, 1, 1).date()
TH2 = datetime(2020, 3, 11).date()

df_train = df.loc[df['edat'].le(TH1)]
df_valid = df.loc[df['edat'].gt(TH1) & df['edat'].le(TH2)]
df_test = df.loc[df['edat'].gt(TH2)]

In [None]:
# Write each split as a headerless, tab-separated file containing only the
# (subject, relation, object) columns; `edat` is not exported.
# `to_csv` does not create missing directories, so make sure ./train exists
# before writing (otherwise a fresh checkout fails on this cell).
os.makedirs('./train', exist_ok=True)

for split_name, split_df in (('train', df_train), ('valid', df_valid), ('test', df_test)):
    split_df.to_csv(
        f'./train/{split_name}.tsv',
        sep='\t',
        header=False,  # documented way to omit the header row
        columns=['subject', 'relation', 'object'],
        index=False,
    )

In [None]:
# TransE (model_name TransE_l1): training run.
# Options are grouped as: model/dataset, input files, output location,
# model hyperparameters, batching/negative sampling, schedule, hardware.
!DGLBACKEND=pytorch dglke_train \
    --model_name TransE_l1 --dataset COVID-19 \
    --data_path ./train --format raw_udd_hrt --data_files train.tsv valid.tsv test.tsv \
    --save_path ./data/results/final \
    --hidden_dim 400 --lr 0.01 --regularization_coef 2e-08 \
    --batch_size 2070 --batch_size_eval 1020 --neg_sample_size 30 \
    --max_step 20000 --log_interval 1000 \
    --gpu 0 --mix_cpu_gpu

In [None]:
# TransE (model_name TransE_l1): evaluate the checkpoint stored at --model_path.
!DGLBACKEND=pytorch dglke_eval \
    --model_name TransE_l1 --hidden_dim 400 \
    --model_path ./data/results/final/TransE_l1_COVID-19_0 \
    --dataset COVID-19 --data_path ./train --format raw_udd_hrt \
    --data_files train.tsv valid.tsv test.tsv \
    --batch_size_eval 1020 --neg_sample_size_eval 30 \
    --gpu 0

In [None]:
# DistMult: training run.
# Options are grouped as: model/dataset, input files, output location,
# model hyperparameters, batching/negative sampling, schedule, hardware.
!DGLBACKEND=pytorch dglke_train \
    --model_name DistMult --dataset COVID-19 \
    --data_path ./train --format raw_udd_hrt --data_files train.tsv valid.tsv test.tsv \
    --save_path ./data/results/final \
    --hidden_dim 50 --lr 0.1 --regularization_coef 2e-08 --neg_adversarial_sampling \
    --batch_size 2070 --batch_size_eval 1020 --neg_sample_size 30 \
    --max_step 20000 --log_interval 1000 \
    --gpu 0 --mix_cpu_gpu

In [None]:
# DistMult: evaluate the checkpoint stored at --model_path.
# NOTE(review): --model_path ends in `_1`, while the TransE and RotatE
# evaluations point at `_0`. Presumably the suffix is a run counter added by
# dglke_train, in which case this directory only exists after a second
# DistMult training run -- confirm it is present after Restart & Run All.
!DGLBACKEND=pytorch dglke_eval \
    --model_name DistMult --hidden_dim 50 \
    --model_path ./data/results/final/DistMult_COVID-19_1 \
    --dataset COVID-19 --data_path ./train --format raw_udd_hrt \
    --data_files train.tsv valid.tsv test.tsv \
    --batch_size_eval 1020 --neg_sample_size_eval 30 \
    --gpu 0

In [None]:
# ComplEx: training run.
# Options are grouped as: model/dataset, input files, output location,
# model hyperparameters, batching/negative sampling, schedule, hardware.
!DGLBACKEND=pytorch dglke_train \
    --model_name ComplEx --dataset COVID-19 \
    --data_path ./train --format raw_udd_hrt --data_files train.tsv valid.tsv test.tsv \
    --save_path ./data/results/final \
    --hidden_dim 50 --lr 0.1 --regularization_coef 2e-08 --neg_adversarial_sampling \
    --batch_size 2070 --batch_size_eval 1020 --neg_sample_size 30 \
    --max_step 20000 --log_interval 1000 \
    --gpu 0 --mix_cpu_gpu

In [None]:
# ComplEx: evaluate the checkpoint stored at --model_path.
# NOTE(review): --model_path ends in `_1`, while the TransE and RotatE
# evaluations point at `_0`. Presumably the suffix is a run counter added by
# dglke_train, in which case this directory only exists after a second
# ComplEx training run -- confirm it is present after Restart & Run All.
!DGLBACKEND=pytorch dglke_eval \
    --model_name ComplEx --hidden_dim 50 \
    --model_path ./data/results/final/ComplEx_COVID-19_1 \
    --dataset COVID-19 --data_path ./train --format raw_udd_hrt \
    --data_files train.tsv valid.tsv test.tsv \
    --batch_size_eval 1020 --neg_sample_size_eval 30 \
    --gpu 0

In [None]:
# RotatE: training run.
# Options are grouped as: model/dataset, input files, output location,
# model hyperparameters, batching/negative sampling, schedule, hardware.
!DGLBACKEND=pytorch dglke_train \
    --model_name RotatE --dataset COVID-19 \
    --data_path ./train --format raw_udd_hrt --data_files train.tsv valid.tsv test.tsv \
    --save_path ./data/results/final \
    --hidden_dim 250 --double_ent --gamma 5 --lr 0.01 --regularization_coef 2e-06 \
    --batch_size 2070 --batch_size_eval 1020 --neg_sample_size 30 \
    --max_step 20000 --log_interval 1000 \
    --gpu 0 --mix_cpu_gpu

In [None]:
# RotatE: evaluate the checkpoint stored at --model_path.
# The RotatE training command in this notebook passes `-de -g 5`
# (--double_ent, --gamma 5); the same options are repeated here so the
# evaluation does not run with a different gamma / entity dimension than the
# model was trained with.
# NOTE(review): assumes dglke_eval takes these from the command line rather
# than restoring them from the checkpoint -- confirm; if it does restore them,
# the explicit flags are redundant but harmless.
!DGLBACKEND=pytorch dglke_eval \
    --model_name RotatE --hidden_dim 250 --double_ent --gamma 5 \
    --model_path ./data/results/final/RotatE_COVID-19_0 \
    --dataset COVID-19 --data_path ./train --format raw_udd_hrt \
    --data_files train.tsv valid.tsv test.tsv \
    --batch_size_eval 1020 --neg_sample_size_eval 30 \
    --gpu 0