-
Notifications
You must be signed in to change notification settings - Fork 1
/
metadata.py
46 lines (34 loc) · 1.4 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import string
from gensim.models.doc2vec import Doc2Vec
from gensim.models import FastText
import numpy as np
import os
from transformers import *
import json
import argparse
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize as wt
import torch
# NLTK's English stop-word list, collected into a set.
stop_words = set()
stop_words.update(stopwords.words('english'))
# ---------------------------------------------------------------------------
# Filesystem layout. Every path is anchored at the directory the process was
# started from (os.getcwd()), not at the location of this file.
# ---------------------------------------------------------------------------
cwd = os.getcwd()

# Two top-level directories under the working directory.
sys_home = os.path.join(cwd, 'outputs')
data_loc = os.path.join(cwd, 'data')

# Per-collection folders.
output_folder = os.path.join(sys_home, 'column_predict_WikiTablesCollection')
data_folder = os.path.join(data_loc, 'WikiTablesCollection')
data_folder_debug = os.path.join(data_loc, 'WikiTablesCollectionDebug')

# Create the folders this module is responsible for. makedirs(exist_ok=True)
# is used instead of "if not exists: mkdir" because it
#   * also creates missing parents (data_folder_debug lives under `data`,
#     which nothing else here creates), and
#   * does not fail when another process creates the folder first.
# data_folder itself is intentionally not created, as before.
os.makedirs(sys_home, exist_ok=True)
os.makedirs(output_folder, exist_ok=True)
os.makedirs(data_folder_debug, exist_ok=True)

# File locations inside output_folder. Only the paths are defined here; no
# file is opened or created by this module.
train_features_path = os.path.join(output_folder, 'train_features.json')
test_features_path = os.path.join(output_folder, 'test_features.json')
word2int_path = os.path.join(output_folder, 'word2int.npy')
train_files = os.path.join(output_folder, 'train_files.npy')
test_files = os.path.join(output_folder, 'test_files.npy')
test_debug = os.path.join(output_folder, 'test_debug.npy')
train_debug = os.path.join(output_folder, 'train_debug.npy')
seen_labels_file = os.path.join(output_folder, 'seen_labels.npy')
freq_seen_labels_file = os.path.join(output_folder, 'freq_seen_labels.npy')
model_file = os.path.join(output_folder, 'selab_model.pt')