In [1]:
!pip install tqdm



In [1]:
import numpy as np
import os
from tqdm import tqdm

## Create TSV files

### TSV definition: A tab-separated values (TSV) file is a simple text format for storing data in a tabular structure.

 Filename extension:  .tsv
 Internet media type: text/tab-separated-values

## Story Name

In [2]:
# Base names of the story files to convert; every entry carries the same
# "_formatted_S2" suffix.
story_names = [
    title + "_formatted_S2"
    for title in (
        "ACaseOfIdentity",
        "CrookedMan",
        "DancingMen",
        "DevilsFoot",
        "SpeckledBand",
    )
]

In [3]:
# Model identifier and pattern tag; both are joined into the file names below.
model_name, pattern = "albert_xxlarge", "p1"

In [4]:
class TabFileConvert:
    """Convert a saved ``.npz`` archive into a vector TSV and a label TSV.

    The archive must store the vector array under ``arr_0`` and the label
    array under ``arr_1``. Constructing the object performs the whole
    conversion and writes ``<file_base>-vector.tsv`` and
    ``<file_base>-label.tsv``.
    """

    def __init__(self, file_base: str, model_file_name: str = None):
        """Load the archive and write both output files.

        Args:
            file_base: Path prefix shared by the input archive and the two
                output files.
            model_file_name: Path of the ``.npz`` archive to read. Defaults to
                ``file_base + ".npz"`` so the class no longer depends on a
                module-level variable being set before construction.
        """
        if model_file_name is None:
            model_file_name = file_base + ".npz"

        tsv_vector_file_name = file_base + "-vector.tsv"
        tsv_label_file_name = file_base + "-label.tsv"

        print("====> " + model_file_name)
        # NOTE(review): allow_pickle=True can execute arbitrary code while
        # loading; only use this on archives from a trusted source.
        # The context manager closes the underlying archive file handle.
        with np.load(model_file_name, allow_pickle=True) as npz:
            x = npz['arr_0']
            y = npz['arr_1']
        print(x.shape)
        print(y.shape)

        self._makeVector(tsv_vector_file_name, x)
        self._makeLabel(tsv_label_file_name, y)

    def _makeVector(self, file_name, x):
        """Write one tab-separated row per entry of ``x``.

        Args:
            file_name: Destination path; an existing file is overwritten.
            x: Array indexed as ``x[sample][layer][column]``. Only the last
                entry along the second axis is written for each sample.
        """
        # `with` guarantees the file is closed even if a write fails.
        with open(file_name, 'w') as f:
            for sample in tqdm(x, total=x.shape[0]):
                # sample[-1] is the last layer; str() keeps the per-value
                # text formatting of the array's scalar type.
                f.write('\t'.join(str(value) for value in sample[-1]) + '\n')
        print('complete vector file save')

    def _makeLabel(self, file_name, y):
        """Write one line per entry of ``y``.

        Args:
            file_name: Destination path; an existing file is overwritten.
            y: Label array. For a 1-D array each line is ``str(label)``;
                otherwise the components of each row are joined by a single
                space.
        """
        # 1-D labels have no inner components to join, so no manual code
        # toggling is needed for that case.
        single_valued = y.ndim == 1

        with open(file_name, 'w') as f:
            for label in tqdm(y, total=y.shape[0]):
                if single_valued:
                    record = str(label)
                else:
                    record = ' '.join(str(part) for part in label)
                f.write(record + '\n')
        print('complete label file save')


## TEST

# Try the conversion on the first story only before running the full batch.
base_file_name = "-".join((story_names[0], model_name, pattern))
model_file_name = f"{base_file_name}.npz"
TabFileConvert(base_file_name)

In [5]:
# Convert every story: build "<story>-<model>-<pattern>" and hand it to the converter.
for story_name in story_names:
    base_file_name = "-".join((story_name, model_name, pattern))
    model_file_name = f"{base_file_name}.npz"
    TabFileConvert(base_file_name)

====> ACaseOfIdentity_formatted_S2-albert_xxlarge-p1.npz


  0%|          | 11/3701 [00:00<00:35, 104.29it/s]

(3701, 31, 4096)
(3701, 3)


100%|██████████| 3701/3701 [00:27<00:00, 135.59it/s]
100%|██████████| 3701/3701 [00:00<00:00, 41628.99it/s]


complete vector file save
complete label file save
====> CrookedMan_formatted_S2-albert_xxlarge-p1.npz


  0%|          | 9/2951 [00:00<00:35, 83.54it/s]

(2951, 45, 4096)
(2951, 3)


100%|██████████| 2951/2951 [00:46<00:00, 62.92it/s] 
100%|██████████| 2951/2951 [00:00<00:00, 30102.05it/s]


complete vector file save
complete label file save
====> DancingMen_formatted_S2-albert_xxlarge-p1.npz


  0%|          | 0/1702 [00:00<?, ?it/s]

(1702, 38, 4096)
(1702, 3)


100%|██████████| 1702/1702 [00:17<00:00, 97.18it/s] 
100%|██████████| 1702/1702 [00:00<00:00, 35594.40it/s]


complete vector file save
complete label file save
====> DevilsFoot_formatted_S2-albert_xxlarge-p1.npz


  0%|          | 14/3299 [00:00<00:24, 136.43it/s]

(3299, 32, 4096)
(3299, 3)


100%|██████████| 3299/3299 [00:53<00:00, 62.19it/s] 
100%|██████████| 3299/3299 [00:00<00:00, 36899.58it/s]


complete vector file save
complete label file save
====> SpeckledBand_formatted_S2-albert_xxlarge-p1.npz


  0%|          | 0/11487 [00:00<?, ?it/s]

(11487, 53, 4096)
(11487, 3)


100%|██████████| 11487/11487 [03:31<00:00, 54.19it/s]
  8%|▊         | 923/11487 [00:00<00:01, 9229.59it/s]

complete vector file save


100%|██████████| 11487/11487 [00:01<00:00, 6956.24it/s]


complete label file save
