In [1]:
import math
import numpy as np
import h5py
import os
#os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  
import sys
sys.path.append("..")
import tensorflow as tf
from Vocabulary import *
import time
tf.keras.backend.clear_session()
import csv
import cloudpickle
from csv_writer import *
import random

import threading, queue

In [2]:
class lr_On_plato:
    """Halve a learning-rate factor when the loss stops improving.

    Call ``notify_loss`` once per epoch; ``get_lr`` returns the current factor.

    Attributes (class-level defaults, override per instance by assignment):
        lowest_loss: best loss seen since the last reset; ``-1`` means
            "no loss recorded yet".
        lowest_time: epoch at which ``lowest_loss`` was recorded.
        patience: number of epochs the loss may stay above ``lowest_loss``
            before the factor is halved.
        factor: the current learning-rate factor.
    """
    lowest_loss = -1
    lowest_time = 0
    patience = 10
    factor = 0.005

    def notify_loss(self, loss, epoch):
        """Record the loss of ``epoch`` and halve ``factor`` on a plateau.

        A plateau is detected when ``loss`` is above the best recorded loss
        and more than ``patience`` epochs have passed since that best loss.
        The reference point is then reset to the current loss and epoch.
        """
        if self.lowest_loss == -1 or loss < self.lowest_loss:
            # First observation or a new best: just move the reference point.
            self.lowest_loss = loss
            self.lowest_time = epoch
        elif loss > self.lowest_loss and self.lowest_time + self.patience < epoch:
            # Previously the window was hard-coded to 10 and `patience` was ignored.
            self.lowest_loss = loss
            self.lowest_time = epoch
            print("decreased LR")
            self.factor = self.factor * 0.5

    def get_lr(self, epoch):
        """Return the current factor; ``epoch`` is accepted but not used."""
        return self.factor
        

In [3]:
class ModelTrainer:
    """Block-wise trainer for word/context embeddings on a co-occurrence matrix.

    The co-occurrence matrix is stored on disk as square blocks of
    ``block_length`` x ``block_length`` entries (only the lower triangle,
    row index >= column index, is read from disk; the other half is obtained
    by transposing).  The parameters live in an HDF5 file and are mirrored in
    TensorFlow variables while training.  The objective is a weighted
    least-squares fit of ``log(1 + co-occurrence)`` (same form as GloVe).

    Parameter names use German terms: ``zeile`` = block row index,
    ``spalte`` = block column index.
    """

    def __init__(self,vocab_length,block_path,vector_size = 100,block_length = 5000):
        """Store the configuration; no files are touched here.

        Args:
            vocab_length: number of words in the vocabulary.
            block_path: directory containing the pickled co-occurrence blocks.
            vector_size: embedding dimension.
            block_length: edge length of one co-occurrence block; must match
                the value used when the block files were written.
        """
        self.vector_size = vector_size
        self.block_length = block_length
        self.amount_split = math.ceil(vocab_length/float(self.block_length))
        print('amout_split: ' + str(self.amount_split))
        self.block_path = block_path
        self.vocab_length = vocab_length
        self.optimizer = None
        # Per-instance hand-over queue for the prefetch thread (was shared
        # between all instances as a class attribute).
        self.q = queue.Queue()
        self.thread = None

    #start training for first time
    def prepare(self,basepath,experiment_name):
        """Create the HDF5 parameter file and the CSV log, then initialise weights.

        The HDF5 file ``<basepath>/<experiment_name>.hdf5`` is opened with mode
        ``"w"``, i.e. an existing file of that name is truncated.
        """
        self.basepath = basepath
        self.experiment_name = experiment_name
        hdf_path = os.path.join(basepath, '{filename}.hdf5'.format(filename=experiment_name))
        self.f = h5py.File(hdf_path, "w")
        #initalize all the HDF files
        self.con_weights = self.f.create_dataset("context-weights", (self.vocab_length, self.vector_size))
        self.weights = self.f.create_dataset("weights",(self.vector_size,self.vocab_length))
        self.context_bias = self.f.create_dataset("context-bias", (self.vocab_length,1))
        self.bias = self.f.create_dataset("bias", (1,self.vocab_length))
        self.csv_writer = CSV_writer(basepath,experiment_name+".csv")

        self.init_matrices()

    def init_matrices(self,chunk_size=10000):
        """Randomly initialise all four HDF5 datasets, ``chunk_size`` rows/columns at a time."""
        self.init_hdf_matrix(self.weights,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.con_weights,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.context_bias,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.bias,-0.5,0.5,chunk_size)

    def init_hdf_matrix(self,hdf_data,min_value,max_value,block_length):
        """Fill a 2-D dataset with ``uniform(min_value, max_value) / vector_size``.

        The dataset is written in slabs of at most ``block_length`` along its
        longer axis so that the full matrix never has to be held in memory.

        Note: ``min_value``/``max_value`` used to be ignored (values were
        drawn from ``[0, 1) / vector_size``); they are honoured now.
        """
        n_rows, n_cols = hdf_data.shape
        if n_rows > n_cols:
            for start in range(0, n_rows, block_length):
                stop = min(start + block_length, n_rows)
                hdf_data[start:stop, :] = np.random.uniform(min_value, max_value, (stop - start, n_cols))/self.vector_size
        else:
            for start in range(0, n_cols, block_length):
                stop = min(start + block_length, n_cols)
                hdf_data[:, start:stop] = np.random.uniform(min_value, max_value, (n_rows, stop - start))/self.vector_size

    def block_file_path(self,zeile,spalte):
        """Return the path of the stored block for ``(zeile, spalte)``.

        Only the lower triangle is stored, so the larger index always comes
        first in the file name.
        """
        first, second = max(zeile, spalte), min(zeile, spalte)
        template = "tf_cooccurence_{i}_{j}.tensor".format(i=first,j=second)
        return os.path.join(self.block_path, template)

    def _read_block(self,zeile,spalte):
        """Load one block from disk, transposed if it lies above the diagonal."""
        file_path = self.block_file_path(zeile,spalte)

        # NOTE(security): unpickling executes arbitrary code from the file.
        # Only load block files that were produced by a trusted pipeline.
        with open(file_path, 'rb') as file:
            block = cloudpickle.load(file)

        if (spalte > zeile):
            block = tf.transpose(block)
        return block

    def load_block(self,zeile,spalte):
        """Synchronously load a block into ``self.tf_co_occurences``."""
        self.tf_co_occurences = self._read_block(zeile,spalte)

    def _discard_pending_block(self):
        """Wait for an unfinished prefetch and drop anything left in the queue.

        Keeps the queue in step with the training loop after a previous run
        was aborted (e.g. by an out-of-memory error) between prefetch and use.
        """
        if self.thread is not None:
            self.thread.join()
            self.thread = None
        while True:
            try:
                self.q.get_nowait()
            except queue.Empty:
                break

    def load_block_async(self,zeile,spalte):
        """Start loading a block in a background thread (see ``get_block_async``)."""
        self._discard_pending_block()
        self.thread = threading.Thread(target=self.thread_load,args=(zeile,spalte))
        self.thread.start()

    def get_block_async(self):
        """Wait for the prefetched block and store it in ``self.tf_co_occurences``.

        Raises:
            RuntimeError: if no prefetch was started.
            Exception: whatever the loader thread raised while reading the block.
        """
        if self.thread is None:
            raise RuntimeError("get_block_async called without a pending load_block_async")
        self.thread.join()
        self.thread = None
        result = self.q.get()
        if isinstance(result, Exception):
            raise result
        self.tf_co_occurences = result

    def thread_load(self,zeile,spalte):
        """Thread target: read a block and hand it (or the error) over via the queue."""
        try:
            block = self._read_block(zeile,spalte)
        except Exception as error:
            # Hand the failure to the consumer instead of leaving it blocked
            # forever on an empty queue.
            self.q.put(error)
            return
        self.q.put(block)

    def load_weights(self):
        """Copy all parameters from the HDF5 datasets into TensorFlow variables."""
        self.tf_weights     = tf.Variable(initial_value=self.weights[:,:],dtype=tf.dtypes.float32)
        self.tf_con_weights =  tf.Variable(initial_value=self.con_weights[:,:],dtype=tf.dtypes.float32)
        self.tf_bias        = tf.Variable(initial_value=self.bias[:,:],dtype=tf.dtypes.float32)
        self.tf_con_bias    = tf.Variable(initial_value=self.context_bias[:,:],dtype=tf.dtypes.float32)

    def save_weights(self):
        """Write the current TensorFlow variables back into the HDF5 datasets."""
        self.context_bias[:,:] = self.tf_con_bias.numpy()
        self.bias[:,:] = self.tf_bias.numpy()
        self.con_weights[:,:] = self.tf_con_weights.numpy()
        self.weights[:,:] = self.tf_weights.numpy()

    def _close_files(self):
        """Close the HDF5 file and the CSV log; the trainer cannot save afterwards."""
        self.f.close()
        self.csv_writer.close()

    def inner_loss(self,weights,context_weights,bias_mat,con_bias_mat,co_occurences):
        """Weighted squared error of one block.

        Computes ``sum(f(X) * (b + c + C @ W - log(X + 1))**2)`` where ``f`` is
        ``cut_function``.  ``bias_mat`` and ``con_bias_mat`` may be full
        matrices or any shapes that broadcast against ``C @ W``.
        """
        #co_occurences = tf.clip_by_value(co_occurences, clip_value_min = 0.0, clip_value_max=5000.0)
        bias_terms = bias_mat + con_bias_mat
        weight_matrix = tf.matmul(context_weights,weights)
        log_X = tf.math.log(co_occurences + 1)
        summe = bias_terms + weight_matrix - log_X
        summe = tf.math.square(summe)
        summe = self.cut_function(co_occurences) * summe
        reduced = tf.math.reduce_sum(summe)
        return reduced

    def loss(self,zeile,spalte,co_occurences):
        """Loss of block ``(zeile, spalte)`` for the given co-occurrence block.

        Slices the parameters belonging to the block out of the full
        variables and zero-pads them to ``block_length`` for the last
        (possibly shorter) block row/column.

        Args:
            zeile: block row index (selects context weights / context bias).
            spalte: block column index (selects weights / bias).
            co_occurences: the co-occurrence block; ``None`` falls back to
                ``self.tf_co_occurences``.  (The argument used to be ignored.)
        """
        if co_occurences is None:
            co_occurences = self.tf_co_occurences

        row_start = zeile*self.block_length
        col_start = spalte*self.block_length
        n_rows = min(self.block_length, self.vocab_length - row_start)
        n_cols = min(self.block_length, self.vocab_length - col_start)

        weights     = tf.slice(self.tf_weights,(0,col_start)     , (-1,n_cols))
        con_weights = tf.slice(self.tf_con_weights,(row_start,0) , (n_rows,-1))
        bias        = tf.slice(self.tf_bias,(0,col_start)        , (-1,n_cols))
        con_bias    = tf.slice(self.tf_con_bias,(row_start,0)    , (n_rows,-1))

        last_block = self.amount_split - 1

        #just the words context
        if(zeile == last_block):
            difference = self.block_length - con_bias.shape[0]
            add2_context_bias    = tf.zeros((difference,1),dtype=tf.dtypes.float32)
            add2_context_weights = tf.zeros((difference,self.vector_size),dtype=tf.dtypes.float32)

            con_weights = tf.concat([con_weights,add2_context_weights],axis = 0)
            con_bias    = tf.concat([con_bias,add2_context_bias],axis = 0)

        #just the words without context
        if(spalte == last_block):
            difference = self.block_length - bias.shape[1]
            add2_bias = tf.zeros((1,difference),dtype=tf.dtypes.float32)
            add2_weights = tf.zeros((self.vector_size,difference),dtype=tf.dtypes.float32)

            weights = tf.concat([weights,add2_weights],axis = 1)
            bias    = tf.concat([bias,add2_bias],axis=1)

        # The (1, B) row bias and the (B, 1) column bias broadcast to (B, B)
        # inside inner_loss.  This gives the same values as multiplying each
        # by a B x B ones matrix, without materialising three extra
        # block_length**2 tensors per step.
        return self.inner_loss(weights,con_weights,bias,con_bias,co_occurences)

    alpha = tf.constant(0.75,dtype=tf.dtypes.float32)
    XMAX = tf.constant(100.0,dtype=tf.dtypes.float32)

    def cut_function(self,value):
        """Weighting ``(min(max(value, 0), XMAX) / XMAX) ** alpha``."""
        clipped = tf.clip_by_value(value, clip_value_min = 0.0, clip_value_max=self.XMAX)
        return tf.pow(clipped / self.XMAX, self.alpha)

    def _train_step(self,zeile,spalte):
        """Run one optimizer step on the block currently held in memory.

        Returns the loss of the step as a NumPy scalar.
        """
        variables = [self.tf_con_bias,self.tf_bias,self.tf_con_weights,self.tf_weights]
        with tf.GradientTape() as tape:
            tmp_loss = self.loss(zeile,spalte,self.tf_co_occurences)
        # Differentiate after leaving the tape context so the backward pass
        # itself is not recorded.
        grads = tape.gradient(tmp_loss, variables)
        self.optimizer.apply_gradients(zip(grads, variables))
        return tmp_loss.numpy()

    def train_splitted(self,epochs,use_grad_clipping = False):
        """Train for ``epochs`` passes over all co-occurrence blocks.

        On the first call the Adagrad optimizer is created and the weights
        are loaded from HDF5; later calls continue with the existing
        optimizer and in-memory variables.  Every epoch visits the
        lower-triangle blocks in random order, trains on each block and (for
        off-diagonal blocks) on its transpose, then saves the weights and
        logs the summed loss.  While one block is trained on, the next one is
        prefetched in a background thread.

        The HDF5/CSV files stay open so that this method can be called
        again; call ``_close_files`` when training is finished.

        Args:
            epochs: number of epochs to run.
            use_grad_clipping: only used on the first call; selects Adagrad
                with ``learning_rate=0.01, clipvalue=100.0`` instead of
                ``learning_rate=0.05``.
        """
        if self.optimizer is None:
            if use_grad_clipping:
                self.optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.01,clipvalue=100.0)
            else:
                self.optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)
            self.load_weights()

        for epoch in range(epochs):
            cur_loss = 0.0

            # Lower triangle including the diagonal, same order as before.
            block_list = [(x,y) for x in range(self.amount_split) for y in range(x + 1)]
            random.shuffle(block_list)
            last_index = len(block_list) - 1

            for index,(zeile,spalte) in enumerate(block_list):
                if index == 0:
                    self.load_block(zeile,spalte)
                else:
                    # Already fetched in the background during the previous
                    # step; no second read from disk.
                    self.get_block_async()

                if index < last_index:
                    next_zeile, next_spalte = block_list[index + 1]
                    self.load_block_async(next_zeile,next_spalte)

                #train one side
                cur_loss += self._train_step(zeile,spalte)

                #train the other side
                if spalte != zeile:
                    self.tf_co_occurences = tf.transpose(self.tf_co_occurences)
                    cur_loss += self._train_step(spalte,zeile)

            self.save_weights()
            print('epoch: '+str(epoch)+" loss: "+str(int(cur_loss)))
            # NOTE(review): 0.5 is logged here but the optimizer uses
            # learning_rate 0.05 / 0.01 - confirm what CSV_writer expects.
            self.csv_writer.write('Adagrad',0.5,epoch+1,cur_loss)
        return None

    

In [4]:
import time

# Load the vocabulary; its size determines the shape of all weight matrices.
# os.path.join gives the same string as the old literal on Windows and a
# working relative path on other platforms.
vocab = Vocabulary()
vocab.load(os.path.join('..', 'vocabs', 'wiki2021base'))
size = vocab.get_size()

In [5]:
   
# Reset Keras' global state, then build a trainer for 400-dimensional vectors.
tf.keras.backend.clear_session()
trainer = ModelTrainer(
    vocab_length=size,
    block_path="E:\\hdf_base_coocurrence_2021_5000",
    vector_size=400,
)
# Creates E:\base2021_500noclip.hdf5 / .csv and initialises the weights.
trainer.prepare(basepath='E:\\', experiment_name="base2021_500noclip")

# Time 100 epochs of block-wise training.
startTime = time.time()
trainer.train_splitted(epochs=100)
executionTime = time.time() - startTime

print('Execution time in seconds: {}'.format(executionTime))


amout_split: 53


ResourceExhaustedError: failed to allocate memory [Op:Mul]

In [7]:
# Second call on the same trainer: train_splitted only creates the optimizer
# and loads the weights when trainer.optimizer is None, so this continues
# with the optimizer and in-memory variables left by the previous call.
trainer.train_splitted(100)

ResourceExhaustedError: failed to allocate memory [Op:Pow]