In [1]:
# Notebook-wide imports and process configuration.
import gensim.downloader as api
#dataset = api.load("text8")
import math
import numpy as np
import h5py
import os
# Both environment variables are set before TensorFlow is imported below; keep this order.
# NOTE(review): '0' presumably leaves all TensorFlow C++ log output enabled — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
# NOTE(review): "-1" presumably hides every CUDA device so training runs on the CPU — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"    
import tensorflow as tf
# Wildcard import: the notebook later instantiates `Vocabulary()` from this module.
from Vocabulary import *
import time
tf.keras.backend.clear_session()
import csv

In [2]:
class CSV_writer:
    """Minimal CSV logger for training progress.

    On construction the file ``<name>.csv`` is created (or truncated) and a
    header row with the columns ``Optimizer, learning_rate, epoch, loss`` is
    written. Each call to :meth:`write` appends one row.

    The instance owns the open file handle (``self.csvfile``). Call
    :meth:`close` when done, or use the instance as a context manager::

        with CSV_writer('run1') as log:
            log.write('Adam', 0.1, 1, 123.4)
    """

    # Column order of the CSV file; also the keys of every row dict.
    FIELDNAMES = ('Optimizer', 'learning_rate', 'epoch', 'loss')

    def __init__(self,name):
        """Open ``<name>.csv`` for writing and emit the header row.

        Args:
            name: Path of the output file without the ``.csv`` extension.
        """
        # newline='' is what the csv module expects so it controls line endings itself.
        self.csvfile = open('{name}.csv'.format(name=name), 'w', newline='')
        self.writer = csv.DictWriter(self.csvfile, fieldnames=list(self.FIELDNAMES))
        self.writer.writeheader()

    def write(self,opt,learning_rate,epoch,loss):
        """Append one row with the given optimizer name, learning rate, epoch and loss."""
        self.writer.writerow({'Optimizer': opt, 'learning_rate': learning_rate,'epoch':epoch,'loss':loss})

    def close(self):
        """Close the underlying file; safe to call more than once."""
        if not self.csvfile.closed:
            self.csvfile.close()

    def __enter__(self):
        """Return the writer itself so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block; exceptions propagate."""
        self.close()
        return False
        
    
    

In [3]:
# Module-level training log. Creating it opens 'AdamLRonPlato.csv' for writing
# immediately; ModelTrainer.train_splitted appends one row per epoch to it.
csv_writer = CSV_writer(name='AdamLRonPlato')


class ModelTrainer:
    """Trains word/context embeddings block by block against an HDF5 co-occurrence matrix.

    The parameters (weights, context weights, bias, context bias) live in an
    HDF5 file created by :meth:`prepare`. The co-occurrence matrix is split
    into ``amount_split x amount_split`` square blocks of side
    ``block_length``; for every block the matching parameter slices are loaded
    into TensorFlow variables, one optimizer step is taken on :meth:`loss`,
    and the slices are written back.

    The objective is a weighted squared error of the form
    ``f(X) * (b + c + W @ C - log(X + 1))**2`` (it resembles the GloVe
    objective — NOTE(review): confirm that is the intended model).

    Attribute names ``zeilen`` / ``spalten`` are the row / column block
    indices (they index the first / second axis of the co-occurrence matrix).
    """

    # Default location of the pre-computed co-occurrence matrix (dataset 'matrix').
    DEFAULT_COOCCURRENCE_PATH = "S:\\text8-filtered100-coocurences.hdf5"

    # State of the learning-rate schedule implemented by check_loss.
    lowest = -1       # reference loss; -1 means "no loss recorded yet"
    lowest_time = 0   # epoch at which `lowest` was last updated
    patience = 10     # epochs without improvement tolerated before the rate is halved

    def __init__(self,vocab,amount_split,vector_size = 300):
        """Store the hyper-parameters and derive the block size.

        Args:
            vocab: Object exposing ``get_size()``; its result is divided by
                ``amount_split`` (rounded up) to obtain ``block_length``.
            amount_split: Number of blocks per axis.
            vector_size: Embedding dimensionality.
        """
        self.vector_size = vector_size
        self.tf_bias = None
        self.tf_context_bias = None
        self.weights = None  # kept for backward compatibility; not used by the methods below
        self.tf_weights = None
        self.tf_context_weights = None
        self.tf_co_occurences = None
        self.zeilen = 0
        self.spalten = 0
        self.amount_split = amount_split
        self.block_length = math.ceil(vocab.get_size()/amount_split)
        # HDF5 handles, opened by prepare() and released by _close_files().
        self.f = None
        self.hf = None

    def prepare(self,filename,cooccurrence_path=None):
        """Create the parameter file, open the co-occurrence file and initialise parameters.

        Args:
            filename: Output path without extension; ``<filename>.hdf5`` is
                created (overwriting any existing file).
            cooccurrence_path: HDF5 file holding the dataset ``'matrix'``.
                Defaults to ``DEFAULT_COOCCURRENCE_PATH``.
        """
        if cooccurrence_path is None:
            cooccurrence_path = self.DEFAULT_COOCCURRENCE_PATH

        self.f = h5py.File("{filename}.hdf5".format(filename=filename), "w")#plus experiment name
        # NOTE(review): `vocab_size` is read from the notebook's global scope, not from
        # the vocabulary passed to __init__ — it must be defined before prepare() runs.
        self.HDF_weights = self.f.create_dataset("weights", (vocab_size, self.vector_size))
        self.HDF_context_weights = self.f.create_dataset("context-weights",(self.vector_size,vocab_size))
        self.HDF_bias = self.f.create_dataset("bias", (vocab_size,1))
        self.HDF_context_bias = self.f.create_dataset("context_bias", (1,vocab_size))

        try:
            self.hf = h5py.File(cooccurrence_path, "r")
        except Exception:
            # Do not leave the freshly created output file open if the input is missing.
            self.f.close()
            self.f = None
            raise
        self.HDF_coocurrence = self.hf.get('matrix')
        self.init_matrices()

    def init_matrices(self,chunk_size=10000):
        """Randomly initialise all four parameter datasets, ``chunk_size`` rows/columns at a time."""
        for dataset in (self.HDF_weights, self.HDF_context_weights,
                        self.HDF_bias, self.HDF_context_bias):
            self.init_hdf_matrix(dataset,-0.5,0.5,chunk_size)

    def init_hdf_matrix(self,hdf_data,min_value,max_value,block_length):
        """Fill a 2-D dataset with ``uniform[0, 1) / vector_size`` values, chunk by chunk.

        The dataset is walked along its longer axis in chunks of
        ``block_length`` so that only one chunk is held in memory at a time.

        Args:
            hdf_data: 2-D array-like supporting ``.shape`` and slice assignment.
            min_value, max_value: Accepted but not used by the current
                initialisation (NOTE(review): confirm whether a
                ``[min_value, max_value)`` range was intended).
            block_length: Chunk length along the longer axis.
        """
        # Use .shape instead of len(hdf_data[0]) so no row is read just to learn its length.
        n_rows, n_cols = hdf_data.shape
        if n_rows > n_cols:
            for start in range(0, n_rows, block_length):
                stop = min(start + block_length, n_rows)
                hdf_data[start:stop, :] = np.random.rand(stop - start, n_cols)/self.vector_size
        else:
            for start in range(0, n_cols, block_length):
                stop = min(start + block_length, n_cols)
                hdf_data[:, start:stop] = np.random.rand(n_rows, stop - start)/self.vector_size

    def _block_slice(self,index):
        """Return the slice covering block number ``index`` along one axis."""
        return slice(index*self.block_length, (index+1)*self.block_length)

    def load_blocks(self,zeilen,spalten):
        """Load co-occurrence block (zeilen, spalten) and the matching parameter slices.

        The co-occurrence block becomes a constant tensor; the four parameter
        slices become new ``tf.Variable`` objects.

        NOTE(review): because fresh variables are created on every call, any
        per-variable optimizer state (e.g. moment estimates) presumably does
        not carry over between steps — confirm this is intended.
        """
        rows = self._block_slice(zeilen)
        cols = self._block_slice(spalten)

        co_ocurences = self.HDF_coocurrence[rows, cols]
        self.tf_co_occurences = tf.convert_to_tensor(co_ocurences,dtype=tf.dtypes.float32)
        #Use normal matrix, if epsilon Shift, than add one to co-ocurence table to fix scaling and log
        self.tf_bias = tf.Variable(initial_value=self.HDF_bias[rows,:],dtype=tf.dtypes.float32)
        self.tf_context_bias = tf.Variable(initial_value=self.HDF_context_bias[:,cols],dtype=tf.dtypes.float32)
        self.tf_weights =  tf.Variable(initial_value=self.HDF_weights[rows,:],dtype=tf.dtypes.float32)
        self.tf_context_weights = tf.Variable(initial_value=self.HDF_context_weights[:,cols],dtype=tf.dtypes.float32)

    def save_blocks(self,zeilen,spalten):
        """Write the current variables back to the slices they were loaded from."""
        rows = self._block_slice(zeilen)
        cols = self._block_slice(spalten)

        self.HDF_bias[rows,:] = self.tf_bias.numpy()
        # Indexed with [:, cols] (same form as in load_blocks) so shapes match exactly.
        self.HDF_context_bias[:,cols] = self.tf_context_bias.numpy()
        self.HDF_weights[rows,:] = self.tf_weights.numpy()
        self.HDF_context_weights[:,cols] = self.tf_context_weights.numpy()

    def _close_files(self):
        """Close whichever HDF5 files are currently open."""
        for attr in ('f', 'hf'):
            handle = getattr(self, attr, None)
            if handle is not None:
                handle.close()

    def check_loss(self,value,epoch):
        """Return the factor by which the learning rate should be multiplied.

        The first loss seen, or any loss below the stored reference, becomes
        the new reference and the factor is 1. If the loss is above the
        reference and more than ``self.patience`` epochs have passed since the
        reference was last updated, the reference is reset to ``value`` and
        the factor is 0.5. Otherwise the factor is 1.

        Args:
            value: Loss of the epoch that just finished.
            epoch: Index of that epoch.

        Returns:
            ``1`` or ``0.5``.
        """
        if self.lowest == -1 or value < self.lowest:
            self.lowest = value
            self.lowest_time = epoch
            return 1
        # Uses the configurable `patience` attribute rather than a hard-coded 10.
        if value > self.lowest and self.lowest_time + self.patience < epoch:
            self.lowest = value
            self.lowest_time = epoch
            return 0.5
        return 1

    def train_splitted(self,epochs,optimizer,log_writer=None):
        """Run ``epochs`` passes over all blocks and log the summed loss per epoch.

        For every block the parameters are loaded, the loss is evaluated once
        for logging, one ``optimizer.minimize`` step is taken, and the
        parameters are saved. After each epoch the learning rate is multiplied
        by the factor returned by :meth:`check_loss` and a row is written to
        the log. The HDF5 files are closed when training ends, including when
        it ends with an exception.

        Args:
            epochs: Number of epochs.
            optimizer: Optimizer exposing ``minimize``, ``learning_rate`` and
                ``get_config()``.
            log_writer: Object with ``write(opt, learning_rate, epoch, loss)``.
                Defaults to the notebook-level ``csv_writer``.

        Returns:
            None
        """
        if log_writer is None:
            log_writer = csv_writer

        #These can stay in memory
        self.alpha = tf.constant(0.75,dtype=tf.dtypes.float32)
        self.XMAX = tf.constant(10000.0,dtype=tf.dtypes.float32)
        # Scalar shift added to every co-occurrence count before the log; it is
        # broadcast, which gives the same values as adding a full matrix of ones.
        self.epsilon = tf.constant(1.0,dtype=tf.dtypes.float32)

        print(optimizer.learning_rate)

        try:
            for epoch in range(0,epochs):
                cur_loss = 0
                for zeilen in range(0,self.amount_split):
                    for spalten in range(0,self.amount_split):
                        # loss() reads the current block indices from self.
                        self.zeilen = zeilen
                        self.spalten = spalten

                        self.load_blocks(zeilen,spalten)

                        # Loss before this block's update step, accumulated for logging.
                        cur_loss += self.loss().numpy()
                        optimizer.minimize(self.loss, var_list=[self.tf_bias,self.tf_context_bias,
                                                                self.tf_weights,self.tf_context_weights])

                        self.save_blocks(zeilen,spalten)
                print("loss:"+str(cur_loss))
                optimizer.learning_rate.assign(optimizer.learning_rate.numpy() * self.check_loss(cur_loss,epoch))
                print(optimizer.learning_rate)
                log_writer.write(optimizer.get_config()['name'],optimizer.learning_rate.numpy(),epoch+1,cur_loss)
        finally:
            # Release the HDF5 handles even if an epoch raised.
            self._close_files()
        return None

    def loss(self):
        """Compute the scalar loss for the currently loaded block.

        Blocks on the last row / column index are padded with zeros up to
        ``block_length`` so every block is evaluated as a
        ``block_length x block_length`` matrix. Padded cells have a
        co-occurrence of 0, so their weight from :meth:`cut_function2` is 0.

        Returns:
            Scalar tensor: the sum over the block of
            ``cut_function2(X) * (bias + context_bias + W @ C - log(X + epsilon))**2``.
        """
        ones_symetrical = tf.ones((self.block_length,self.block_length), dtype=tf.dtypes.float32, name=None)

        #Append zero matrices if necessary (row direction)
        if self.zeilen == self.amount_split - 1:
            difference = self.block_length - self.tf_bias.shape[0]
            add_to_bias   = tf.zeros((difference,1),dtype=tf.dtypes.float32)
            add_to_co     = tf.zeros((difference,self.tf_context_bias.shape[1]),dtype=tf.dtypes.float32)
            co_occurences = tf.concat([self.tf_co_occurences,add_to_co],axis=0)
            add2_weights  = tf.zeros((difference,self.vector_size),dtype=tf.dtypes.float32)
            weights       = tf.concat([self.tf_weights,add2_weights],axis = 0)
            bias_matrix   = tf.concat([self.tf_bias,add_to_bias],axis = 0) * ones_symetrical
        else:
            bias_matrix   = self.tf_bias * ones_symetrical
            weights       = self.tf_weights
            co_occurences = self.tf_co_occurences

        # Column direction. `co_occurences` is always set by the branch above,
        # so it is extended in place rather than re-derived from the raw block.
        if self.spalten == self.amount_split - 1:
            difference = self.block_length - self.tf_context_bias.shape[1]
            add_to_con_bias = tf.zeros((1,difference),dtype=tf.dtypes.float32)
            add_to_co       = tf.zeros((self.block_length,difference),dtype=tf.dtypes.float32)
            co_occurences   = tf.concat([co_occurences,add_to_co],axis=1)
            add2_co_weights = tf.zeros((self.vector_size,difference),dtype=tf.dtypes.float32)
            context_weights = tf.concat([self.tf_context_weights,add2_co_weights],axis = 1)
            context_bias_matrix = tf.concat([self.tf_context_bias,add_to_con_bias],axis=1) * ones_symetrical
        else:
            context_weights     = self.tf_context_weights
            context_bias_matrix = self.tf_context_bias * ones_symetrical

        bias_terms = context_bias_matrix + bias_matrix

        weight_matrix = tf.matmul(weights,context_weights)
        log_X = tf.math.log(co_occurences + self.epsilon)
        inner_sum = bias_terms + weight_matrix - log_X
        squared_sum = tf.math.square(inner_sum)
        weighted_sum = self.cut_function2(co_occurences) * squared_sum
        reduced = tf.math.reduce_sum(weighted_sum)
        return reduced

    def cut_function2(self,value):
        """Per-cell weight: ``(clip(value, 0, 1000) / XMAX) ** alpha``.

        Requires ``self.XMAX`` and ``self.alpha``, which are set in
        :meth:`train_splitted`.

        NOTE(review): the clip maximum (1000.0) differs from ``XMAX``
        (10000.0 in train_splitted), so the weight never reaches 1 — confirm
        whether the two were meant to be the same value.
        """
        clipped = tf.clip_by_value(value, clip_value_min = 0.0, clip_value_max=1000.0)
        return tf.pow(clipped / self.XMAX, self.alpha)

In [4]:
# Keras plateau callback. It is not referenced anywhere else in the visible
# notebook; the learning-rate decay applied during training comes from
# ModelTrainer.check_loss instead.
lrPlato = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    mode='auto',
    factor=0.1,
    patience=10,
    min_delta=0.0001,
    cooldown=0,
    min_lr=0,
    verbose=0,
)

In [5]:
# Load the vocabulary; `vocab_size` is also read as a global by ModelTrainer.prepare.
vocab = Vocabulary()
vocab.load('base_filtered100')
vocab_size = len(vocab.word2Id)

opt = tf.keras.optimizers.Adam(learning_rate=0.1)

tf.keras.backend.clear_session()
# amount_split=1: the whole co-occurrence matrix is handled as a single block.
trainer = ModelTrainer(vocab,1)
try:
    trainer.prepare("AdamLRonPlato")
    trainer.train_splitted(300,opt)
finally:
    # Close the CSV log even if preparation or training raised.
    csv_writer.csvfile.close()

<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:386226.03125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:134825.484375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:197573.0
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:118520.296875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:171201.5625
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:136547.015625
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:169799.578125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:128222.6640625
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:157187.171875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:124751.71875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:156100.59375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
loss:123

loss:40490.27734375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:40887.1953125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:40060.2109375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:40462.93359375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:39618.91015625
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:40028.0546875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:39165.828125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:39583.9609375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:38712.8828125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:39130.58203125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:38258.18359375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:38674.0546875
<tf.Varia

loss:28143.474609375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27866.90234375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:28070.326171875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27800.20703125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:28008.6640625
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27726.05859375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27950.345703125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27663.78125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27892.0078125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27602.5
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27836.572265625
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:27537.888671875
<tf.Vari

loss:26080.669921875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26279.298828125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26068.818359375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26265.203125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26060.2734375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26254.7109375
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26053.966796875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26245.017578125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26047.888671875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26236.79296875
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26042.9453125
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.0125>
loss:26230.05078125
