In [1]:
import gensim.downloader as api
#dataset = api.load("text8")
import math
import numpy as np
import h5py
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"    
import tensorflow as tf
from Vocabulary import *
import time
tf.keras.backend.clear_session()
import csv

In [2]:
class CSV_writer:
    """Write training results to ``<name>.csv``, one row per call to ``write``.

    The file is opened in ``'w'`` mode (an existing file of the same name is
    overwritten) and a header row with the columns ``Optimizer``,
    ``learning_rate``, ``epoch`` and ``loss`` is written immediately.

    The instance can be used as a context manager so the file is closed even
    when the code writing to it raises::

        with CSV_writer('results') as log:
            log.write('Adamax', 0.1, 1, 1234.5)

    Attributes:
        csvfile: the open text file object.
        writer:  the ``csv.DictWriter`` bound to ``csvfile``.
    """

    def __init__(self,name):
        """Open ``<name>.csv`` for writing and emit the header row.

        Args:
            name: path of the output file without the ``.csv`` extension.
        """
        self.csvfile = open('{name}.csv'.format(name=name), 'w', newline='') 
        fieldnames = ['Optimizer', 'learning_rate','epoch','loss']
        self.writer = csv.DictWriter(self.csvfile, fieldnames=fieldnames)
        self.writer.writeheader()
    
    def write(self,opt,learning_rate,epoch,loss):
        """Append one result row and flush it to disk.

        Args:
            opt:           value for the ``Optimizer`` column.
            learning_rate: number, stored in scientific notation with two
                           decimals (e.g. ``1.00e-01``).
            epoch:         value for the ``epoch`` column.
            loss:          number, truncated to an integer with ``int()``.
        """
        lr = "{x:.2e}".format(x=learning_rate)
        loss = int(loss)
        self.writer.writerow({'Optimizer': opt, 'learning_rate': lr,'epoch':epoch,'loss':loss})
        # Flush after every row so results already computed survive a crash
        # or kernel interrupt later in a long run.
        self.csvfile.flush()

    def close(self):
        """Close the underlying file; calling it more than once is harmless."""
        if not self.csvfile.closed:
            self.csvfile.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Returning False lets any exception from the with-body propagate.
        return False
        
    
    

In [3]:

class ModelTrainer:
    """Train GloVe-style embeddings block by block, keeping parameters in HDF5.

    The (vocab x vocab) co-occurrence matrix is read from an HDF5 file in
    square blocks of ``block_length`` rows/columns.  For every block the
    matching slices of the word weights, context weights and both bias
    vectors are loaded into ``tf.Variable`` objects, updated with one
    optimizer step, and written back to the parameter HDF5 file.

    Typical use: ``ModelTrainer(vocab, n)`` -> ``prepare(name)`` ->
    ``train_splitted(epochs, optimizer)``.
    """

    # Weighting function hyper-parameters: f(x) = (clip(x, 0, XMAX) / XMAX) ** alpha
    alpha = tf.constant(0.75,dtype=tf.dtypes.float32)
    XMAX = tf.constant(100.0,dtype=tf.dtypes.float32)

    # File locations.  Kept as class attributes so they can be overridden on a
    # subclass or instance without editing the methods.
    OUTPUT_PATH_TEMPLATE = "S:\\{filename}.hdf5"
    COOCCURRENCE_PATH = "S:\\text8-filtered100-coocurences.hdf5"

    def __init__(self,vocab,amount_split,vector_size = 300):
        """Store the configuration; no files are opened here.

        Args:
            vocab:        object exposing ``get_size()``; its result is used as
                          the number of rows/columns of the parameter matrices.
            amount_split: number of blocks per matrix axis (>= 1).
            vector_size:  embedding dimension.

        Raises:
            ValueError: if ``amount_split`` is smaller than 1.
        """
        if amount_split < 1:
            raise ValueError("amount_split must be >= 1, got {}".format(amount_split))
        self.vector_size = vector_size
        # Taken from the vocabulary itself so the dataset shapes created in
        # prepare() always agree with block_length.
        # NOTE(review): assumes vocab.get_size() equals the size of the stored
        # co-occurrence matrix - confirm against the Vocabulary class.
        self.vocab_size = vocab.get_size()
        self.amount_split = amount_split
        self.block_length = math.ceil(self.vocab_size/amount_split)

        # Parameters and data of the block currently being trained.
        self.tf_bias = None
        self.tf_context_bias = None
        self.tf_weights = None
        self.weights = None  # legacy attribute, not read by this class
        self.tf_context_weights = None
        self.tf_co_occurences = None
        # Indices of the current block (row block / column block).
        self.zeilen = 0
        self.spalten = 0
        # Added to the co-occurrence counts before the log so log(0) is avoided.
        self.epsilon = tf.constant(1e-5,dtype=tf.dtypes.float32)
        # HDF5 file handles, opened by prepare().
        self.f = None
        self.hf = None

    def prepare(self,filename):
        """Create the parameter file, open the co-occurrence file, init weights.

        Args:
            filename: base name of the parameter file; it is inserted into
                      ``OUTPUT_PATH_TEMPLATE`` and opened in ``'w'`` mode.

        Raises:
            KeyError: if the co-occurrence file has no ``'matrix'`` dataset.
        """
        self.f = h5py.File(self.OUTPUT_PATH_TEMPLATE.format(filename=filename), "w")
        try:
            # Word-side parameters are stored row-wise, context-side column-wise.
            self.HDF_weights = self.f.create_dataset("weights", (self.vocab_size, self.vector_size))
            self.HDF_context_weights = self.f.create_dataset("context-weights",(self.vector_size,self.vocab_size))
            self.HDF_bias = self.f.create_dataset("bias", (self.vocab_size,1))
            self.HDF_context_bias = self.f.create_dataset("context_bias", (1,self.vocab_size))

            self.hf = h5py.File(self.COOCCURRENCE_PATH, "r")
            # Indexing (instead of .get) fails right here if the dataset is
            # missing, rather than later with an error on None.
            self.HDF_coocurrence = self.hf['matrix']
            self.init_matrices()
        except BaseException:
            # Do not leave half-initialised files open; then re-raise unchanged.
            self._close_files()
            raise

    def init_matrices(self,chunk_size=10000):
        """Randomly initialise all four parameter datasets.

        Args:
            chunk_size: number of rows/columns written per HDF5 write.
        """
        self.init_hdf_matrix(self.HDF_weights,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.HDF_context_weights,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.HDF_bias,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.HDF_context_bias,-0.5,0.5,chunk_size)

    def init_hdf_matrix(self,hdf_data,min_value,max_value,block_length):
        """Fill a 2-D dataset with ``uniform(min_value, max_value) / vector_size``.

        The dataset is written in slabs of ``block_length`` along its longer
        axis so that only one slab of random numbers is held in memory.

        Args:
            hdf_data:     2-D dataset (anything with ``.shape`` and slice
                          assignment).
            min_value:    lower bound of the uniform distribution (inclusive).
            max_value:    upper bound of the uniform distribution (exclusive).
            block_length: slab size along the longer axis.
        """
        rows, cols = hdf_data.shape
        if rows > cols:
            for start in range(0, rows, block_length):
                stop = min(start + block_length, rows)
                sample = np.random.uniform(min_value, max_value, (stop - start, cols))
                hdf_data[start:stop, :] = sample / self.vector_size
        else:
            for start in range(0, cols, block_length):
                stop = min(start + block_length, cols)
                sample = np.random.uniform(min_value, max_value, (rows, stop - start))
                hdf_data[:, start:stop] = sample / self.vector_size

    def _block_slice(self,index):
        """Return the slice covering block number ``index`` along one axis."""
        start = index * self.block_length
        return slice(start, start + self.block_length)

    def load_blocks(self,zeilen,spalten):
        """Load co-occurrences and parameters of one block into TensorFlow.

        Args:
            zeilen:  row-block index (selects word weights and word bias).
            spalten: column-block index (selects context weights and bias).
        """
        rows = self._block_slice(zeilen)
        cols = self._block_slice(spalten)

        self.tf_co_occurences = tf.convert_to_tensor(self.HDF_coocurrence[rows, cols],dtype=tf.dtypes.float32)
        self.tf_bias = tf.Variable(initial_value=self.HDF_bias[rows, :],dtype=tf.dtypes.float32)
        self.tf_context_bias = tf.Variable(initial_value=self.HDF_context_bias[:, cols],dtype=tf.dtypes.float32)
        self.tf_weights = tf.Variable(initial_value=self.HDF_weights[rows, :],dtype=tf.dtypes.float32)
        self.tf_context_weights = tf.Variable(initial_value=self.HDF_context_weights[:, cols],dtype=tf.dtypes.float32)

    def save_blocks(self,zeilen,spalten):
        """Write the current block's parameters back to the HDF5 datasets.

        Args:
            zeilen:  row-block index used in the matching ``load_blocks`` call.
            spalten: column-block index used in the matching ``load_blocks`` call.
        """
        rows = self._block_slice(zeilen)
        cols = self._block_slice(spalten)

        self.HDF_bias[rows, :] = self.tf_bias.numpy()
        # Same [:, cols] selection as in load_blocks, so shapes match exactly.
        self.HDF_context_bias[:, cols] = self.tf_context_bias.numpy()
        self.HDF_weights[rows, :] = self.tf_weights.numpy()
        self.HDF_context_weights[:, cols] = self.tf_context_weights.numpy()

    def _close_files(self):
        """Close whichever of the two HDF5 files have been opened."""
        for handle in (self.f, self.hf):
            if handle is not None:
                handle.close()

    def train_splitted(self,epochs,optimizer,writer=None):
        """Run block-wise training and log the summed loss of every epoch.

        For each epoch every (row block, column block) pair is loaded,
        updated with a single optimizer step and saved.  The value logged per
        epoch is the sum of the block losses measured *before* each update.
        Both HDF5 files are closed when the method exits, also on error.

        Args:
            epochs:    number of passes over all blocks.
            optimizer: Keras optimizer providing ``apply_gradients``,
                       ``get_config`` and a ``learning_rate`` with ``.numpy()``.
            writer:    object with ``write(opt, learning_rate, epoch, loss)``.
                       Defaults to the module-level ``csv_writer`` so existing
                       notebook calls keep working.

        Returns:
            None
        """
        try:
            if writer is None:
                # Resolved up front so a missing logger fails before training.
                writer = csv_writer

            print(optimizer.learning_rate)

            for epoch in range(epochs):
                cur_loss = 0
                for zeilen in range(self.amount_split):
                    for spalten in range(self.amount_split):
                        self.zeilen = zeilen
                        self.spalten = spalten

                        self.load_blocks(zeilen,spalten)
                        variables = [self.tf_bias,self.tf_context_bias,
                                     self.tf_weights,self.tf_context_weights]

                        # One forward pass serves both the logged loss and the
                        # gradient computation.
                        # NOTE(review): load_blocks creates new tf.Variable
                        # objects for every step, so per-variable optimizer
                        # state presumably does not carry over between steps -
                        # confirm this is intended.
                        with tf.GradientTape() as tape:
                            block_loss = self.loss()
                        gradients = tape.gradient(block_loss, variables)
                        optimizer.apply_gradients(zip(gradients, variables))
                        cur_loss += block_loss

                        self.save_blocks(zeilen,spalten)
                print("epoch" + str(epoch)+"loss:"+str(cur_loss.numpy()))
                writer.write(optimizer.get_config()['name'],optimizer.learning_rate.numpy(),epoch+1,cur_loss.numpy())
        finally:
            self._close_files()
        return None

    def loss(self):
        """Weighted least-squares loss of the currently loaded block.

        Computes ``sum(f(X) * (W @ C + b + b_c - log(X + epsilon)) ** 2)`` where
        ``X`` is the co-occurrence block, ``W``/``C`` the word/context weights
        and ``b``/``b_c`` the word/context biases.  The (rows, 1) word bias and
        the (1, cols) context bias are combined by broadcasting, so blocks at
        the matrix edge that are smaller than ``block_length`` need no padding.

        Returns:
            Scalar float32 tensor.
        """
        bias_terms = self.tf_bias + self.tf_context_bias
        weight_matrix = tf.matmul(self.tf_weights,self.tf_context_weights)
        log_X = tf.math.log(self.tf_co_occurences + self.epsilon)
        inner_sum = bias_terms + weight_matrix - log_X
        weighted_sum = self.cut_function2(self.tf_co_occurences) * tf.math.square(inner_sum)
        return tf.math.reduce_sum(weighted_sum)

    def cut_function2(self,value):
        """Weighting function ``(clip(value, 0, XMAX) / XMAX) ** alpha``.

        Args:
            value: tensor of co-occurrence counts.

        Returns:
            Tensor of the same shape with weights in [0, 1].
        """
        clipped = tf.clip_by_value(value, clip_value_min = 0.0, clip_value_max=self.XMAX)
        return tf.pow(clipped / self.XMAX, self.alpha)

In [None]:
# Grid search: train the model once per learning rate with Adamax and log the
# per-epoch loss of every run into one CSV file.
vocab = Vocabulary()
vocab.load('base_filtered100')
# Read as a global by ModelTrainer.prepare, so it must be defined before training.
vocab_size = len(vocab.word2Id)
# Read as a global by ModelTrainer.train_splitted.
csv_writer = CSV_writer('GridSearch100Adamax')

learning_rates = [0.1,0.05,0.01]

# Other optimizers that can be substituted below: Adam, RMSprop, Adadelta,
# Adagrad, Ftrl, Nadam, SGD (all in tf.keras.optimizers).
try:
    for lr in learning_rates:
        # Start every run from a clean Keras state and a fresh optimizer.
        tf.keras.backend.clear_session()
        opt = tf.keras.optimizers.Adamax()
        trainer = ModelTrainer(vocab,1)
        # Every run re-creates (and overwrites) the same parameter file.
        trainer.prepare("GridSearch100Proper")
        print(opt)
        opt.learning_rate = lr
        print(opt.get_config())
        trainer.train_splitted(100,opt)
        del trainer
        # Persist the finished run's rows before the next long run starts.
        csv_writer.csvfile.flush()
finally:
    # Close the log even if a run fails or is interrupted, so the rows written
    # so far are not lost and the file handle is not leaked.
    csv_writer.csvfile.close()


<tensorflow.python.keras.optimizer_v2.adamax.Adamax object at 0x0000016F2CD44340>
{'name': 'Adamax', 'learning_rate': 0.1, 'decay': 0.0, 'beta_1': 0.9, 'beta_2': 0.999, 'epsilon': 1e-07}
<tf.Variable 'learning_rate:0' shape=() dtype=float32, numpy=0.1>
epoch0loss:6659837.0
epoch1loss:4009704.8
epoch2loss:2702432.2
epoch3loss:2611822.8
epoch4loss:1805664.0
epoch5loss:1705669.9
epoch6loss:1371519.4
epoch7loss:1304733.1
epoch8loss:1217232.6
epoch9loss:1210140.0
epoch10loss:1214231.1
epoch11loss:1215374.2
epoch12loss:1206915.8
epoch13loss:1193457.2
epoch14loss:1183473.5
epoch15loss:1161296.6
epoch16loss:1165328.5
epoch17loss:1147213.5
epoch18loss:1155915.6
epoch19loss:1136438.6
epoch20loss:1147643.4
epoch21loss:1128955.6
epoch22loss:1141798.6
epoch23loss:1122910.0
epoch24loss:1137416.1
epoch25loss:1119241.0
epoch26loss:1133388.5
epoch27loss:1115961.2
epoch28loss:1129951.6
epoch29loss:1113276.2
epoch30loss:1127202.5
epoch31loss:1111392.4
epoch32loss:1125309.0
epoch33loss:1110093.4
epoch34lo