In [1]:
# Imports and process-level configuration for the whole notebook.
# The os.environ assignments are placed before `import tensorflow` because
# TensorFlow reads these variables when it is first imported/initialised.
import gensim.downloader as api
#dataset = api.load("text8")
import math
import numpy as np
import h5py
import os
# '0' is the most verbose TensorFlow C++ log level (nothing is filtered out).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '0'
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # "-1" hides all CUDA devices, so TensorFlow runs on the CPU
import sys
# Make the parent directory importable.
# NOTE(review): presumably Vocabulary.py and csv_writer.py live there -- confirm.
sys.path.append("..")
import tensorflow as tf
# NOTE(review): star import; the `Vocabulary` class used further down presumably comes from here.
from Vocabulary import *
import time
# Reset Keras' global state so re-running this cell starts from a clean backend.
tf.keras.backend.clear_session()
import csv
import cloudpickle
# NOTE(review): star import; the `CSV_writer` class used by ModelTrainer presumably comes from here.
from csv_writer import *

In [2]:
class lr_On_plato:
    """Reduce-on-plateau schedule: halves `factor` when the loss stops improving.

    Feed the loss of every epoch to `notify_loss`; read the current learning
    rate with `get_lr`. The class attributes below act as per-instance defaults
    (assigning through `self` creates instance attributes and leaves the class
    defaults untouched).
    """
    # Best (lowest) loss seen since the last reset; -1 means "no loss seen yet".
    lowest_loss = -1
    # Epoch at which `lowest_loss` was recorded.
    lowest_time = 0
    # Number of epochs without improvement that are tolerated before the rate is reduced.
    patience = 10
    # Current learning rate.
    factor = 0.005

    def notify_loss(self,loss,epoch):
        """Record the loss of `epoch` and halve the learning rate on a plateau.

        Args:
            loss: loss value of the finished epoch.
            epoch: index of the finished epoch.

        The rate is halved when `loss` is worse than the best recorded loss and
        more than `patience` epochs have passed since that best loss. After a
        reduction the current loss/epoch become the new reference point.
        """
        if self.lowest_loss == -1 or loss < self.lowest_loss:
            # First observation or a new best: remember it.
            self.lowest_loss = loss
            self.lowest_time = epoch
        elif loss > self.lowest_loss and self.lowest_time + self.patience < epoch:
            # Use the configurable `patience` instead of a hard-coded 10 so that
            # changing the attribute actually changes the behaviour.
            self.lowest_loss = loss
            self.lowest_time = epoch
            print("decreased LR")
            self.factor = self.factor * 0.5

    def get_lr(self,epoch):
        """Return the current learning rate (`epoch` is accepted but not used)."""
        return self.factor
        

In [3]:
class ModelTrainer:
    """Block-wise trainer for a GloVe-style weighted least-squares objective.

    The four parameter matrices ("weights", "context-weights", "bias" and
    "context-bias") are kept in one HDF5 file. For every (zeile, spalte) block
    pair (German for row / column) the matching parameter slices are loaded
    into TensorFlow variables, updated with one optimizer step and written
    back to the file.

    Typical use: construct, call `prepare` (new run) or `resume` (existing
    run), then call `train_splitted` once; it closes the files when it is done.
    """

    # Constants of the objective. They are plain Python floats: TensorFlow
    # converts them to the dtype of the tensors they are combined with, and
    # defining the class does not have to create any tensors.
    alpha = 0.75      # exponent of the co-occurrence weighting in cut_function2
    XMAX = 100.0      # co-occurrence count at which the weighting saturates at 1
    epsilon = 1e-8    # added to the counts before the logarithm so log(0) never occurs

    def __init__(self,vocab_length,block_path,vector_size = 300,block_length = 20000):
        """Store the run configuration; no file is opened here.

        Args:
            vocab_length: number of words in the vocabulary.
            block_path: directory that contains the co-occurrence block files.
            vector_size: dimensionality of the embeddings.
            block_length: number of vocabulary entries per block.
                NOTE(review): presumably has to equal the block size used when
                the co-occurrence block files were written -- confirm.
        """
        self.vector_size = vector_size
        self.block_length = block_length
        # Number of blocks per axis (the last block may be only partially filled).
        self.amount_split = math.ceil(vocab_length/float(self.block_length))
        print('amout_split: ' + str(self.amount_split))
        self.block_path = block_path
        self.vocab_length = vocab_length

    #start training for first time
    def prepare(self,filename):
        """Create a new parameter file and CSV log, then randomly initialise all matrices.

        The HDF5 file is opened in "w" mode, so an existing file with the same
        name is truncated.

        Args:
            filename: base name (without extension) of the HDF5 file and the CSV log.
        """
        self.f = h5py.File('S:\\{filename}.hdf5'.format(filename=filename), "w")#plus experiment name
        # Create the four parameter datasets.
        self.con_weights = self.f.create_dataset("context-weights", (self.vocab_length, self.vector_size))
        self.weights = self.f.create_dataset("weights",(self.vector_size,self.vocab_length))
        self.context_bias = self.f.create_dataset("context-bias", (self.vocab_length,1))
        self.bias = self.f.create_dataset("bias", (1,self.vocab_length))
        self.csv_writer = CSV_writer(filename+".csv")

        self.init_matrices()

    #return to training
    def resume(self,filename):
        """Re-open an existing parameter file (mode "r+") and append to its CSV log.

        Args:
            filename: base name (without extension) used in the earlier `prepare` call.

        Raises:
            KeyError: if one of the four expected datasets is missing from the file.
        """
        self.f = h5py.File('S:\\{filename}.hdf5'.format(filename=filename), "r+")#plus experiment name
        # Index access instead of .get(): a missing dataset fails here with a
        # clear KeyError instead of silently becoming None and failing later.
        self.con_weights = self.f["context-weights"]
        self.weights = self.f["weights"]
        self.context_bias = self.f["context-bias"]
        self.bias = self.f["bias"]
        self.csv_writer = CSV_writer(filename+".csv",appendmode=True)

    def init_matrices(self,chunk_size=10000):
        """Randomly initialise all four parameter datasets, `chunk_size` rows/columns at a time."""
        self.init_hdf_matrix(self.weights,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.con_weights,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.context_bias,-0.5,0.5,chunk_size)
        self.init_hdf_matrix(self.bias,-0.5,0.5,chunk_size)

    def init_hdf_matrix(self,hdf_data,min_value,max_value,block_length):
        """Fill a 2-D dataset with uniform(min_value, max_value) / vector_size.

        The dataset is written in chunks of `block_length` along its longer
        axis so that the full matrix never has to be held in memory.

        Args:
            hdf_data: 2-D array-like exposing `.shape` and slice assignment
                (an h5py dataset in this class).
            min_value: lower bound of the uniform distribution (before scaling).
            max_value: upper bound of the uniform distribution (before scaling).
            block_length: number of rows/columns written per chunk.
        """
        # .shape avoids reading the first row from disk just to learn its width.
        rows, cols = hdf_data.shape
        if rows > cols:
            for start in range(0, rows, block_length):
                stop = min(start + block_length, rows)
                # min_value/max_value are honoured here; previously they were
                # ignored and every value was drawn from [0, 1).
                hdf_data[start:stop, :] = np.random.uniform(min_value, max_value, (stop - start, cols))/self.vector_size
        else:
            for start in range(0, cols, block_length):
                stop = min(start + block_length, cols)
                hdf_data[:, start:stop] = np.random.uniform(min_value, max_value, (rows, stop - start))/self.vector_size

    def load_blocks(self,zeile,spalte):
        """Load the co-occurrence block (zeile, spalte) and the matching parameter slices.

        Block files are only looked up with the larger index first; when
        `spalte > zeile` the stored block is transposed after loading.

        Raises:
            KeyError: if the block file has no "co-ocurrence" dataset.
        """
        first, second = max(zeile, spalte), min(zeile, spalte)
        template = "co_occurence_{i}_{j}.hdf5".format(i=first,j=second)
        file_path =  self.block_path + '\\' + template

        # The context manager closes the block file even if reading fails.
        with h5py.File(file_path, "r") as block_file:
            coocurrence = block_file["co-ocurrence"][:]
        if (spalte > zeile):
            coocurrence = np.transpose(coocurrence)
        self.tf_co_occurences = tf.convert_to_tensor(coocurrence,dtype=tf.dtypes.float32)
        # Drop the numpy copy before more data is loaded.
        del coocurrence
        self.load_weights(zeile,spalte)

    def load_weights(self,zeile,spalte):
        """Load the parameter slices for context block `zeile` and word block `spalte` into tf.Variables."""
        context_rows = slice(zeile*self.block_length, (zeile+1)*self.block_length)
        word_cols = slice(spalte*self.block_length, (spalte+1)*self.block_length)
        self.tf_con_bias = tf.Variable(initial_value=self.context_bias[context_rows,:],dtype=tf.dtypes.float32)
        self.tf_bias = tf.Variable(initial_value=self.bias[:,word_cols],dtype=tf.dtypes.float32)
        self.tf_con_weights =  tf.Variable(initial_value=self.con_weights[context_rows,:],dtype=tf.dtypes.float32)
        self.tf_weights = tf.Variable(initial_value=self.weights[:,word_cols],dtype=tf.dtypes.float32)

    def save_blocks(self,zeile,spalte):
        """Write the current tf.Variables back to the slices they were loaded from."""
        context_rows = slice(zeile*self.block_length, (zeile+1)*self.block_length)
        word_cols = slice(spalte*self.block_length, (spalte+1)*self.block_length)
        self.context_bias[context_rows,:] = self.tf_con_bias.numpy()
        # Same (1, n) selection that load_weights reads, so the shapes match exactly.
        self.bias[:,word_cols] = self.tf_bias.numpy()
        self.con_weights[context_rows,:] = self.tf_con_weights.numpy()
        self.weights[:,word_cols] = self.tf_weights.numpy()

    def _close_files(self):
        """Close the parameter file and the CSV log (the log is closed even if the first close fails)."""
        try:
            self.f.close()
        finally:
            self.csv_writer.close()

    def _pad_to_block(self,tensor,axis):
        """Zero-pad `tensor` along `axis` up to `block_length` entries."""
        missing = self.block_length - tensor.shape[axis]
        if missing == 0:
            return tensor
        pad_shape = tensor.shape.as_list()
        pad_shape[axis] = missing
        padding = tf.zeros(pad_shape,dtype=tf.dtypes.float32)
        return tf.concat([tensor,padding],axis = axis)

    #@tf.function
    def loss(self,zeile,spalte):
        """Return the weighted squared error of the currently loaded block as a scalar tensor.

        Computes sum( f(X) * (context_bias + bias + context_weights @ weights - log(X + epsilon))^2 )
        with f = cut_function2 and X = self.tf_co_occurences. Requires that
        `load_blocks` / `load_weights` have been called for this block.

        Args:
            zeile: index of the context (row) block; the last block is zero-padded.
            spalte: index of the word (column) block; the last block is zero-padded.
        """
        # Context side: only the last block can be shorter than block_length.
        context_weights = self.tf_con_weights
        context_bias = self.tf_con_bias
        if(zeile == self.amount_split - 1):
            context_weights = self._pad_to_block(context_weights,axis = 0)
            context_bias = self._pad_to_block(context_bias,axis = 0)

        # Word side.
        weights = self.tf_weights
        bias = self.tf_bias
        if(spalte == self.amount_split - 1):
            weights = self._pad_to_block(weights,axis = 1)
            bias = self._pad_to_block(bias,axis = 1)

        co_occurences = self.tf_co_occurences

        # (1, n) + (n, 1) broadcasts to (n, n); no dense matrix of ones is needed.
        bias_terms = bias + context_bias
        weight_matrix = tf.matmul(context_weights,weights)
        log_X = tf.math.log(co_occurences + self.epsilon)
        inner_sum = bias_terms + weight_matrix - log_X
        squared_sum = tf.math.square(inner_sum)
        weighted_sum = self.cut_function2(co_occurences) * squared_sum
        reduced = tf.math.reduce_sum(weighted_sum)
        return reduced

    def cut_function2(self,value):
        """Weighting function: (min(value, XMAX) / XMAX) ** alpha, with negative values clipped to 0."""
        clipped = tf.clip_by_value(value, clip_value_min = 0.0, clip_value_max=self.XMAX)
        return tf.pow(clipped / self.XMAX, self.alpha)

    def load_optimizer(self,epoch,zeile,spalte,optimizer_factory,lr_scheduler=None):
        """Return the optimizer for block (zeile, spalte).

        In epoch 0 a new optimizer is created via `optimizer_factory.create()`;
        in later epochs a pickled optimizer is loaded from disk.
        NOTE(review): nothing in this class writes those optimizer files and
        `train_splitted` does not call this method -- confirm it is still needed.

        Args:
            epoch: current epoch index.
            zeile: row block index.
            spalte: column block index.
            optimizer_factory: object with a `create()` method returning an optimizer.
            lr_scheduler: optional object with `get_lr(epoch)`. When omitted, a
                module-level `lrOnPlato` object is used if one exists; otherwise
                the learning rate stored in the loaded optimizer is kept.
        """
        if(epoch == 0):
            optimizer = optimizer_factory.create()
        else:
            name = 'S://optimizer{z}-{s}'.format(z = zeile,s = spalte)
            # SECURITY: unpickling executes code contained in the file. Only load
            # optimizer snapshots that this project wrote itself.
            with open(name, "rb") as file:
                optimizer = cloudpickle.load(file)
            # The previous code read an undefined global `lrOnPlato` directly,
            # which raised NameError unless some other cell happened to define it.
            scheduler = lr_scheduler if lr_scheduler is not None else globals().get("lrOnPlato")
            if scheduler is not None:
                optimizer.learning_rate.assign(scheduler.get_lr(epoch))
        return optimizer

    def _train_block(self,zeile,spalte,learning_rate):
        """Run one Adam step on the currently loaded block, save it and return its loss.

        NOTE(review): a new Adam optimizer is created for every step, so its
        moment estimates never carry over between steps -- confirm this is intended.
        """
        optimizer = tf.keras.optimizers.Adam(learning_rate)
        variables = [self.tf_con_bias,self.tf_bias,self.tf_con_weights,self.tf_weights]
        with tf.GradientTape() as tape:
            block_loss = self.loss(zeile,spalte)
        grads = tape.gradient(block_loss, variables)
        optimizer.apply_gradients(zip(grads, variables))
        print(block_loss)
        self.save_blocks(zeile,spalte)
        return block_loss.numpy()

    def train_splitted(self,epochs,keepWeights=False,learning_rate=0.005):
        """Train for `epochs` epochs over all block pairs, then close the files.

        Only the lower triangle of block pairs (spalte <= zeile) is read from
        disk; every off-diagonal block is additionally trained in its
        transposed orientation, so both sides are updated.

        Args:
            epochs: number of passes over all block pairs.
            keepWeights: accepted for backward compatibility; not used.
            learning_rate: Adam learning rate. The same value is written to the
                CSV log (previously the log always recorded 0.0025 although the
                optimizer used 0.005).

        The parameter file and the CSV log are closed when this method returns
        or raises, so `prepare`/`resume` must be called again before another run.
        """
        try:
            for epoch in range(0,epochs):
                cur_loss = float(0.0)
                for zeile in range(0,self.amount_split):
                    for spalte in range(0,zeile + 1):
                        # Train the stored orientation of the block.
                        self.load_blocks(zeile,spalte)
                        print(zeile,spalte)
                        cur_loss += self._train_block(zeile,spalte,learning_rate)

                        # Train the mirrored orientation (not needed on the diagonal).
                        if spalte != zeile:
                            self.load_weights(spalte,zeile)
                            self.tf_co_occurences = tf.transpose(self.tf_co_occurences)
                            cur_loss += self._train_block(spalte,zeile,learning_rate)

                print('epoch'+str(epoch)+"loss:"+str(cur_loss))
                self.csv_writer.write('ADAM',learning_rate,epoch+1,cur_loss)
        finally:
            # Always release the HDF5 file and the CSV log, even on errors or a
            # keyboard interrupt, so that the run can be resumed afterwards.
            self._close_files()
        return None

    

In [4]:
# Load the saved baseline vocabulary and read its size; `size` is later passed
# to ModelTrainer as the vocabulary length.
# NOTE(review): `Vocabulary` is not defined in this notebook; presumably it
# comes from the star import of the Vocabulary module -- confirm.
vocab = Vocabulary()
vocab.load('..\\vocabs\\baseline')
size = vocab.get_size()

class AdamFactory:
    """Factory that builds Adam optimizers with a fixed learning rate."""

    def __init__(self, lr=0.001):
        """Remember the learning rate handed to every optimizer that is created."""
        self.lr = lr

    def optimiser_name(self):
        """Return the display name of the optimizer type."""
        return "Adam"

    def create(self):
        """Return a new Adam optimizer configured with `self.lr`."""
        optimizer = tf.keras.optimizers.Adam(learning_rate=self.lr)
        return optimizer
    
    
#tf.keras.backend.clear_session()
#trainer = ModelTrainer(size,"S:\\base_coocurrence_hdf")
#trainer.prepare("baseline_postSanityCheck")
#trainer.train_splitted(10)


In [None]:
# Resume the "baseline_postSanityCheck" run for 50 more epochs and report the
# wall-clock time of the whole cell.
import time
startTime = time.time()
# Second argument: directory that holds the co-occurrence block files.
trainer2 = ModelTrainer(size,'S:\\base_coocurrence_hdf')
# Re-opens the existing parameter file and appends to the existing CSV log.
trainer2.resume("baseline_postSanityCheck")

# train_splitted closes the trainer's files once training has finished.
trainer2.train_splitted(50)

executionTime = (time.time() - startTime)
print('Execution time in seconds: ' + str(executionTime))

amout_split: 8
0 0
tf.Tensor(22130786.0, shape=(), dtype=float32)
1 0
tf.Tensor(14736812.0, shape=(), dtype=float32)
tf.Tensor(16730940.0, shape=(), dtype=float32)
1 1
tf.Tensor(14668454.0, shape=(), dtype=float32)
2 0
tf.Tensor(13673220.0, shape=(), dtype=float32)
tf.Tensor(14330777.0, shape=(), dtype=float32)
2 1
tf.Tensor(13478148.0, shape=(), dtype=float32)
tf.Tensor(13043492.0, shape=(), dtype=float32)
2 2
tf.Tensor(11868340.0, shape=(), dtype=float32)
3 0
tf.Tensor(11971766.0, shape=(), dtype=float32)
tf.Tensor(12456132.0, shape=(), dtype=float32)
3 1
tf.Tensor(11667296.0, shape=(), dtype=float32)
tf.Tensor(11303857.0, shape=(), dtype=float32)
3 2
tf.Tensor(10188425.0, shape=(), dtype=float32)
tf.Tensor(10175738.0, shape=(), dtype=float32)
3 3
tf.Tensor(8635902.0, shape=(), dtype=float32)
4 0
tf.Tensor(10568904.0, shape=(), dtype=float32)
tf.Tensor(11006652.0, shape=(), dtype=float32)
4 1
tf.Tensor(10348026.0, shape=(), dtype=float32)
tf.Tensor(10108147.0, shape=(), dtype=float32

6 2
tf.Tensor(1399979.9, shape=(), dtype=float32)
tf.Tensor(1408049.5, shape=(), dtype=float32)
6 3
tf.Tensor(1123932.6, shape=(), dtype=float32)
tf.Tensor(1130859.5, shape=(), dtype=float32)
6 4
tf.Tensor(968886.44, shape=(), dtype=float32)
tf.Tensor(931112.8, shape=(), dtype=float32)
6 5
tf.Tensor(884373.1, shape=(), dtype=float32)
tf.Tensor(821753.0, shape=(), dtype=float32)
6 6
tf.Tensor(795405.6, shape=(), dtype=float32)
7 0
tf.Tensor(2611463.2, shape=(), dtype=float32)
tf.Tensor(2592409.0, shape=(), dtype=float32)
7 1
tf.Tensor(3323681.0, shape=(), dtype=float32)
tf.Tensor(3297043.2, shape=(), dtype=float32)
7 2
tf.Tensor(3848523.8, shape=(), dtype=float32)
tf.Tensor(3948382.2, shape=(), dtype=float32)
7 3
tf.Tensor(3830910.2, shape=(), dtype=float32)
tf.Tensor(3801581.2, shape=(), dtype=float32)
7 4
tf.Tensor(4028468.0, shape=(), dtype=float32)
tf.Tensor(4074501.5, shape=(), dtype=float32)
7 5
tf.Tensor(4121276.5, shape=(), dtype=float32)
tf.Tensor(4090774.8, shape=(), dtype=flo

4 1
tf.Tensor(447043.03, shape=(), dtype=float32)
tf.Tensor(406566.6, shape=(), dtype=float32)
4 2
tf.Tensor(354483.03, shape=(), dtype=float32)
tf.Tensor(357991.53, shape=(), dtype=float32)
4 3
tf.Tensor(339571.16, shape=(), dtype=float32)
tf.Tensor(319960.25, shape=(), dtype=float32)
4 4
tf.Tensor(292523.25, shape=(), dtype=float32)
5 0
tf.Tensor(636943.44, shape=(), dtype=float32)
tf.Tensor(648064.7, shape=(), dtype=float32)
5 1
tf.Tensor(390487.3, shape=(), dtype=float32)
tf.Tensor(371931.53, shape=(), dtype=float32)
5 2
tf.Tensor(394574.38, shape=(), dtype=float32)
tf.Tensor(387737.6, shape=(), dtype=float32)
5 3
tf.Tensor(301518.97, shape=(), dtype=float32)
tf.Tensor(293314.44, shape=(), dtype=float32)
5 4
tf.Tensor(306185.97, shape=(), dtype=float32)
tf.Tensor(320801.84, shape=(), dtype=float32)
5 5
tf.Tensor(240924.28, shape=(), dtype=float32)
6 0
tf.Tensor(578323.1, shape=(), dtype=float32)
tf.Tensor(544750.2, shape=(), dtype=float32)
6 1
tf.Tensor(382806.5, shape=(), dtype=fl

tf.Tensor(3664787.8, shape=(), dtype=float32)
7 6
tf.Tensor(3427739.0, shape=(), dtype=float32)
tf.Tensor(3508091.0, shape=(), dtype=float32)
7 7
tf.Tensor(6937705.0, shape=(), dtype=float32)
epoch7loss:79578305.515625
0 0
tf.Tensor(10768317.0, shape=(), dtype=float32)
1 0
tf.Tensor(1971455.8, shape=(), dtype=float32)
tf.Tensor(1977139.2, shape=(), dtype=float32)
1 1
tf.Tensor(496472.9, shape=(), dtype=float32)
2 0
tf.Tensor(969444.44, shape=(), dtype=float32)
tf.Tensor(936249.5, shape=(), dtype=float32)
2 1
tf.Tensor(317888.0, shape=(), dtype=float32)
tf.Tensor(282896.5, shape=(), dtype=float32)
2 2
tf.Tensor(250025.28, shape=(), dtype=float32)
3 0
tf.Tensor(688085.3, shape=(), dtype=float32)
tf.Tensor(650670.1, shape=(), dtype=float32)
3 1
tf.Tensor(316263.28, shape=(), dtype=float32)
tf.Tensor(251655.75, shape=(), dtype=float32)
3 2
tf.Tensor(206233.44, shape=(), dtype=float32)
tf.Tensor(224608.17, shape=(), dtype=float32)
3 3
tf.Tensor(257721.78, shape=(), dtype=float32)
4 0
tf.Ten

tf.Tensor(271052.0, shape=(), dtype=float32)
6 1
tf.Tensor(218313.08, shape=(), dtype=float32)
tf.Tensor(100600.82, shape=(), dtype=float32)
6 2
tf.Tensor(137667.97, shape=(), dtype=float32)
tf.Tensor(109724.125, shape=(), dtype=float32)
6 3
tf.Tensor(175974.16, shape=(), dtype=float32)
tf.Tensor(107962.77, shape=(), dtype=float32)
6 4
tf.Tensor(118834.875, shape=(), dtype=float32)
tf.Tensor(103710.125, shape=(), dtype=float32)
6 5
tf.Tensor(168537.52, shape=(), dtype=float32)
tf.Tensor(113643.14, shape=(), dtype=float32)
6 6
tf.Tensor(107329.24, shape=(), dtype=float32)
7 0
tf.Tensor(1642574.2, shape=(), dtype=float32)
tf.Tensor(1517688.2, shape=(), dtype=float32)
7 1
tf.Tensor(2378772.5, shape=(), dtype=float32)
tf.Tensor(2376631.5, shape=(), dtype=float32)
7 2
tf.Tensor(2980574.8, shape=(), dtype=float32)
tf.Tensor(3080188.2, shape=(), dtype=float32)
7 3
tf.Tensor(3142285.2, shape=(), dtype=float32)
tf.Tensor(2822563.5, shape=(), dtype=float32)
7 4
tf.Tensor(3081190.8, shape=(), dty

tf.Tensor(146297.6, shape=(), dtype=float32)
3 3
tf.Tensor(117367.36, shape=(), dtype=float32)
4 0
tf.Tensor(423057.34, shape=(), dtype=float32)
tf.Tensor(366848.03, shape=(), dtype=float32)
4 1
tf.Tensor(225123.86, shape=(), dtype=float32)
tf.Tensor(105065.44, shape=(), dtype=float32)
4 2
tf.Tensor(115488.94, shape=(), dtype=float32)
tf.Tensor(80166.6, shape=(), dtype=float32)
4 3
tf.Tensor(159657.73, shape=(), dtype=float32)
tf.Tensor(104587.53, shape=(), dtype=float32)
4 4
tf.Tensor(74538.89, shape=(), dtype=float32)
5 0
tf.Tensor(402987.53, shape=(), dtype=float32)
tf.Tensor(308884.2, shape=(), dtype=float32)
5 1
tf.Tensor(178086.83, shape=(), dtype=float32)
tf.Tensor(94298.805, shape=(), dtype=float32)
5 2
tf.Tensor(116067.69, shape=(), dtype=float32)
tf.Tensor(89639.37, shape=(), dtype=float32)
5 3
tf.Tensor(170161.16, shape=(), dtype=float32)
tf.Tensor(71431.086, shape=(), dtype=float32)
5 4
tf.Tensor(95501.625, shape=(), dtype=float32)
tf.Tensor(90166.1, shape=(), dtype=float32

tf.Tensor(2579707.0, shape=(), dtype=float32)
7 4
tf.Tensor(2823449.2, shape=(), dtype=float32)
tf.Tensor(2980196.2, shape=(), dtype=float32)
7 5
tf.Tensor(3179787.0, shape=(), dtype=float32)
tf.Tensor(3124791.0, shape=(), dtype=float32)
7 6
tf.Tensor(2873286.5, shape=(), dtype=float32)
tf.Tensor(3117997.2, shape=(), dtype=float32)
7 7
tf.Tensor(6470084.0, shape=(), dtype=float32)
epoch15loss:64084487.2734375
0 0
tf.Tensor(8567409.0, shape=(), dtype=float32)
1 0
tf.Tensor(1637571.4, shape=(), dtype=float32)
tf.Tensor(1593278.6, shape=(), dtype=float32)
1 1
tf.Tensor(335438.25, shape=(), dtype=float32)
2 0
tf.Tensor(873289.2, shape=(), dtype=float32)
tf.Tensor(769745.25, shape=(), dtype=float32)
2 1
tf.Tensor(201078.03, shape=(), dtype=float32)
tf.Tensor(161108.14, shape=(), dtype=float32)
2 2
tf.Tensor(129833.836, shape=(), dtype=float32)
3 0
tf.Tensor(549424.7, shape=(), dtype=float32)
tf.Tensor(489939.2, shape=(), dtype=float32)
3 1
tf.Tensor(213163.12, shape=(), dtype=float32)
tf.Te

tf.Tensor(83824.24, shape=(), dtype=float32)
5 5
tf.Tensor(83263.39, shape=(), dtype=float32)
6 0
tf.Tensor(326436.44, shape=(), dtype=float32)
tf.Tensor(215237.0, shape=(), dtype=float32)
6 1
tf.Tensor(130645.766, shape=(), dtype=float32)
tf.Tensor(50664.3, shape=(), dtype=float32)
6 2
tf.Tensor(90238.04, shape=(), dtype=float32)
tf.Tensor(93182.5, shape=(), dtype=float32)
6 3
tf.Tensor(95783.164, shape=(), dtype=float32)
tf.Tensor(46033.203, shape=(), dtype=float32)
6 4
tf.Tensor(71834.05, shape=(), dtype=float32)
tf.Tensor(55124.766, shape=(), dtype=float32)
6 5
tf.Tensor(101143.664, shape=(), dtype=float32)
tf.Tensor(48471.543, shape=(), dtype=float32)
6 6
tf.Tensor(65262.9, shape=(), dtype=float32)
7 0
tf.Tensor(1340813.9, shape=(), dtype=float32)
tf.Tensor(1247010.6, shape=(), dtype=float32)
7 1
tf.Tensor(2008582.5, shape=(), dtype=float32)
tf.Tensor(1981460.5, shape=(), dtype=float32)
7 2
tf.Tensor(2617267.0, shape=(), dtype=float32)
tf.Tensor(2668254.2, shape=(), dtype=float32)

tf.Tensor(129617.375, shape=(), dtype=float32)
tf.Tensor(113421.18, shape=(), dtype=float32)
3 2
tf.Tensor(82726.6, shape=(), dtype=float32)
tf.Tensor(100100.39, shape=(), dtype=float32)
3 3
tf.Tensor(90056.92, shape=(), dtype=float32)
4 0
tf.Tensor(366781.12, shape=(), dtype=float32)
tf.Tensor(311493.03, shape=(), dtype=float32)
4 1
tf.Tensor(174800.97, shape=(), dtype=float32)
tf.Tensor(97041.78, shape=(), dtype=float32)
4 2
tf.Tensor(63789.74, shape=(), dtype=float32)
tf.Tensor(59838.457, shape=(), dtype=float32)
4 3
tf.Tensor(100050.44, shape=(), dtype=float32)
tf.Tensor(58239.383, shape=(), dtype=float32)
4 4
tf.Tensor(61064.84, shape=(), dtype=float32)
5 0
tf.Tensor(350778.8, shape=(), dtype=float32)
tf.Tensor(273340.22, shape=(), dtype=float32)
5 1
tf.Tensor(107923.88, shape=(), dtype=float32)
tf.Tensor(84765.92, shape=(), dtype=float32)
5 2
tf.Tensor(91340.15, shape=(), dtype=float32)
tf.Tensor(78203.91, shape=(), dtype=float32)
5 3
tf.Tensor(76641.76, shape=(), dtype=float32)
