In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Input, Dense, Concatenate, Dropout
from tensorflow.keras.regularizers import l2

# Configure TensorFlow's inter-op parallelism to use 12 threads.
tf.config.threading.set_inter_op_parallelism_threads(12)

In [2]:
@tf.keras.utils.register_keras_serializable()
def avg_recall(actual, pred):
    """Macro-averaged recall over the label columns of one batch.

    Predictions are rounded with ``tf.round`` and a label/prediction counts as
    positive when it equals 1.  Recall is computed per column and the column
    values are averaged with equal weight.

    Args:
        actual: ground-truth tensor; assumed to be rank 2, (samples, labels).
        pred: predicted scores with the same shape as ``actual``.

    Returns:
        Scalar float32 tensor holding the mean per-column recall.  A column
        with no positive labels contributes 0 (the epsilon in the denominator
        avoids a division by zero).
    """
    pred = tf.round(pred)
    actual_is_pos = tf.equal(actual, 1)
    pred_is_pos = tf.equal(pred, 1)

    # Reduce over the sample axis only, so every column is handled in a single
    # op instead of one Python-level slice per class; this also works when the
    # static size of the label axis is unknown.
    true_positives = tf.reduce_sum(
        tf.cast(tf.math.logical_and(actual_is_pos, pred_is_pos), tf.float32), axis=0
    )
    actual_positives = tf.reduce_sum(tf.cast(actual_is_pos, tf.float32), axis=0)

    class_recalls = true_positives / (actual_positives + tf.keras.backend.epsilon())
    return tf.reduce_mean(class_recalls)

@tf.keras.utils.register_keras_serializable()
def avg_precision(actual, pred):
    """Macro-averaged precision over the label columns of one batch.

    Predictions are rounded with ``tf.round`` and a label/prediction counts as
    positive when it equals 1.  Precision is computed per column and the column
    values are averaged with equal weight.

    Args:
        actual: ground-truth tensor; assumed to be rank 2, (samples, labels).
        pred: predicted scores with the same shape as ``actual``.

    Returns:
        Scalar float32 tensor holding the mean per-column precision.  A column
        with no positive predictions contributes 0 (the epsilon in the
        denominator avoids a division by zero).
    """
    pred = tf.round(pred)
    actual_is_pos = tf.equal(actual, 1)
    pred_is_pos = tf.equal(pred, 1)

    # Reduce over the sample axis only, so every column is handled in a single
    # op instead of one Python-level slice per class; this also works when the
    # static size of the label axis is unknown.
    tp = tf.reduce_sum(
        tf.cast(tf.math.logical_and(actual_is_pos, pred_is_pos), tf.float32), axis=0
    )
    positives = tf.reduce_sum(tf.cast(pred_is_pos, tf.float32), axis=0)

    class_precisions = tp / (positives + tf.keras.backend.epsilon())
    return tf.reduce_mean(class_precisions)

In [3]:
# Load the previously saved model from "v3.keras"; its layer weights are
# copied into the freshly built model further down.
pretrained_model = tf.keras.models.load_model("v3.keras")
# Print the architecture so it can be compared with the new model.
pretrained_model.summary()

In [5]:
# v3 - regularized: every Dense layer carries an L2(0.01) kernel penalty, and a
# 30% Dropout follows the first Dense of each text branch and the first Dense
# after the concatenation.


def _relu_dense(units, name):
    """Return a ReLU Dense layer with the shared L2(0.01) kernel regularizer."""
    return Dense(units, activation='relu', name=name, kernel_regularizer=l2(0.01))


# Inputs: a 5-wide law-area vector plus two 384-wide vectors.
input_3 = Input(shape=(5,), name='law_area')
input_1 = Input(shape=(384,), name='facts')
input_2 = Input(shape=(384,), name='considerations')

# Facts branch: 500 -> dropout -> 250.
b1 = _relu_dense(500, 'facts_dense1')(input_1)
b1 = Dropout(0.3, name='facts_dropout1')(b1)
b1 = _relu_dense(250, 'facts_dense2')(b1)

# Considerations branch: same shape as the facts branch.
b2 = _relu_dense(500, 'considerations_dense1')(input_2)
b2 = Dropout(0.3, name='considerations_dropout1')(b2)
b2 = _relu_dense(250, 'considerations_dense2')(b2)

# The raw law-area input is joined with both branch outputs.
merged = Concatenate()([input_3, b1, b2])

# Head: 250 -> dropout -> 244 independent sigmoid outputs.
output = _relu_dense(250, 'concatenate_dense1')(merged)
output = Dropout(0.3, name='concatenate_dropout1')(output)
output = Dense(
    244,
    activation='sigmoid',
    name='output_layer',
    kernel_regularizer=l2(0.01),
)(output)

# Input order here (law_area, facts, considerations) is the order in which
# arrays must be passed to fit/predict.
model = tf.keras.Model(inputs=[input_3, input_1, input_2], outputs=output)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[avg_recall, avg_precision],
)

model.summary()

In [5]:
# Inspect the layer ordering of the newly built model (compare with
# pretrained_model.layers).
model.layers

[<InputLayer name=facts, built=True>,
 <InputLayer name=considerations, built=True>,
 <Dense name=facts_dense1, built=True>,
 <Dense name=considerations_dense1, built=True>,
 <InputLayer name=law_area, built=True>,
 <Dense name=facts_dense2, built=True>,
 <Dense name=considerations_dense2, built=True>,
 <Concatenate name=concatenate, built=True>,
 <Dense name=concatenate_dense1, built=True>,
 <Dense name=output_layer, built=True>]

In [6]:
# Inspect the pretrained model's layers; they are addressed by their index in
# this list when weights are copied into the new model.
pretrained_model.layers

[<InputLayer name=input_layer_7, built=True>,
 <InputLayer name=input_layer_8, built=True>,
 <Dense name=dense_15, built=True>,
 <Dense name=dense_17, built=True>,
 <InputLayer name=input_layer_6, built=True>,
 <Dense name=dense_16, built=True>,
 <Dense name=dense_18, built=True>,
 <Concatenate name=concatenate_2, built=True>,
 <Dense name=dense_19, built=True>,
 <Dense name=dense_20, built=True>]

In [8]:
# Copy the pretrained Dense weights into the new model.
# Target layers are looked up by name, because their positions in model.layers
# shift whenever weight-free layers such as Dropout are added or removed.
# Source layers are addressed by their index in pretrained_model.layers
# (2, 3, 5, 6, 8, 9 are its Dense layers).
_PRETRAINED_INDEX_BY_LAYER_NAME = {
    'facts_dense1': 2,
    'considerations_dense1': 3,
    'facts_dense2': 5,
    'considerations_dense2': 6,
    'concatenate_dense1': 8,
    'output_layer': 9,
}
for _layer_name, _src_index in _PRETRAINED_INDEX_BY_LAYER_NAME.items():
    model.get_layer(_layer_name).set_weights(
        pretrained_model.layers[_src_index].get_weights()
    )


In [7]:
# Copy the pretrained Dense weights into the new model.
# This cell used to write to model.layers[5], [6], [8] and [9], which are the
# Dense layers only when the model has no Dropout layers; with Dropout present
# those positions hold different layers.  Resolving the target layers by name
# makes the copy independent of the layer positions.  Source layers are
# addressed by their index in pretrained_model.layers.
_PRETRAINED_INDEX_BY_LAYER_NAME = {
    'facts_dense1': 2,
    'considerations_dense1': 3,
    'facts_dense2': 5,
    'considerations_dense2': 6,
    'concatenate_dense1': 8,
    'output_layer': 9,
}
for _layer_name, _src_index in _PRETRAINED_INDEX_BY_LAYER_NAME.items():
    model.get_layer(_layer_name).set_weights(
        pretrained_model.layers[_src_index].get_weights()
    )

In [9]:
def _split_columns(frame):
    """Slice a dataset frame into (law_area, facts, considerations, laws) arrays.

    Column layout used by the slices: 3-7 law area, 8-391 facts,
    392-775 considerations, 776 onward the law labels.  Columns 0-2 (the
    language columns, per the variable name previously commented out here) are
    not used.
    """
    values = frame.values  # materialise the array once instead of once per slice
    return values[:, 3:8], values[:, 8:392], values[:, 392:776], values[:, 776:]


# Shuffle the training rows with a fixed seed so a re-run sees the same order.
df_train = pd.read_csv("sep_selected_laws_final_combined.csv").sample(frac=1, random_state=42)
df_val = pd.read_csv("sep_selected_laws_final_val.csv")

law_area_train, facts_train, considerations_train, laws_train = _split_columns(df_train)
law_area_val, facts_val, considerations_val, laws_val = _split_columns(df_val)

In [13]:
# Training configuration (bs is also read by give_results).
ep = 500
bs = 1024

# Arrays are listed in the model's input order: law_area, facts, considerations.
train_inputs = [law_area_train, facts_train, considerations_train]
val_inputs = [law_area_val, facts_val, considerations_val]

h = model.fit(
    x=train_inputs,
    y=laws_train,
    batch_size=bs,
    epochs=ep,
    validation_data=(val_inputs, laws_val),
)



Epoch 1/500
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 106ms/step - avg_precision: 0.6660 - avg_recall: 0.6731 - loss: 0.0476 - val_avg_precision: 0.2215 - val_avg_recall: 0.2105 - val_loss: 0.0962
Epoch 2/500
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - avg_precision: 0.6508 - avg_recall: 0.6187 - loss: 0.0257 - val_avg_precision: 0.2290 - val_avg_recall: 0.2033 - val_loss: 0.0737
Epoch 3/500
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - avg_precision: 0.6552 - avg_recall: 0.5945 - loss: 0.0196 - val_avg_precision: 0.2288 - val_avg_recall: 0.2060 - val_loss: 0.0652
Epoch 4/500
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - avg_precision: 0.6749 - avg_recall: 0.6065 - loss: 0.0169 - val_avg_precision: 0.2364 - val_avg_recall: 0.2043 - val_loss: 0.0619
Epoch 5/500
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 35ms/step - avg_precision: 0.6894 - avg_

KeyboardInterrupt: 

In [10]:
def calculate_recprec(actual, prediction):
    """Print and return the macro-averaged recall and precision over label columns.

    Every column is scored as an independent binary problem.  A label counts
    as positive when it is non-zero, a prediction when it is greater than 0.5.
    A column whose denominator is zero (no positive labels for recall, no
    positive predictions for precision) scores 0 rather than NaN.

    The per-column scores are computed with vectorised NumPy operations in
    float64 instead of one pair of tf.keras.metrics objects per column.

    Args:
        actual: 2-D array-like (samples, labels) of ground-truth indicators.
        prediction: 2-D array-like of the same shape holding predictions.

    Returns:
        Tuple ``(average_recall, average_precision)`` of Python floats; the
        same two values are printed.
    """
    truth = np.asarray(actual, dtype=float) != 0
    guess = np.asarray(prediction, dtype=float) > 0.5
    total_laws = truth.shape[1]

    def ratio(numerator, denominator):
        # Leave 0 wherever the denominator is 0 instead of producing NaN.
        result = np.zeros(total_laws, dtype=float)
        np.divide(numerator, denominator, out=result, where=denominator > 0)
        return result

    true_positives = np.logical_and(truth, guess).sum(axis=0)
    recall = ratio(true_positives, truth.sum(axis=0))
    precision = ratio(true_positives, guess.sum(axis=0))

    mean_recall = float(recall.sum()) / total_laws
    mean_precision = float(precision.sum()) / total_laws

    print("Average recall:", mean_recall)
    print("Average precision:", mean_precision)
    return mean_recall, mean_precision

In [11]:
def give_results(model):
    """Print average recall/precision of ``model`` on the training and validation sets.

    Predictions are thresholded at 0.5 and scored with ``calculate_recprec``.
    The data arrays and the ``ep``/``bs`` settings are read from the notebook
    globals, so the printed epoch and batch-size values describe the current
    notebook configuration, not necessarily how ``model`` was trained.

    Args:
        model: a Keras model accepting [law_area, facts, considerations] inputs.
    """
    train_prediction = (model.predict([law_area_train, facts_train, considerations_train], batch_size=1024) > 0.5).astype(int)
    val_prediction = (model.predict([law_area_val, facts_val, considerations_val], batch_size=1024) > 0.5).astype(int)

    print("Training set:")
    calculate_recprec(laws_train, train_prediction)
    print("Validation set:")
    calculate_recprec(laws_val, val_prediction)
    # Report the configured epoch count rather than a stale literal.
    print("Epochs:", ep)
    print("Batch size:", bs)

In [14]:
# Score the newly trained model first, then the pretrained model for comparison.
# give_results prints the notebook's own epoch/batch-size settings on both
# calls, so those two lines do not describe how pretrained_model was trained.
give_results(model)
print("--------------")
give_results(pretrained_model)

[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Training set:
Average recall: 0.998098035572005
Average precision: 0.9997083332206382
Validdation set:
Average recall: 0.2786484530012383
Average precision: 0.33644203455416394
Epochs: 500
Batch size: 1024
--------------
[1m103/103[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Training set:
Average recall: 0.8320960539286254
Average precision: 0.8949690685897577
Validdation set:
Average recall: 0.23318577614292257
Average precision: 0.3090214709537562
Epochs: 500
Batch size: 1024


In [15]:
# Keras 3 rejects a save path without an explicit extension, so use the native
# ".keras" format (the same format the pretrained model is loaded from).
# NOTE(review): the file name says "unregularized" while the model built in
# this notebook uses L2 regularization and Dropout - confirm the intended name.
model.save("v3_unregularized_newdata_overfit.keras")

