In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow.keras.layers import Input, Dense, Concatenate
# Inter-op thread count handed to TensorFlow's threading configuration.
INTER_OP_THREADS = 12
tf.config.threading.set_inter_op_parallelism_threads(INTER_OP_THREADS)

In [2]:
@tf.keras.utils.register_keras_serializable()
def avg_recall(actual, pred):
    """Macro-averaged recall over the label columns of a multi-label batch.

    Predictions are rounded with ``tf.round``; an entry counts as positive
    when it equals 1.  Recall is computed separately for every label column
    and the unweighted mean over the columns is returned.

    Args:
        actual: 2-D ground-truth indicator tensor indexed as (sample, label).
        pred: 2-D tensor of predicted scores with the same layout as ``actual``.

    Returns:
        Scalar float32 tensor holding the mean per-label recall.  A label
        with no positives in ``actual`` contributes 0, because the epsilon
        in the denominator turns 0/0 into 0.
    """
    is_actual = tf.equal(actual, 1)
    is_predicted = tf.equal(tf.round(pred), 1)

    # Reduce over the sample axis for all labels at once.  This replaces a
    # Python loop over `actual.shape[1]`, which added one set of ops per label
    # and could not run when the static label dimension was unknown.
    true_positives = tf.reduce_sum(
        tf.cast(tf.math.logical_and(is_actual, is_predicted), tf.float32), axis=0
    )
    actual_positives = tf.reduce_sum(tf.cast(is_actual, tf.float32), axis=0)

    per_label_recall = true_positives / (actual_positives + tf.keras.backend.epsilon())
    return tf.reduce_mean(per_label_recall)

@tf.keras.utils.register_keras_serializable()
def avg_precision(actual, pred):
    """Macro-averaged precision over the label columns of a multi-label batch.

    Predictions are rounded with ``tf.round``; an entry counts as positive
    when it equals 1.  Precision is computed separately for every label
    column and the unweighted mean over the columns is returned.

    Args:
        actual: 2-D ground-truth indicator tensor indexed as (sample, label).
        pred: 2-D tensor of predicted scores with the same layout as ``actual``.

    Returns:
        Scalar float32 tensor holding the mean per-label precision.  A label
        that is never predicted positive contributes 0, because the epsilon
        in the denominator turns 0/0 into 0.
    """
    is_actual = tf.equal(actual, 1)
    is_predicted = tf.equal(tf.round(pred), 1)

    # Reduce over the sample axis for all labels at once.  This replaces a
    # Python loop over `actual.shape[1]`, which added one set of ops per label
    # and could not run when the static label dimension was unknown.
    true_positives = tf.reduce_sum(
        tf.cast(tf.math.logical_and(is_actual, is_predicted), tf.float32), axis=0
    )
    predicted_positives = tf.reduce_sum(tf.cast(is_predicted, tf.float32), axis=0)

    per_label_precision = true_positives / (predicted_positives + tf.keras.backend.epsilon())
    return tf.reduce_mean(per_label_precision)

In [6]:
# Training hyper-parameters; the fit cell reads both.
ep = 500
bs = 1024


def _two_relu_layers(tensor, first_name, second_name):
    """Apply Dense(500, relu) then Dense(250, relu) to ``tensor`` using the given layer names."""
    hidden = Dense(500, activation='relu', name=first_name)(tensor)
    return Dense(250, activation='relu', name=second_name)(hidden)


# Three named inputs: a 5-feature vector and two 384-feature vectors.
input_3 = Input(shape=(5,), name='law_area')
input_1 = Input(shape=(384,), name='facts')
input_2 = Input(shape=(384,), name='considerations')

# Each 384-feature input goes through its own two-layer branch.
b1 = _two_relu_layers(input_1, 'facts_dense1', 'facts_dense2')
b2 = _two_relu_layers(input_2, 'considerations_dense1', 'considerations_dense2')

# The 5-feature input skips the branches and is joined with their outputs.
merged = Concatenate()([input_3, b1, b2])

# Head: one more ReLU layer, then 244 sigmoid units.
output = Dense(250, activation='relu', name='concatenate_dense1')(merged)
output = Dense(244, activation='sigmoid', name='output_layer')(output)

# Input order here (law_area, facts, considerations) is the order
# list-style inputs must follow when the model is called or fitted.
model = tf.keras.Model(inputs=[input_3, input_1, input_2], outputs=output)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[avg_recall, avg_precision],
)

model.summary()

In [4]:
# Raw training and validation tables.
df_train = pd.read_csv("sep_selected_laws_final.csv")
df_val = pd.read_csv("sep_selected_laws_final_val.csv")

# Positional column ranges of the raw value matrices.  Columns 0:3 are not
# read (a commented-out `language_*` slice used to take them).
# NOTE(review): the names do not line up with the model's input widths --
# `facts_*` is 5 columns wide while the model's 5-feature input is called
# `law_area`, and `law_area_*` is 384 wide.  Confirm the intended labelling.
facts_train, considerations_train, law_area_train, laws_train = (
    df_train.values[:, 3:8],
    df_train.values[:, 8:392],
    df_train.values[:, 392:776],
    df_train.values[:, 776:],
)

# NOTE(review): these four validation names are assigned again further down
# in this cell from the parsed list columns, so the slices taken here are
# replaced before the fit cell runs.
facts_val, considerations_val, law_area_val, laws_val = (
    df_val.values[:, 3:8],
    df_val.values[:, 8:392],
    df_val.values[:, 392:776],
    df_val.values[:, 776:],
)


import ast
#import multiprocessing

def worker(cokumn):
    """Turn a column of Python-literal strings into a NumPy array.

    Every element of ``cokumn`` is parsed with ``ast.literal_eval`` and the
    parsed values are stacked with ``np.array``.
    """
    parsed_rows = [ast.literal_eval(cell) for cell in cokumn]
    return np.array(parsed_rows)

# The module-level import of `multiprocessing` is commented out in this cell,
# so `multiprocessing.Pool()` raised NameError; import it here.
import multiprocessing

# Columns that store stringified lists, in the order the parsed results are
# unpacked below.
_LIST_COLUMNS = [
    'onehot_language',
    'onehot_law_area',
    'encoded_facts',
    'encoded_considerations',
    'onehot_laws',
]


def _parse_list_columns(pool, frame):
    """Parse each column in ``_LIST_COLUMNS`` of ``frame`` with ``worker``, one pool task per column."""
    return pool.map(worker, [frame[name] for name in _LIST_COLUMNS])


if __name__ == "__main__":
    # One pool serves both tables instead of starting a second pool for the
    # validation frame.
    # NOTE(review): under the "spawn" start method the child processes must be
    # able to import `worker`; a function defined in a notebook cell may not
    # be importable there -- confirm on the target platform.
    with multiprocessing.Pool() as pool:
        train_results = _parse_list_columns(pool, df_train)
        val_results = _parse_list_columns(pool, df_val)

    # The `lanugage` spelling is kept so existing references keep working.
    train_lanugage, train_law_area, train_facts, train_considerations, train_laws = train_results
    lanugage_val, law_area_val, facts_val, considerations_val, laws_val = val_results


In [None]:
# List inputs must follow the order the model declares them in
# (`inputs=[input_3, input_1, input_2]`, i.e. law_area, facts, considerations).
# The previous call passed facts, considerations, law_area, which routed the
# parsed 'encoded_facts' validation array to the 5-feature `law_area` input.
# Training and validation now both use the arrays parsed from the list columns.
h = model.fit(
    [train_law_area, train_facts, train_considerations],
    train_laws,
    epochs=ep,
    batch_size=bs,
    validation_data=([law_area_val, facts_val, considerations_val], laws_val),
)