In [None]:
# Mount Google Drive at /content/drive (Colab runtime only); the data paths used
# later in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install tensorflow_text
import tensorflow_text as text
import numpy as np
import pandas as pd
import keras
import tensorflow as tf
from keras.layers import *
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import matplotlib
from keras.models import Model
from tqdm import tqdm, trange
import time
import os
import torch
from sklearn.metrics import accuracy_score, f1_score, classification_report

In [None]:
# Raw text -> BERT token embeddings, assembled from TF-Hub components.
seq_length = 512  # forwarded to bert_pack_inputs as its seq_length argument

preprocessor = hub.load("https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3")

# One scalar string input per example (a single text segment).
text_inputs = [tf.keras.layers.Input(shape=(), dtype=tf.string)]

# Tokenize each segment, then pack the segments for the encoder.
tokenize = hub.KerasLayer(preprocessor.tokenize)
tokenized_inputs = list(map(tokenize, text_inputs))
bert_pack_inputs = hub.KerasLayer(
    preprocessor.bert_pack_inputs,
    arguments={"seq_length": seq_length},
)
encoder_inputs = bert_pack_inputs(tokenized_inputs)

# BERT-large encoder; no trainable=True is passed to the KerasLayer here.
encoder = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-24_H-1024_A-16/4")
outputs = encoder(encoder_inputs)

# NOTE: despite the variable name, this is the encoder's "sequence_output"
# entry, not its "pooled_output"; the BiLSTM layers below consume it.
pooled_output = outputs["sequence_output"]
embedding_model = tf.keras.Model(text_inputs, pooled_output)

In [None]:
def label_vid(row):
    """Translate a raw label value into its integer class id.

    Containment checks (``marker in row``) are tried in this order:
    "Medical Non-instructional" -> 0, "Medical Instructional" -> 1,
    "Non-medical" -> 2. If none matches, -1 is returned.
    """
    marker_to_id = (
        ("Medical Non-instructional", 0),
        ("Medical Instructional", 1),
        ("Non-medical", 2),
    )
    for marker, class_id in marker_to_id:
        if marker in row:
            return class_id
    return -1

def import_datasets(vit_datatype = True,
                    text_dir = '/content/drive/MyDrive/MedVidCL/data/text',
                    features_root = '/content/drive/MyDrive/MedVidCL/features'):
    """Load every ``*.json`` split in ``text_dir`` together with its video features.

    Parameters
    ----------
    vit_datatype : bool
        If true, features are read from the ``ViT`` sub-folder of
        ``features_root``; otherwise from the ``I3D`` sub-folder.
    text_dir : str
        Directory holding the JSON files (one per split).
    features_root : str
        Directory holding ``<ViT|I3D>/<split>/<video id>.npy`` feature files,
        where ``<split>`` is the JSON file name without its extension.

    Returns
    -------
    dict
        Maps each JSON file name (extension included, e.g. ``'train.json'``)
        to a DataFrame with the renamed columns ``text``, ``title``,
        ``labels`` (integer ids from ``label_vid``) and ``YouTube_ID``, plus
        ``features`` (float32 NumPy arrays) and ``torch_features`` (the same
        arrays wrapped with ``torch.Tensor``).
    """
    feature_kind = 'ViT' if vit_datatype else 'I3D'
    column_names = {'video_sub_title':'text', 'video_title':'title', 'label':'labels', 'video_id':'YouTube_ID'}

    datasets = {}
    for json_filename in os.listdir(text_dir):
        if not json_filename.endswith('.json'):
            continue

        # Features for "<split>.json" live in the "<split>" sub-folder.
        split_name = os.path.splitext(json_filename)[0]
        feature_dir = os.path.join(features_root, feature_kind, split_name)

        frame = pd.read_json(os.path.join(text_dir, json_filename))
        frame = frame.rename(columns = column_names)
        # Replace the textual labels with integer class ids.
        frame['labels'] = frame['labels'].apply(label_vid)
        # Load each video's feature array and store it as float32.
        frame['features'] = frame['YouTube_ID'].apply(
            lambda video_id: np.load(os.path.join(feature_dir, video_id + '.npy')).astype('float32'))
        # Keep a PyTorch tensor version alongside the NumPy arrays.
        frame['torch_features'] = frame['features'].apply(lambda x: torch.Tensor(x))

        datasets[json_filename] = frame

    return datasets

In [None]:
# vit_datatype=False selects the I3D feature files instead of the ViT ones.
datasets = import_datasets(vit_datatype=False)

In [None]:
# Training pool = 'train.json' + 'test.json'; 'val.json' is what the models are
# evaluated on further down (NOTE(review): confirm this split swap is intended).
# pd.concat is the drop-in replacement for DataFrame.append, which was removed
# in pandas 2.0; as with append, the original row labels are kept.
train = pd.concat([datasets['train.json'], datasets['test.json']])
test = datasets['val.json']

In [None]:
# Binary target for the first stage, derived from the 3-class labels
# (0 = medical non-instructional, 1 = medical instructional, 2 = non-medical):
#   0, 1 -> 1 (medical)      2 -> 0 (non-medical)
# Done as one plain assignment: train[col].replace(..., inplace=True) is a
# chained in-place update that acts on a temporary under pandas Copy-on-Write
# and can leave the frame unchanged.
train['Medical_NonMedical'] = train['labels'].replace({0: 1, 2: 0})

In [None]:
# Second-stage training rows: everything whose binary target is not 0 (non-medical).
train_Inst_NonInst = train.loc[train['Medical_NonMedical'].ne(0)]

In [None]:
### Stage 1 classifier: medical vs. non-medical, from the text column only
x = train['text'].to_numpy()
x = np.asarray(x).astype(str)

# One-hot targets built from the binary column (0 = non-medical, 1 = medical).
y = pd.get_dummies(train['Medical_NonMedical']).values
y = np.asarray(y).astype('float32')

text_input = Input(shape=(),dtype=tf.string,name='Text')
encode_output = embedding_model(text_input)

### bilstm
# LSTM instead of CuDNNLSTM: the latter is not exported by TF2's Keras (so the
# star import does not provide it), while LSTM with default arguments already
# runs on the cuDNN kernel when a GPU is available.
bilstm = Bidirectional(LSTM(units=128,return_sequences=False))(encode_output)

### Output: two-way softmax matching the two one-hot columns of y
output = Dense(2,activation='softmax')(bilstm)

first_model = Model(inputs=text_input,outputs=output)

first_model.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

first_model.summary()

In [None]:
# Stop once the training accuracy has failed to improve for 3 epochs in a row
# (no validation data is passed to fit, so the training metric is monitored).
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='accuracy',
    mode='auto',
    patience=3,
    verbose=1,
)

hist = first_model.fit(
    x,
    y,
    batch_size=8,
    epochs=50,
    callbacks=[early_stopping],
    verbose=1,
)

In [None]:
### Copy the per-video feature arrays into dense (n_videos, 2, 1024) arrays
# np.zeros allocates float64, so the float32 features are widened on assignment.
train_feature_rows = train_Inst_NonInst['features'].to_numpy()
train_vidf = np.zeros((len(train_Inst_NonInst), 2, 1024))
for row, video_features in enumerate(train_feature_rows):
    train_vidf[row] = video_features

test_feature_rows = test['features'].to_numpy()
test_vidf = np.zeros((len(test), 2, 1024))
for row, video_features in enumerate(test_feature_rows):
    test_vidf[row] = video_features

In [None]:
# One-hot float32 targets for the second stage, built from the original labels
# of the medical-only rows.
y = pd.get_dummies(train_Inst_NonInst['labels']).to_numpy(dtype='float32')

In [None]:
### Stage 2 classifier: medical instructional vs. medical non-instructional,
### from the text column plus the pre-extracted video features
x_text = train_Inst_NonInst['text'].to_numpy()
x_text = np.asarray(x_text).astype(str)

### Video features
vid_features_input = Input(shape=(2,1024),name='VidFeatures')

### bilstm
# LSTM instead of CuDNNLSTM: the latter is not exported by TF2's Keras (so the
# star import does not provide it), while LSTM with default arguments already
# runs on the cuDNN kernel when a GPU is available.
bilstm_vid = Bidirectional(LSTM(units=128,return_sequences=False))(vid_features_input)

### Video features dense
vid_features_data = Dense(1024)(bilstm_vid)

### Text
text_input = Input(shape=(),dtype=tf.string,name='Text')
encode_output = embedding_model(text_input)

### bilstm
bilstm = Bidirectional(LSTM(units=128,return_sequences=False))(encode_output)

### Text dense
text_data = Dense(1024)(bilstm)

# Fuse both modalities and classify with a two-way softmax.
concat = concatenate([text_data, vid_features_data])
output = Dense(2, activation='softmax')(concat)

second_model = Model(inputs=[text_input,vid_features_input],outputs=output)

second_model.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

second_model.summary()

In [None]:
# Stop once the training accuracy has failed to improve for 3 epochs in a row.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='accuracy',
                                               mode='auto',
                                               patience=3,
                                               verbose=1)

# Same training schedule as the first-stage model. The previous
# epochs=1 / steps_per_epoch=2 setting only ever saw 2 batches of 8 samples
# and made the patience-3 early stopping above unreachable.
hist = second_model.fit([x_text, train_vidf], y,
                       epochs=50,
                       batch_size=8,
                       verbose=1,
                       callbacks=[early_stopping])

In [None]:
# Feed the evaluation text the same way the training text was fed: as a NumPy
# array of str (the raw pandas Series may hold non-string entries).
test_text = np.asarray(test['text'].to_numpy()).astype(str)

first_pred = first_model.predict(test_text)
second_pred = second_model.predict([test_text, test_vidf])

In [None]:
# Cascade the two classifiers into one 3-class prediction per test row:
#   stage 1 picks class 0          -> final label 2 (non-medical)
#   otherwise stage 2 picks class 0 -> final label 0, any other class -> 1
n_rows = len(test)
stage1_choice = np.argmax(first_pred, axis=1)[:n_rows]
stage2_choice = np.argmax(second_pred, axis=1)[:n_rows]

y_pred = [
    2 if medical == 0 else (0 if instructional == 0 else 1)
    for medical, instructional in zip(stage1_choice, stage2_choice)
]


In [None]:
# Per-class precision / recall / F1 on the held-out frame, restricted to the
# label values that actually occur in it.
y_real = test['labels'].to_list()
present_labels = list(set(test['labels']))
print(classification_report(y_true=y_real, y_pred=y_pred, labels=present_labels, digits=4))