In [1]:
import pickle
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn import metrics

## Importing Tokenizer for the selected Model

In [2]:
# Load the tokenizer object that was pickled alongside the model weights.
# Later cells call `texts_to_sequences` and read `word_index` on it, so it is
# presumably a fitted Keras Tokenizer — TODO confirm.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load this file if it comes from a trusted source.
with open('./data/tokenizer_lstm_gru.pickle', 'rb') as handle:
     tokenizer = pickle.load(handle)

## Creating a custom scikit-learn transformer for tokenizing and padding the text

In [3]:
from sklearn.base import BaseEstimator, TransformerMixin

def pad_text(texts, tokenizer,MAX_SEQUENCE_LENGTH):
    """Turn raw texts into fixed-length integer sequences.

    Args:
        texts: Iterable of strings to encode.
        tokenizer: Object exposing ``texts_to_sequences`` (the tokenizer loaded
            from the pickle file in this notebook).
        MAX_SEQUENCE_LENGTH: Value forwarded as ``maxlen`` to ``pad_sequences``.

    Returns:
        Whatever ``pad_sequences`` returns for the encoded texts; the padding
        and truncation sides are left at the ``pad_sequences`` defaults.
    """
    token_id_sequences = tokenizer.texts_to_sequences(texts)
    return pad_sequences(token_id_sequences, maxlen=MAX_SEQUENCE_LENGTH)


class TokenizingAndPaddingText(BaseEstimator, TransformerMixin):
    """Stateless scikit-learn transformer that tokenizes and pads raw text.

    Wraps ``pad_text`` so it can be used as a pipeline step in front of the
    Keras model.

    Args:
        tokenizer: Already-fitted tokenizer exposing ``texts_to_sequences``.
        max_sequence_length: Length every sequence is padded/truncated to.
            Defaults to 250, the value that used to be hard-coded in
            ``transform`` and that ``LSTM_GRU_model`` uses as its default
            input length.
    """

    def __init__(self, tokenizer, max_sequence_length=250):  # no *args or **kwargs
        # scikit-learn convention: store constructor arguments unchanged under
        # attributes of the same name so get_params()/clone() keep working.
        self.tokenizer = tokenizer
        self.max_sequence_length = max_sequence_length

    def fit(self, X, y=None):
        """No-op: the tokenizer is already fitted. Returns ``self``."""
        return self

    def transform(self, X):
        """Return the padded integer sequences for the texts in ``X``."""
        return pad_text(X, self.tokenizer, self.max_sequence_length)

## Loading the Model along with trained weights

In [4]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, LSTM, GRU,Dropout, Activation, Input,\
 Embedding,Bidirectional,GlobalMaxPool1D,concatenate,Reshape,GlobalAveragePooling1D

In [5]:

#https://github.com/riteshranjan110/Jigsaw-Unintended-Bias-Toxic-Comment-Classification/blob/master/Jigsaw4.ipynb

def LSTM_GRU_model(MAX_SEQUENCE_LENGTH=250,EMBEDDINGS_DIMENSION = 300):
    """Build and compile the bidirectional LSTM + GRU classifier.

    The architecture must stay in sync with the weights file that is loaded
    right after this function is called, so layer order and sizes should not
    be changed casually.

    Reads the module-level ``tokenizer`` to size the embedding vocabulary
    (``len(tokenizer.word_index) + 1``).

    Args:
        MAX_SEQUENCE_LENGTH: Length of each integer input sequence.
        EMBEDDINGS_DIMENSION: Size of each embedding vector.

    Returns:
        A compiled Keras ``Model`` mapping an int32 sequence input named
        ``'inputs'`` to a 2-unit sigmoid output.
    """
    #model_conv = Sequential()
    sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32',name='inputs')
    # The embedding is frozen (trainable=False); no pretrained matrix is passed
    # here, so its values come from whatever weights are loaded afterwards.
    # NOTE(review): `input_length` is reportedly deprecated in newer Keras
    # releases — confirm against the installed version.
    embedding_layer = Embedding(len(tokenizer.word_index) + 1,
                                        EMBEDDINGS_DIMENSION,
                                        #weights=[glove_matrix],  
                                        input_length=MAX_SEQUENCE_LENGTH,
                                        trainable=False)
    
    
    
    layer1 = embedding_layer(sequence_input)
    
    layer1 = Dropout(0.2)(layer1)
    # Two stacked recurrent layers, both returning the full sequence. The GRU
    # additionally returns its final forward and backward hidden states.
    layer1 = Bidirectional(LSTM(120, return_sequences=True))(layer1)
    layer1, fowrward_h_state, backward_h_state = Bidirectional(GRU(60, return_sequences= True, return_state=True))(layer1)
    
    
    
    # Final hidden states of both directions (2 x 60 = 120 features), reshaped
    # to add a length axis so they can be concatenated with the pooled tensors.
    h_state = concatenate([fowrward_h_state, backward_h_state])
    h_state = Reshape((-1,120))(h_state )

    # Average- and max-pool the GRU output sequence over the time axis.
    # Assumes the bidirectional GRU output has 120 features per step (Keras'
    # default merge of the two 60-unit directions) — TODO confirm.
    h_avg = GlobalAveragePooling1D()(layer1)
    h_max = GlobalMaxPool1D()(layer1)

    h_avg = Reshape((-1,120))(h_avg)
    h_max = Reshape((-1,120))(h_max)
    
    
    
    # Combine the three summaries and pass them through a small dense head.
    x = concatenate([h_state, h_avg, h_max])
    x = Dense(20, activation='relu')(x)
    x = Dropout(0.1)(x)
    #https://stackoverflow.com/questions/56918388/error-valueerror-the-last-dimension-of-the-inputs-to-dense-should-be-defined
    # Pooling here removes the length axis introduced by the Reshape calls
    # above, so the final Dense layer receives a 2-D tensor.
    x = GlobalMaxPool1D()(x)
    
    
    # NOTE(review): a 2-unit sigmoid output is paired with
    # categorical_crossentropy below. Confirm this matches how the saved
    # weights were trained before changing either one.
    output = Dense(2, activation='sigmoid')(x)
    model = Model(inputs=sequence_input, outputs = output)
    model.compile(loss='categorical_crossentropy',optimizer=Adam(),metrics=['accuracy'])
    
    
    

    return model

In [6]:
# Rebuild the network with its default sizes, then restore the trained weights
# from disk. The architecture built by LSTM_GRU_model() has to match the one
# the weights file was saved from.
WEIGHTS_PATH = "./data/weights_lstm_gru.best.hdf5"
LSTM_GRU_model_1 = LSTM_GRU_model()
LSTM_GRU_model_1.load_weights(WEIGHTS_PATH)     

## Creating an sklearn pipeline with 2 steps (tokenize/pad, then the model):

In [7]:
from sklearn import pipeline
# Keras Model to sklearn pipeline:https://gist.github.com/MaxHalford/9bfaa8daf8b4bc17a7fb7ba58c880675
# Step 1 turns raw strings into padded integer sequences; step 2 is the loaded
# Keras model, so pipe_1.predict(texts) runs both in order.
# NOTE(review): the Keras model is not a scikit-learn estimator; this relies on
# Pipeline only needing the final step to expose fit/predict — confirm against
# the installed scikit-learn version.
pipe_1 = pipeline.Pipeline([
    ('Tokenize_Padding', TokenizingAndPaddingText(tokenizer)),
    ('LSTM_GRU_Model',LSTM_GRU_model_1)
])

# 1. Function 1

In [8]:
def Function_1(text_list):
    """Score raw comments with the tokenizer + model pipeline.

    Args:
        text_list: List of comment strings.

    Returns:
        A list with one score per input text, taken from column 1 of the
        model's 2-column prediction output.
    """
    predictions = pipe_1.predict(text_list)
    column_one_scores = predictions[:, 1]
    return list(column_one_scores)

## Result of Function 1:

In [9]:
import pandas as pd
# Read the test set (comma-separated, double-quoted fields) and preview the
# first rows; the `comment_text` column is what Function_1 is fed below.
test_embd = pd.read_csv('./data/test.csv', sep=',', quotechar='"')
test_embd.head()

Unnamed: 0,id,comment_text
0,7097320,[ Integrity means that you pay your debts.]\n\...
1,7097321,This is malfeasance by the Administrator and t...
2,7097322,@Rmiller101 - Spoken like a true elitist. But ...
3,7097323,"Paul: Thank you for your kind words. I do, in..."
4,7097324,Sorry you missed high school. Eisenhower sent ...


In [10]:
# Score test comments 20-39 and show each text next to its model score.
sample_comments = list(test_embd.comment_text[20:40].astype(str))
function_1_result = pd.DataFrame({
    "comment_text": sample_comments,
    "model_prediction_score": Function_1(sample_comments),
})
# A score of 0.5 or more is reported as a positive prediction.
meets_threshold = function_1_result["model_prediction_score"] >= 0.5
function_1_result["model_predictions"] = np.where(meets_threshold, True, False)
function_1_result.tail()

Unnamed: 0,comment_text,model_prediction_score,model_predictions
15,The profoundly stupid have spoken.,0.999986,True
16,The ignorance and bigotry comes from your post!,0.722706,True
17,Don’t get it do you. As the price of things go...,0.00041,False
18,I bet China would be happy to help Puerto Rico...,0.001043,False
19,“This was not an action that was taken lightly...,0.001054,False


# 2. FUNCTION 2

## EVALUATION METRIC

In [11]:
##https://www.kaggle.com/c/jigsaw-unintended-bias-in-toxicity-classification/overview/evaluation

# Keys/column names under which the three per-subgroup AUC values are stored
# in the bias-metrics records built by compute_bias_metrics_for_model.
SUBGROUP_AUC = 'subgroup_auc'
BPSN_AUC = 'bpsn_auc'  # stands for background positive, subgroup negative
BNSP_AUC = 'bnsp_auc'  # stands for background negative, subgroup positive

def compute_auc(y_true, y_pred):
    """Return the ROC AUC of ``y_pred`` against ``y_true``.

    If ``metrics.roc_auc_score`` raises ``ValueError`` the result is
    ``np.nan`` instead of an exception, so callers can keep going when a
    slice of the data cannot be scored.
    """
    try:
        auc = metrics.roc_auc_score(y_true, y_pred)
    except ValueError:
        auc = np.nan
    return auc

def compute_subgroup_auc(df, subgroup, label, model_name):
    """AUC restricted to the rows where the boolean ``subgroup`` column is True.

    Args:
        df: DataFrame holding the subgroup, label and prediction columns.
        subgroup: Name of the boolean subgroup-membership column.
        label: Name of the true-label column.
        model_name: Name of the column holding the model's scores.
    """
    membership_mask = df[subgroup]
    members = df[membership_mask]
    return compute_auc(members[label], members[model_name])

def compute_bpsn_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup negative examples and the background positive examples.

    Args:
        df: DataFrame whose ``subgroup`` and ``label`` columns are boolean.
        subgroup: Name of the subgroup-membership column.
        label: Name of the true-label column.
        model_name: Name of the column holding the model's scores.

    Returns:
        The value of ``compute_auc`` on the combined rows (NaN when the AUC
        cannot be computed).
    """
    subgroup_negative_examples = df[df[subgroup] & ~df[label]]
    non_subgroup_positive_examples = df[~df[subgroup] & df[label]]
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # frames in the same order and works on old and new pandas alike.
    examples = pd.concat([subgroup_negative_examples, non_subgroup_positive_examples])
    return compute_auc(examples[label], examples[model_name])

def compute_bnsp_auc(df, subgroup, label, model_name):
    """Computes the AUC of the within-subgroup positive examples and the background negative examples.

    Args:
        df: DataFrame whose ``subgroup`` and ``label`` columns are boolean.
        subgroup: Name of the subgroup-membership column.
        label: Name of the true-label column.
        model_name: Name of the column holding the model's scores.

    Returns:
        The value of ``compute_auc`` on the combined rows (NaN when the AUC
        cannot be computed).
    """
    subgroup_positive_examples = df[df[subgroup] & df[label]]
    non_subgroup_negative_examples = df[~df[subgroup] & ~df[label]]
    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # frames in the same order and works on old and new pandas alike.
    examples = pd.concat([subgroup_positive_examples, non_subgroup_negative_examples])
    return compute_auc(examples[label], examples[model_name])

def compute_bias_metrics_for_model(dataset,
                                   subgroups,
                                   model,
                                   label_col,
                                   include_asegs=False):
    """Build a table of subgroup, BPSN and BNSP AUCs, one row per subgroup.

    Args:
        dataset: DataFrame with boolean subgroup and label columns plus the
            model's score column.
        subgroups: Names of the subgroup columns to evaluate.
        model: Name of the column holding the model's scores.
        label_col: Name of the true-label column.
        include_asegs: Accepted for interface compatibility; not used.

    Returns:
        DataFrame with columns ``subgroup``, ``subgroup_size`` and the three
        AUC columns, sorted by ascending subgroup AUC.
    """
    def _record_for(subgroup):
        # One row of the result table for a single subgroup column.
        return {
            'subgroup': subgroup,
            'subgroup_size': len(dataset[dataset[subgroup]]),
            SUBGROUP_AUC: compute_subgroup_auc(dataset, subgroup, label_col, model),
            BPSN_AUC: compute_bpsn_auc(dataset, subgroup, label_col, model),
            BNSP_AUC: compute_bnsp_auc(dataset, subgroup, label_col, model),
        }

    per_subgroup_rows = [_record_for(name) for name in subgroups]
    metrics_table = pd.DataFrame(per_subgroup_rows)
    return metrics_table.sort_values('subgroup_auc', ascending=True)




def calculate_overall_auc(df, model_name):
    """ROC AUC of the ``model_name`` score column against the ``target`` column.

    Unlike ``compute_auc`` this does not catch ``ValueError``; any error from
    ``metrics.roc_auc_score`` propagates to the caller.
    """
    return metrics.roc_auc_score(df["target"], df[model_name])

def power_mean(series, p):
    """Generalized (power) mean of ``series`` with exponent ``p``.

    Raises every value to the power ``p``, averages the results, and takes
    the ``1 / p`` power of that average.
    """
    powered_values = np.power(series, p)
    mean_of_powers = sum(powered_values) / len(series)
    return np.power(mean_of_powers, 1 / p)

def get_final_metric(bias_df, overall_auc, POWER=-5, OVERALL_MODEL_WEIGHT=0.25):
    """Blend the overall AUC with the averaged per-subgroup bias AUCs.

    Args:
        bias_df: Table with the subgroup, BPSN and BNSP AUC columns.
        overall_auc: AUC computed over the whole dataset.
        POWER: Exponent passed to ``power_mean`` for each AUC column.
        OVERALL_MODEL_WEIGHT: Weight given to ``overall_auc``; the bias score
            gets the remaining ``1 - OVERALL_MODEL_WEIGHT``.

    Returns:
        The weighted sum of ``overall_auc`` and the bias score.
    """
    auc_columns = (SUBGROUP_AUC, BPSN_AUC, BNSP_AUC)
    column_power_means = [power_mean(bias_df[column], POWER) for column in auc_columns]
    bias_score = np.average(column_power_means)
    return (OVERALL_MODEL_WEIGHT * overall_auc) + ((1 - OVERALL_MODEL_WEIGHT) * bias_score)

## Creating custom estimator to convert score columns to Boolean values

In [12]:
# Converting target and identity columns to booleans
def convert_to_bool(df, col_name):
    """Replace ``df[col_name]`` in place with True where the value is >= 0.5.

    All other values (including anything for which the comparison is False)
    become False. Mutates ``df`` and returns None.
    """
    at_or_above_half = df[col_name] >= 0.5
    df[col_name] = np.where(at_or_above_half, True, False)
    
def convert_dataframe_to_bool(df,identity_columns):
    """Return a copy of ``df`` with ``target`` and the identity columns as booleans.

    The input frame is left untouched; each listed column in the copy is
    thresholded by ``convert_to_bool``.
    """
    converted = df.copy()
    columns_to_convert = ['target'] + identity_columns
    for column in columns_to_convert:
        convert_to_bool(converted, column)
    return converted


class ConverScoreToBool(BaseEstimator, TransformerMixin):
    """Stateless transformer that thresholds score columns into booleans.

    Args:
        identity_columns: Names of the identity (subgroup) score columns.
        TOXICITY_COLUMN: Name of the overall toxicity score column. It was
            previously stored but ignored (``'target'`` was always converted);
            ``transform`` now converts the column named here.
    """

    def __init__(self, identity_columns, TOXICITY_COLUMN):  # no *args or **kwargs
        self.identity_columns = identity_columns
        self.TOXICITY_COLUMN = TOXICITY_COLUMN

    def fit(self, df, y=None):
        """No-op: there is nothing to learn. Returns ``self``."""
        return self

    def transform(self, df):
        """Return a copy of ``df`` with the toxicity and identity columns as booleans.

        The input frame is not modified. With ``TOXICITY_COLUMN='target'``
        the result matches ``convert_dataframe_to_bool(df, identity_columns)``.
        """
        bool_df = df.copy()
        for col in [self.TOXICITY_COLUMN] + list(self.identity_columns):
            convert_to_bool(bool_df, col)
        return bool_df

In [13]:
def Function_2(test_df):
    """Compute the final bias-aware evaluation score for a labelled dataset.

    Args:
        test_df: DataFrame with a ``comment_text`` column, a ``target`` score
            column and one score column per identity listed below.

    Returns:
        The value of ``get_final_metric`` for the model's predictions, with
        any NaN per-subgroup AUC replaced by 0 beforehand.
    """
    toxicity_column = 'target'
    subgroup_columns = [
         'male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish',
         'muslim', 'black', 'white', 'psychiatric_or_mental_illness']

    # Work on a boolean copy so the caller's frame is left unchanged.
    bool_df = ConverScoreToBool(subgroup_columns, toxicity_column).transform(test_df)
    comment_texts = bool_df.comment_text.astype(str)
    bool_df["model_prob"] = list(pipe_1.predict(comment_texts)[:,1])

    bias_metrics_df = compute_bias_metrics_for_model(
        bool_df, subgroup_columns, "model_prob", toxicity_column)
    overall_auc = calculate_overall_auc(bool_df, "model_prob")
    return get_final_metric(bias_metrics_df.fillna(0), overall_auc)
    

## Result of Function 2

To evaluate the model, the input data must contain the following columns:
<ol>
<li> identity_columns = <b>['male', 'female', 'homosexual_gay_or_lesbian', 'christian', 'jewish','muslim', 'black', 'white', 'psychiatric_or_mental_illness']</b>:
a score between 0 and 1 for each identity column (values of 0.5 or above are treated as belonging to that subgroup) </li> 

<li> comment_text: the text of the comment</li>
<li>target: the overall toxicity score of the comment (values of 0.5 or above are treated as toxic)</li>
 </ol>
         


## Importing Train data to test Function 2

In [14]:
# The training file is used here because Function_2 needs the `target` and
# identity score columns in addition to `comment_text`.
train_embd = pd.read_csv('./data/train.csv', sep=',', quotechar='"')
train_embd.head(2)

Unnamed: 0,id,target,comment_text,severe_toxicity,obscene,identity_attack,insult,threat,asian,atheist,...,article_id,rating,funny,wow,sad,likes,disagree,sexual_explicit,identity_annotator_count,toxicity_annotator_count
0,59848,0.0,"This is so cool. It's like, 'would you want yo...",0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4
1,59849,0.0,Thank you!! This would make my life a lot less...,0.0,0.0,0.0,0.0,0.0,,,...,2006,rejected,0,0,0,0,0,0.0,0,4


## Evaluation Score of input data 

In [15]:
# Evaluate on the first 500 training rows only.
# NOTE(review): on a sample this small some subgroups may yield a NaN AUC,
# which Function_2 replaces with 0 before averaging — presumably the reason
# the score is low; confirm on a larger sample.
Function_2(train_embd[:500])

0.24439102564102566

In [16]:
# (rows, columns) of the full training frame, for comparison with the
# 500-row sample scored above.
train_embd.shape

(1804874, 45)