In [1]:
import pandas as pd
import numpy as np
import tensorflow
from tensorflow.compat.v1 import InteractiveSession
config = tensorflow.compat.v1.ConfigProto() 
config.gpu_options.allow_growth = True
config.log_device_placement = True
sess = tensorflow.compat.v1.Session(config=config)
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPool1D, GlobalMaxPool1D, Embedding, Activation
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow.keras as ks
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten
from tensorflow.keras.regularizers import l1_l2
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.initializers import Constant
from tensorflow.keras.layers import LSTM

Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: GeForce GTX 1660 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5



In [108]:
df = pd.read_csv(r"D:\Datasets\NLP_text_mining\preprocessed_text_toxic2.csv")

In [109]:
tokenizer = Tokenizer(nb_words=100000)
tokenizer.fit_on_texts(df['text'].astype(str))
sequences = tokenizer.texts_to_sequences(df['text'].astype(str))

word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

data = pad_sequences(sequences, maxlen=100)

labels = to_categorical(np.asarray(df.toxic))



Found 184883 unique tokens.


In [4]:
# split the data into a training set and a validation set
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
nb_validation_samples = int(0.20 * data.shape[0])

x_train = data[:-nb_validation_samples]
y_train = labels[:-nb_validation_samples]
x_val = data[-nb_validation_samples:]
y_val = labels[-nb_validation_samples:]

In [110]:
# load the whole embedding into memory
embeddings_index = dict()
f = open("D:/Datasets/embeddings/glove.6B.200d.txt", encoding="utf8")
lista = []
for line in f:
    values = line.split()
    word = values[0]
    lista.append(word)
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Found %s word vectors.' % len(embeddings_index))

Found 400000 word vectors.


In [111]:
embedding_matrix = np.zeros((len(word_index) + 1, 200))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

In [112]:
from imblearn.under_sampling import RandomUnderSampler
from tensorflow.keras.preprocessing.image import ImageDataGenerator
train_img_pro = ImageDataGenerator()
def balanced_flow_from_directory(flow_from_directory):
    for immagini, classi in flow_from_directory:
         yield custom_balance(immagini.reshape(immagini.shape[0],immagini.shape[1]),classi)
            
def custom_balance(X, y):
    rus = RandomUnderSampler()
    X_resampled, y_resampled = rus.fit_sample(X, y.argmax(axis = -1))     
    #X_resampled, y_resampled = shuffle(X_resampled, y_resampled)
    #y_resampled = y_resampled.reshape(y_resampled.shape[0], 1, 1, 1) #da mettere se uso model_conv()
    y_resampled = to_categorical(y_resampled)#da mettere se uso model_flat()
    return(X_resampled, y_resampled)
train_generator_flow = train_img_pro.flow((x_train.reshape(x_train.shape[0],x_train.shape[1],1,1), y_train), 
                    batch_size=5000,
                    shuffle=False #riordino i dati in maniera casuale
                    )   
train_generator_bal = balanced_flow_from_directory(train_generator_flow)

# Ottimizzazione modello 1:

In [113]:
from tensorflow.keras.layers import LSTM
def create_model(ass, fit = False):

    keras_model = Sequential()
    embedding_layer = Embedding(len(word_index)+1, 200,weights=[embedding_matrix],
                               input_length=100, trainable = False)
    keras_model.add(embedding_layer)
    keras_model.add(Conv1D(32, 10, activation='relu', padding='same', strides=1,kernel_initializer='glorot_normal'))
    keras_model.add(MaxPool1D())
    keras_model.add(Conv1D(64, 10, activation='relu', padding='same', strides=1,kernel_initializer='glorot_normal'))
    keras_model.add(MaxPool1D(pool_size=1))
    keras_model.add(Dropout(ass["Dropout1"]))
    keras_model.add(Conv1D(128, 5, activation='relu', padding='same', strides=1,kernel_initializer='glorot_normal'))
    keras_model.add(MaxPool1D(pool_size=4))
    #keras_model.add(Flatten())
    keras_model.add(LSTM(ass["lstm"],kernel_regularizer = l1_l2(l1=0.01, l2=0.01)))
    keras_model.add(Dense(ass["neuron"], activation = "relu",kernel_regularizer = l1_l2(l1=0.1, l2=0.01)))
    # keras_model.add(Dense(32, activation = "relu",kernel_regularizer = l1_l2(l1=0.1, l2=0.01))) #kernel_regularizer = l1_l2(l1=0.1, l2=0.01)
    keras_model.add(Dropout(ass["Dropout2"]))
    keras_model.add(Dense(2, activation = "softmax"))
    opt = ks.optimizers.Nadam(lr=ass["lr"], beta_1=0.9, beta_2=0.99)
    keras_model.compile(loss='categorical_crossentropy', metrics=['acc'], optimizer=opt)
    if fit == True: 
        keras_model.fit_generator(generator=train_generator_bal,epochs=50, verbose= 1,steps_per_epoch = 200)#,class_weight=sample_weights)
    return keras_model

In [114]:
from sklearn.metrics import f1_score, accuracy_score
def evaluate_test(x):
    prediz = x.predict(x_val).argmax(axis = -1)
    prediz_train = x.predict(x_train).argmax(axis = -1)
    f1 = f1_score(y_val.argmax(axis = -1), prediz)
    acc_train = accuracy_score(y_train.argmax(axis = -1), prediz_train)
    acc_test = accuracy_score(y_val.argmax(axis = -1), prediz)
    loss = 1/(1+abs(acc_train - acc_test))
    loss_tot = loss + f1
    return loss_tot

In [10]:
from sigopt import Connection
conn = Connection(client_token="IXACCJUXJAOPJVQJMVVZFLALEKTVAPQQPKLGNRQONZELZTBP")

In [None]:
# experiment = conn.experiments().create(
#     name="Cnn-lstm-f1_acc", parameters=[
#         dict(name="lstm", bounds=dict(min=30, max=500),type="int"),
#         dict(name="neuron", bounds=dict(min=32, max=512),type="int"),
#         dict(name="lr", bounds=dict(min=0.0005, max=0.1),type="double"),
#         dict(name = "Dropout1", bounds = dict(min = 0, max = 0.85), type = "double"),
#         dict(name = "Dropout2", bounds = dict(min = 0, max = 0.85), type = "double")],
#     observation_budget=25,
#     project="text-mining")

In [14]:
experiment = conn.experiments('154960').update(observation_budget=5)

In [None]:
for _ in range(experiment.observation_budget):
    suggestion = conn.experiments(experiment.id).suggestions().create()
    assignments = suggestion.assignments
    model = create_model(assignments)
    f1 = evaluate_test(model)
    model.save_weights('C:/Users/loren/text_mining_project/weights/modello_cnn_lstm/{}.h5'.format(f1))
    conn.experiments(experiment.id).observations().create(
        suggestion=suggestion.id,
        value=f1
    )

In [106]:
best_assignments = conn.experiments(experiment.id).best_assignments().fetch().data[0].assignments
best_model = create_model(best_assignments, fit = False)

ValueError: Layer weight shape (140201, 200) not compatible with provided weight shape (184884, 200)

In [None]:
best_model.load_weights("C:/Users/loren/text_mining_project/weights/modello_cnn_lstm/1.720342497922431.h5")

In [17]:
acc_pred = best_model.predict(x_val).argmax(axis = -1)

In [18]:
from sklearn.metrics import classification_report
print(classification_report(y_val.argmax(axis = -1), acc_pred))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99     28801
           1       0.89      0.94      0.91      3078

    accuracy                           0.98     31879
   macro avg       0.94      0.96      0.95     31879
weighted avg       0.98      0.98      0.98     31879



In [19]:
acc_train = best_model.predict(x_train)

In [20]:
print(classification_report(y_train.argmax(axis = -1), acc_train.argmax(axis = -1)))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99    115305
           1       0.89      0.94      0.91     12215

    accuracy                           0.98    127520
   macro avg       0.94      0.96      0.95    127520
weighted avg       0.98      0.98      0.98    127520



In [21]:
acc_train = accuracy_score(y_train.argmax(axis = -1), acc_train.argmax(axis = -1))
acc_test = accuracy_score(y_val.argmax(axis = -1),acc_pred)

In [22]:
1/(1+(abs(acc_train - acc_test))) + f1_score(y_val.argmax(axis = -1), acc_pred)

1.9130125262894275

In [23]:
f1_score(y_val.argmax(axis = -1), acc_pred)

0.9130982367758186

In [24]:
acc_train

0.9825988080301129

In [25]:
acc_test

0.982684525863421

# Modello multi-input

In [21]:
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input, LSTM
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
from tensorflow.keras.models import Model

In [12]:
tokenizer2 = Tokenizer(nb_words=20)
tokenizer2.fit_on_texts(df['less_pos'].astype(str))
sequences_pos = tokenizer2.texts_to_sequences(df['less_pos'].astype(str))

len(max(sequences_pos, key = len))

len(min(sequences_pos, key = len))

lengths = [len(i) for i in sequences_pos]

data_pos = pad_sequences(sequences_pos, maxlen=100)

data_pos = data_pos[indices]
# labels = labels[indices]
x_train_pos = data_pos[:-nb_validation_samples]
# y_train = labels[:-nb_validation_samples]
x_val_pos = data_pos[-nb_validation_samples:]
# y_val = labels[-nb_validation_samples:]



In [13]:
input_generator = np.concatenate([x_train, x_train_pos], axis = 1)
input_generator = input_generator.reshape(input_generator.shape[0], input_generator.shape[1],1,1)

from imblearn.under_sampling import RandomUnderSampler
def balanced_flow_from_directory(flow_from_directory):
    for immagini, classi in flow_from_directory:
         yield custom_balance(np.squeeze(immagini),classi)
            
def custom_balance(X, y):
    rus = RandomUnderSampler()
    X_resampled, y_resampled = rus.fit_sample(X, y.argmax(axis = -1))     
    #X_resampled, y_resampled = shuffle(X_resampled, y_resampled)
    #y_resampled = y_resampled.reshape(y_resampled.shape[0], 1, 1, 1) #da mettere se uso model_conv()
    y_resampled = to_categorical(y_resampled)#da mettere se uso model_flat()
    return([X_resampled[:,0:100],np.expand_dims(X_resampled[:,100:],-1)] , y_resampled)

train_generator_flow = train_img_pro.flow((input_generator, y_train), 
                    batch_size=3000,
                    shuffle=False #riordino i dati in maniera casuale
                    )   
train_generator_bal = balanced_flow_from_directory(train_generator_flow)

In [31]:
def create_model2(ass, fit = True):
    
    input_pos =Input(shape=(100,1), name = "Input_pos")
    input_embed = Input(shape = (100), name = "Input_embedding")

    embedding_layer = Embedding(len(word_index)+1, 200,weights=[embedding_matrix],
                               input_length=100, trainable = False)(input_embed)
    x = Conv1D(50, 32, activation='relu', padding='same', strides=1,kernel_initializer='glorot_normal')(embedding_layer)
    x = MaxPooling1D()(x)
    x = Conv1D(50, 16, activation='relu', padding='same', strides=1,kernel_initializer='glorot_normal')(x)
    x = MaxPooling1D(pool_size = 1)(x)
    x = Dropout(ass["Dropout1"])(x)
    x = Conv1D(16, 8, activation='relu', padding='same', strides=1,kernel_initializer='glorot_normal')(x)
    x = MaxPooling1D(pool_size = 4)(x)  
    x = CuDNNLSTM(ass["lstm"], kernel_regularizer=l1_l2(l1 = 0.01, l2 = 0.01))(x)
    aux_output = Dropout(ass["Dropout2"])(x)
    #x = Flatten()(x)
    #aux_output = Dense(128, activation='relu')(x)
    # preds = Dense(2, activation='softmax')(x)

    model=CuDNNLSTM(ass["lstm2"],kernel_initializer='glorot_normal')(input_pos)
    model = Dropout(ass["Dropout3"])(model)
    #model = Dense(64, activation = "relu")(model)
    aux_output2 = Dense(ass["dense1"],activation = "relu",kernel_initializer='glorot_normal',kernel_regularizer = l1_l2(l1=0.01, l2=0.01))(model)

    main_out = tensorflow.keras.backend.concatenate([aux_output,aux_output2])
    main_out = Dense(ass["dense2"], activation = "relu",kernel_initializer='glorot_normal',kernel_regularizer = l1_l2(l1=0.001, l2=0.001))(main_out)
    main_out = Dense(2, activation = "softmax", name = "output_totale")(main_out)

    model_tot = Model(inputs = [input_embed, input_pos], outputs = main_out)
    
    opt = ks.optimizers.Nadam(lr=0.01, beta_1=0.9, beta_2=0.99)
    
    model_tot.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
    if fit == True:
        model_tot.fit_generator(train_generator_bal, steps_per_epoch=200, epochs=32)
    
    return model_tot

In [28]:
from sklearn.metrics import f1_score, accuracy_score
def evaluate_test(x):
    prediz = x.predict([x_val.astype("float32"), np.expand_dims(x_val_pos.astype("float32"),-1)]).argmax(axis = -1)
    prediz_train = x.predict([x_train.astype("float32"), np.expand_dims(x_train_pos.astype("float32"), -1)]).argmax(axis = -1)
    f1 = f1_score(y_val.argmax(axis = -1), prediz)
    acc_train = accuracy_score(y_train.argmax(axis = -1), prediz_train)
    acc_test = accuracy_score(y_val.argmax(axis = -1), prediz)
    loss = 1/(1+abs(acc_train - acc_test))
    loss_tot = loss + f1
    return loss_tot

In [17]:
experiment = conn.experiments().create(
    name="Cnn-lstm-Pos_f1_acc", parameters=[
        dict(name="lstm", bounds=dict(min=30, max=500),type="int"),
        dict(name="lstm2", bounds=dict(min=30, max=500),type="int"),
        dict(name = "dense1", bounds=dict(min=32, max=512),type="int"),
        dict(name = "dense2", bounds=dict(min=32, max=512),type="int"),
        #dict(name="lr", bounds=dict(min=0.01, max=0.1),type="double"),
        dict(name = "Dropout1", bounds = dict(min = 0, max = 0.85), type = "double"),
        dict(name = "Dropout2", bounds = dict(min = 0, max = 0.85), type = "double"),
    dict(name = "Dropout3", bounds = dict(min = 0, max = 0.85), type = "double")],
    observation_budget=30,
    project="text-mining")

In [25]:
experiment = conn.experiments('155992').update(observation_budget=10)

In [30]:
for _ in range(experiment.observation_budget):
    suggestion = conn.experiments(experiment.id).suggestions().create()
    assignments = suggestion.assignments
    model = create_model2(assignments)
    f1 = evaluate_test(model)
    model.save_weights('C:/Users/loren/text_mining_project/weights/modello_cnn_lstm_pos/{}.h5'.format(f1))
    conn.experiments(experiment.id).observations().create(
        suggestion=suggestion.id,
        value=f1
    )

Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32


Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32


Epoch 32/32
Epoch 1/32
Epoch 2/32
Epoch 3/32
Epoch 4/32
Epoch 5/32
Epoch 6/32
Epoch 7/32
Epoch 8/32
Epoch 9/32
Epoch 10/32
Epoch 11/32
Epoch 12/32
Epoch 13/32
Epoch 14/32
Epoch 15/32
Epoch 16/32
Epoch 17/32
Epoch 18/32
Epoch 19/32
Epoch 20/32
Epoch 21/32
Epoch 22/32
Epoch 23/32
Epoch 24/32
Epoch 25/32
Epoch 26/32
Epoch 27/32
Epoch 28/32
Epoch 29/32
Epoch 30/32
Epoch 31/32
Epoch 32/32
Epoch 1/32
 17/200 [=>............................] - ETA: 42s - loss: 10.7205 - accuracy: 0.5010

ResourceExhaustedError: OOM when allocating tensor with shape[128] and type int8 on /job:localhost/replica:0/task:0/device:GPU:0 by allocator gpu_host_bfc [Op:ConcatV2] name: concat

In [32]:
best_assignments = conn.experiments(experiment.id).best_assignments().fetch().data[0].assignments
best_model = create_model2(best_assignments, fit = False)

InvalidArgumentError:  Cannot parse tensor from proto: dtype: DT_INT32
tensor_shape {
}
int_val: 1

	 [[{{node Identity/_0__cf__135}}]] [Op:__inference_keras_scratch_graph_523483307]

Function call stack:
keras_scratch_graph


# Validazione inference:

In [124]:
df = pd.read_csv(r"D:\Datasets\NLP_text_mining\preprocessed_text_toxic2.csv")
test = pd.read_csv(r"D:\Datasets\NLP_text_mining\true_pos_toxic_final_test.csv")

In [128]:
all_text = pd.concat([df.text, test.text])
all_labels = pd.concat([df.toxic, test.toxic])

In [129]:
tokenizer = Tokenizer(nb_words=100000)
tokenizer.fit_on_texts(all_text.astype(str))
sequences = tokenizer.texts_to_sequences(all_text.astype(str))

word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))

data = pad_sequences(sequences, maxlen=100)

labels = to_categorical(np.asarray(all_labels))



Found 262524 unique tokens.


In [132]:
train_idx = len(df)

x_train = data[:train_idx]
y_train = labels[:train_idx]
x_test = data[train_idx:]
y_test = labels[train_idx:]

In [137]:
embedding_matrix = np.zeros((len(word_index) + 1, 200))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

In [139]:
best_assignments = conn.experiments(experiment.id).best_assignments().fetch().data[0].assignments
best_model = create_model(best_assignments, fit = True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [144]:
acc_pred = best_model.predict(x_train).argmax(axis = -1)
from sklearn.metrics import classification_report
print(classification_report(y_train.argmax(axis = -1), acc_pred))

              precision    recall  f1-score   support

           0       0.91      0.92      0.91    144106
           1       0.17      0.16      0.17     15293

    accuracy                           0.84    159399
   macro avg       0.54      0.54      0.54    159399
weighted avg       0.84      0.84      0.84    159399



In [145]:
acc_pred = best_model.predict(x_test).argmax(axis = -1)
from sklearn.metrics import classification_report
print(classification_report(y_test.argmax(axis = -1), acc_pred))

              precision    recall  f1-score   support

           0       0.91      0.91      0.91     57888
           1       0.15      0.15      0.15      6090

    accuracy                           0.83     63978
   macro avg       0.53      0.53      0.53     63978
weighted avg       0.84      0.83      0.84     63978

