In [22]:
import random as python_random
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense
from keras.layers import Embedding, LSTM, Dropout, Bidirectional, MaxPooling1D, Conv1D
from keras.initializers import Constant
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.layers import TextVectorization
import tensorflow as tf
import keras
import pickle
import keras.backend as K
import pandas as pd
import re
import emoji
from wordsegment import load,segment
import io
from tqdm import tqdm

In [23]:
# Load the wordsegment data once so segment() can be used later on
load()

# Fix every random number generator in play (Python, NumPy, TensorFlow)
# to the same seed so that runs are repeatable
python_random.seed(1234)
np.random.seed(1234)
tf.random.set_seed(1234)

In [24]:
def read_corpus(corpus_file):
    '''Read a tab-separated data set and return its documents and labels.

    Every non-blank line is stripped and split on tabs: the first field is
    taken as the document text and the second field as its label (binary
    problem: NOT, OFF). Any further fields on the line are ignored.

    Blank lines (for example an empty line at the end of the file) are
    skipped rather than aborting the read with an IndexError.

    Args:
        corpus_file: path of the UTF-8 encoded file to read.

    Returns:
        (documents, labels): two parallel lists of strings.

    Raises:
        IndexError: if a non-blank line does not contain a tab separator.
    '''
    documents = []
    labels = []
    with open(corpus_file, encoding='utf-8') as f:
        for line in f:
            tokens = line.strip()
            if not tokens:
                # Nothing on this line, so there is no document/label pair
                continue
            # Split once and reuse the fields for both lists
            fields = tokens.split("\t")
            documents.append(fields[0])
            labels.append(fields[1])
    return documents, labels

def read_word_emb(embeddings_file, voc):
    '''Read the vectors of the vocabulary words from a text embeddings file.

    The first line of the file is parsed as two integers (a header) and is
    otherwise unused. Every following line is split on single spaces: the
    first field is the word, the remaining fields are its vector components.
    Only words that occur in `voc` are kept.

    Args:
        embeddings_file: path of the embeddings file to read.
        voc: collection of words whose vectors should be loaded.

    Returns:
        dict mapping each word found in both `voc` and the file to a NumPy
        array with its vector.

    Raises:
        ValueError: if the header line does not consist of exactly two
            integers, or a kept line has a non-numeric component.
    '''
    # Set membership is O(1); scanning a vocabulary list for every line of a
    # large embeddings file is needlessly slow.
    wanted = set(voc)
    data = {}
    # The context manager closes the file even if parsing fails part-way
    with open(embeddings_file, 'r', encoding='utf-8', newline='\n', errors='ignore') as fin:
        # Consume the header; it is still parsed so a malformed file fails early
        _n, _d = map(int, fin.readline().split())
        for line in fin:
            tokens = line.rstrip().split(' ')
            if tokens[0] in wanted:
                data[tokens[0]] = np.array(list(map(float, tokens[1:])))
    return data

def get_emb_matrix(voc, emb):
    '''Build the embedding matrix for a vocabulary from pretrained vectors.

    Row i of the result holds the vector of voc[i]. Words without a
    pretrained vector keep an all-zero row.

    Args:
        voc: sequence of vocabulary words; a word's position is its row index.
        emb: dict mapping words to their embedding vectors.

    Returns:
        NumPy array of shape (len(voc) + 2, embedding_dim).

    Raises:
        ValueError: if `emb` is empty, so no embedding dimension is known.
    '''
    if not emb:
        raise ValueError("emb is empty: cannot determine the embedding dimension")
    # Two more rows than vocabulary entries are allocated, as before; the
    # extra rows are never written and stay zero.
    # NOTE(review): presumably headroom for special tokens - confirm.
    num_tokens = len(voc) + 2
    # Take the dimension from any available vector instead of relying on the
    # word "the" being among the loaded embeddings.
    embedding_dim = len(next(iter(emb.values())))
    # Prepare embedding matrix to the correct size
    embedding_matrix = np.zeros((num_tokens, embedding_dim))
    for i, word in enumerate(voc):
        embedding_vector = emb.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
    # Final matrix with pretrained embeddings that we can feed to embedding layer
    return embedding_matrix

def test_set_predict(model, X_test, Y_test, ident):
    '''Do predictions and measure accuracy on our own test set (that we split off train)'''
    # Get predictions using the trained model
    Y_pred = model.predict(X_test)
    # Finally, convert to labels to get scores with sklearn
    Y_pred=(Y_pred.flatten()>0.5)*1
    # If you have gold data, you can calculate accuracy
    Y_test = Y_test.flatten()
    print('Accuracy on own {1} set: {0}'.format(round(accuracy_score(Y_test, Y_pred), 3), ident))
    return Y_pred

def get_f1(y_true, y_pred): #taken from old keras source code
    '''F1 metric computed with backend ops, for monitoring during training.

    Values are clipped to [0, 1] and rounded before counting, and
    K.epsilon() is added to each denominator to avoid division by zero.
    '''
    def _count_positives(values):
        # Number of entries that round to 1 after clipping to [0, 1]
        return K.sum(K.round(K.clip(values, 0, 1)))

    eps = K.epsilon()
    true_positives = _count_positives(y_true * y_pred)
    recall = true_positives / (_count_positives(y_true) + eps)
    precision = true_positives / (_count_positives(y_pred) + eps)
    return 2*(precision*recall)/(precision+recall+eps)

def scheduler(epoch, lr):
    '''Learning rate schedule: constant at first, then exponential decay.

    Args:
        epoch: index of the epoch about to start.
        lr: current learning rate.

    Returns:
        `lr` unchanged while epoch < 7, otherwise lr * exp(-0.1). For a float
        `lr` the result is a plain Python float (not a tensor), which is the
        type the Keras LearningRateScheduler callback documents for its
        schedule function.
    '''
    import math  # local import: only this function needs it

    if epoch < 7:
        return lr
    return lr * math.exp(-0.1)
    
def train_model(model, X_train, Y_train, X_dev, Y_dev, batch_size, epochs):
    '''Fit the model on the training data, validating on the dev data.

    Training uses early stopping on val_loss (patience 3), the learning rate
    schedule defined by `scheduler`, and class weights that count class 1
    twice as much as class 0. The fitted model is returned.
    '''
    callbacks = [
        # Stop when the validation loss has not improved for 3 epochs
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3),
        # Adjust the learning rate per epoch via scheduler()
        tf.keras.callbacks.LearningRateScheduler(scheduler),
    ]
    # Class weights for the imbalanced classification problem
    class_weight = {0: 1., 1: 2.}
    model.fit(
        X_train,
        Y_train,
        verbose=1,
        epochs=epochs,
        callbacks=callbacks,
        batch_size=batch_size,
        validation_data=(X_dev, Y_dev),
        class_weight=class_weight,
    )
    return model

## Best model architecture used after experimenting
def create_model(Y_train, emb_matrix, lr):
    '''Build and compile the Keras model.

    The network is a frozen embedding layer initialised from `emb_matrix`,
    an LSTM with as many units as the embedding dimension (dropout 0.2) and
    a single sigmoid output unit. It is compiled with binary cross-entropy,
    Adam at learning rate `lr`, and `get_f1` as metric.
    `Y_train` is accepted but not used.
    '''
    optimizer = Adam(learning_rate=lr)

    # Vocabulary size and vector width both come from the embedding matrix
    num_tokens = len(emb_matrix)
    embedding_dim = len(emb_matrix[0])

    layers = [
        Embedding(num_tokens, embedding_dim, embeddings_initializer=Constant(emb_matrix), trainable=False),
        LSTM(embedding_dim, dropout=0.2),
        Dense(units=1, activation="sigmoid"),
    ]
    model = Sequential()
    for layer in layers:
        model.add(layer)

    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[get_f1])
    return model

In [25]:
# Read in the data
X_train_org, Y_train = read_corpus("datasets/train.tsv")
X_dev_org, Y_dev = read_corpus("datasets/val.tsv")

# Changing labels to binary. The encoder is fitted on the training labels only
# and then reused for the dev labels, so both sets share one label -> 0/1
# mapping (refitting on dev could silently produce a different mapping).
encoder = LabelBinarizer()
Y_train_bin = encoder.fit_transform(Y_train)  # Use encoder.classes_ to find mapping back
Y_dev_bin = encoder.transform(Y_dev)

## No preprocessing

In [26]:
def preprocess_input(text_in):
    '''Baseline preprocessing: hand the text back exactly as it came in'''
    untouched = text_in
    return untouched

# Run the preprocessing over the original train and dev texts
X_train = list(map(preprocess_input, X_train_org))
X_dev = list(map(preprocess_input, X_dev_org))

In [27]:
# Transform words to indices using a vectorizer
vectorizer = TextVectorization(standardize=None, output_sequence_length=50)
# Use train and dev to create vocab - could also do just train
text_ds = tf.data.Dataset.from_tensor_slices(X_train + X_dev)
with tf.device('/cpu:0'):
    vectorizer.adapt(text_ds)

# Vocabulary list; a word's position is its index
voc = vectorizer.get_vocabulary()

# Transform input to vectorized input
X_train_vect = vectorizer(np.array([[s] for s in X_train])).numpy()
X_dev_vect = vectorizer(np.array([[s] for s in X_dev])).numpy()

# Read embeddings
embeddings_ft = read_word_emb("embeddings/crawl-300d-2M-subword.vec", voc)

# Embeddings matrix
emb_matrix = get_emb_matrix(voc, embeddings_ft)
# Create model
model = create_model(Y_train, emb_matrix, lr=0.00001)
# Train the model
model = train_model(model, X_train_vect, Y_train_bin, X_dev_vect, Y_dev_bin, 32, 50)

# Evaluate on the dev set
y_preds = test_set_predict(model, X_dev_vect, Y_dev_bin, "dev")
Y_dev_flat = Y_dev_bin.flatten()
print("F1 score on dev set (macro):", f1_score(Y_dev_flat, y_preds, average='macro'))
# Accuracy has no macro/micro variant, so the label does not claim one
print("Accuracy on dev set:", accuracy_score(Y_dev_flat, y_preds))
# This is sklearn's classification report, not a confusion matrix; printing it
# on its own line keeps the table header aligned with its columns
print("Classification report:")
print(classification_report(Y_dev_flat, y_preds))

2022-11-03 15:43:11.872255: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/50


2022-11-03 15:53:44.832583: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 15:53:45.541298: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 15:53:47.562830: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2022-11-03 15:53:59.721454: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 15:53:59.762907: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
 5/32 [===>..........................] - ETA: 0s

2022-11-03 15:57:02.035161: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 15:57:02.065058: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Accuracy on own dev set: 0.727
F1 score on dev set (macro): 0.7110423116615068
Accuracy on dev set (macro): 0.727
Conf Matrix:                precision    recall  f1-score   support

           0       0.82      0.74      0.78       648
           1       0.60      0.70      0.64       352

    accuracy                           0.73      1000
   macro avg       0.71      0.72      0.71      1000
weighted avg       0.74      0.73      0.73      1000



## Split slashes and underscores

In [28]:
def preprocess_input(text_in):
    '''Put spaces around every slash and turn every underscore into a space'''
    # Plain string replacement; the local name no longer hides builtin input()
    slashes_split = text_in.replace("/", " / ")
    return slashes_split.replace("_", " ")

# Run the preprocessing over the original train and dev texts
X_train = list(map(preprocess_input, X_train_org))
X_dev = list(map(preprocess_input, X_dev_org))

In [32]:
# Transform words to indices using a vectorizer
vectorizer = TextVectorization(standardize=None, output_sequence_length=50)
# Use train and dev to create vocab - could also do just train
text_ds = tf.data.Dataset.from_tensor_slices(X_train + X_dev)
with tf.device('/cpu:0'):
    vectorizer.adapt(text_ds)

# Vocabulary list; a word's position is its index
voc = vectorizer.get_vocabulary()

# Transform input to vectorized input
X_train_vect = vectorizer(np.array([[s] for s in X_train])).numpy()
X_dev_vect = vectorizer(np.array([[s] for s in X_dev])).numpy()

# Read embeddings
embeddings_ft = read_word_emb("embeddings/crawl-300d-2M-subword.vec", voc)

# Embeddings matrix
emb_matrix = get_emb_matrix(voc, embeddings_ft)
# Create model
model = create_model(Y_train, emb_matrix, lr=0.00001)
# Train the model
model = train_model(model, X_train_vect, Y_train_bin, X_dev_vect, Y_dev_bin, 32, 50)

# Evaluate on the dev set
y_preds = test_set_predict(model, X_dev_vect, Y_dev_bin, "dev")
Y_dev_flat = Y_dev_bin.flatten()
print("F1 score on dev set (macro):", f1_score(Y_dev_flat, y_preds, average='macro'))
# Accuracy has no macro/micro variant, so the label does not claim one
print("Accuracy on dev set:", accuracy_score(Y_dev_flat, y_preds))
# This is sklearn's classification report, not a confusion matrix; printing it
# on its own line keeps the table header aligned with its columns
print("Classification report:")
print(classification_report(Y_dev_flat, y_preds))

2022-11-03 16:06:59.742677: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/50


2022-11-03 16:17:01.762782: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 16:17:01.927801: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


  3/383 [..............................] - ETA: 10s - loss: 0.9318 - get_f1: 0.3064 

2022-11-03 16:17:02.122132: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2022-11-03 16:17:13.572284: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 16:17:13.619438: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
 4/32 [==>...........................] - ETA: 0s

2022-11-03 16:17:53.221227: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 16:17:53.252681: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Accuracy on own dev set: 0.612
F1 score on dev set (macro): 0.6109480034011969
Accuracy on dev set (macro): 0.612
Conf Matrix:                precision    recall  f1-score   support

           0       0.82      0.51      0.63       648
           1       0.47      0.80      0.59       352

    accuracy                           0.61      1000
   macro avg       0.65      0.65      0.61      1000
weighted avg       0.70      0.61      0.62      1000



## Split hashtags

In [33]:
def preprocess_input(text_in):
    '''Replace every hashtag in the text by its segmented words.

    Each match of "#" followed by word characters is passed to wordsegment's
    segment() and replaced, in place, by the returned words joined with
    spaces. Text outside hashtags is left untouched.

    Substituting per match (instead of str.replace over the whole text) keeps
    hashtags independent: a tag such as "#ab" no longer rewrites the start of
    a longer tag "#abc" before that one can be segmented.
    '''
    def _split_hashtag(match):
        # One hashtag occurrence -> its space-separated words
        return " ".join(segment(match.group(0)))

    return re.sub(r"#\w+", _split_hashtag, text_in)

# Run the preprocessing over the original train and dev texts
X_train = list(map(preprocess_input, X_train_org))
X_dev = list(map(preprocess_input, X_dev_org))

In [35]:
# Transform words to indices using a vectorizer
vectorizer = TextVectorization(standardize=None, output_sequence_length=50)
# Use train and dev to create vocab - could also do just train
text_ds = tf.data.Dataset.from_tensor_slices(X_train + X_dev)
with tf.device('/cpu:0'):
    vectorizer.adapt(text_ds)

# Vocabulary list; a word's position is its index
voc = vectorizer.get_vocabulary()

# Transform input to vectorized input
X_train_vect = vectorizer(np.array([[s] for s in X_train])).numpy()
X_dev_vect = vectorizer(np.array([[s] for s in X_dev])).numpy()

# Read embeddings
embeddings_ft = read_word_emb("embeddings/crawl-300d-2M-subword.vec", voc)

# Embeddings matrix
emb_matrix = get_emb_matrix(voc, embeddings_ft)
# Create model
model = create_model(Y_train, emb_matrix, lr=0.00001)
# Train the model
model = train_model(model, X_train_vect, Y_train_bin, X_dev_vect, Y_dev_bin, 32, 50)

# Evaluate on the dev set
y_preds = test_set_predict(model, X_dev_vect, Y_dev_bin, "dev")
Y_dev_flat = Y_dev_bin.flatten()
print("F1 score on dev set (macro):", f1_score(Y_dev_flat, y_preds, average='macro'))
# Accuracy has no macro/micro variant, so the label does not claim one
print("Accuracy on dev set:", accuracy_score(Y_dev_flat, y_preds))
# This is sklearn's classification report, not a confusion matrix; printing it
# on its own line keeps the table header aligned with its columns
print("Classification report:")
print(classification_report(Y_dev_flat, y_preds))

2022-11-03 16:21:33.784758: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/50


2022-11-03 16:31:41.910461: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 16:31:42.088584: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


  1/383 [..............................] - ETA: 7:52 - loss: 0.9327 - get_f1: 0.3636

2022-11-03 16:31:42.259433: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2022-11-03 16:31:54.189899: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 16:31:54.234024: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
 6/32 [====>.........................] - ETA: 0s

2022-11-03 16:32:28.102804: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 16:32:28.137900: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Accuracy on own dev set: 0.607
F1 score on dev set (macro): 0.6050088496108885
Accuracy on dev set (macro): 0.607
Conf Matrix:                precision    recall  f1-score   support

           0       0.80      0.52      0.63       648
           1       0.46      0.76      0.58       352

    accuracy                           0.61      1000
   macro avg       0.63      0.64      0.61      1000
weighted avg       0.68      0.61      0.61      1000



## Replace emojis with their text names

In [36]:
def preprocess_input(text_in):
    '''Spell out emojis as words and turn every underscore into a space'''
    # demojize() inserts each emoji's name, delimited by a space on each side
    emoji_as_text = emoji.demojize(text_in, delimiters=(" ", " "))
    # Underscores become spaces; this applies to the whole text
    return emoji_as_text.replace("_", " ")
# Run the preprocessing over the original train and dev texts
X_train = list(map(preprocess_input, X_train_org))
X_dev = list(map(preprocess_input, X_dev_org))



In [38]:
# Transform words to indices using a vectorizer
vectorizer = TextVectorization(standardize=None, output_sequence_length=50)
# Use train and dev to create vocab - could also do just train
text_ds = tf.data.Dataset.from_tensor_slices(X_train + X_dev)
with tf.device('/cpu:0'):
    vectorizer.adapt(text_ds)

# Vocabulary list; a word's position is its index
voc = vectorizer.get_vocabulary()

# Transform input to vectorized input
X_train_vect = vectorizer(np.array([[s] for s in X_train])).numpy()
X_dev_vect = vectorizer(np.array([[s] for s in X_dev])).numpy()

# Read embeddings
embeddings_ft = read_word_emb("embeddings/crawl-300d-2M-subword.vec", voc)

# Embeddings matrix
emb_matrix = get_emb_matrix(voc, embeddings_ft)
# Create model
model = create_model(Y_train, emb_matrix, lr=0.00001)
# Train the model
model = train_model(model, X_train_vect, Y_train_bin, X_dev_vect, Y_dev_bin, 32, 50)

# Evaluate on the dev set
y_preds = test_set_predict(model, X_dev_vect, Y_dev_bin, "dev")
Y_dev_flat = Y_dev_bin.flatten()
print("F1 score on dev set (macro):", f1_score(Y_dev_flat, y_preds, average='macro'))
# Accuracy has no macro/micro variant, so the label does not claim one
print("Accuracy on dev set:", accuracy_score(Y_dev_flat, y_preds))
# This is sklearn's classification report, not a confusion matrix; printing it
# on its own line keeps the table header aligned with its columns
print("Classification report:")
print(classification_report(Y_dev_flat, y_preds))

2022-11-03 16:59:57.267244: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/50


2022-11-03 17:09:42.392201: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 17:09:42.512533: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


  1/383 [..............................] - ETA: 6:54 - loss: 0.9332 - get_f1: 0.4848

2022-11-03 17:09:42.687631: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2022-11-03 17:09:53.570228: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 17:09:53.613838: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
 5/32 [===>..........................] - ETA: 0s

2022-11-03 17:12:09.999823: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 17:12:10.030818: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Accuracy on own dev set: 0.712
F1 score on dev set (macro): 0.70024854391567
Accuracy on dev set (macro): 0.712
Conf Matrix:                precision    recall  f1-score   support

           0       0.83      0.70      0.76       648
           1       0.57      0.73      0.64       352

    accuracy                           0.71      1000
   macro avg       0.70      0.72      0.70      1000
weighted avg       0.74      0.71      0.72      1000



## Lowercase

In [40]:
def preprocess_input(text_in):
    '''Lowercase the whole text and return it'''
    lowered = text_in.lower()
    return lowered
# Run the preprocessing over the original train and dev texts
X_train = list(map(preprocess_input, X_train_org))
X_dev = list(map(preprocess_input, X_dev_org))

In [41]:
# Transform words to indices using a vectorizer
vectorizer = TextVectorization(standardize=None, output_sequence_length=50)
# Use train and dev to create vocab - could also do just train
text_ds = tf.data.Dataset.from_tensor_slices(X_train + X_dev)
with tf.device('/cpu:0'):
    vectorizer.adapt(text_ds)

# Vocabulary list; a word's position is its index
voc = vectorizer.get_vocabulary()

# Transform input to vectorized input
X_train_vect = vectorizer(np.array([[s] for s in X_train])).numpy()
X_dev_vect = vectorizer(np.array([[s] for s in X_dev])).numpy()

# Read embeddings
embeddings_ft = read_word_emb("embeddings/crawl-300d-2M-subword.vec", voc)

# Embeddings matrix
emb_matrix = get_emb_matrix(voc, embeddings_ft)
# Create model
model = create_model(Y_train, emb_matrix, lr=0.00001)
# Train the model
model = train_model(model, X_train_vect, Y_train_bin, X_dev_vect, Y_dev_bin, 32, 50)

# Evaluate on the dev set
y_preds = test_set_predict(model, X_dev_vect, Y_dev_bin, "dev")
Y_dev_flat = Y_dev_bin.flatten()
print("F1 score on dev set (macro):", f1_score(Y_dev_flat, y_preds, average='macro'))
# Accuracy has no macro/micro variant, so the label does not claim one
print("Accuracy on dev set:", accuracy_score(Y_dev_flat, y_preds))
# This is sklearn's classification report, not a confusion matrix; printing it
# on its own line keeps the table header aligned with its columns
print("Classification report:")
print(classification_report(Y_dev_flat, y_preds))

2022-11-03 17:27:12.663333: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 1/50


2022-11-03 17:35:48.424778: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 17:35:48.567411: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


  1/383 [..............................] - ETA: 7:53 - loss: 0.9292 - get_f1: 0.3571

2022-11-03 17:35:48.779442: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2022-11-03 17:35:59.561326: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 17:35:59.611363: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
 6/32 [====>.........................] - ETA: 0s

2022-11-03 17:36:30.988334: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2022-11-03 17:36:31.020493: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Accuracy on own dev set: 0.593
F1 score on dev set (macro): 0.5910530033489443
Accuracy on dev set (macro): 0.593
Conf Matrix:                precision    recall  f1-score   support

           0       0.79      0.51      0.62       648
           1       0.45      0.74      0.56       352

    accuracy                           0.59      1000
   macro avg       0.62      0.63      0.59      1000
weighted avg       0.67      0.59      0.60      1000

