<a href="https://colab.research.google.com/github/justina-tran/yelp-reviews/blob/master/notebooks/05_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Build Classifier
The objective is to build a classifier that predicts the rating (1-5) of a review, using the cleaned tokenized words from the reviews. 

I will build a pipeline to perform text vectorization and a grid search for different hyperparameters for various classification models. The models that I will explore with for classification are:
- logistic regression
- multinomial naive bayes
- random forest
- lightgbm
- xgboost



In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
import time
import plotly.express as px

# setting dir
import os
os.chdir('/content/drive/MyDrive/Projects/NLP/data')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from imblearn.over_sampling import SMOTE

from imblearn.pipeline import Pipeline, make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import lightgbm as lgbm
from lightgbm import LGBMClassifier

from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score, classification_report, matthews_corrcoef
from sklearn.metrics import confusion_matrix

import joblib

In [None]:
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
df = pd.read_csv('reviews_cleaned')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13211 entries, 0 to 13210
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   store         13211 non-null  object 
 1   date          13211 non-null  object 
 2   username      13211 non-null  object 
 3   user_loc      13211 non-null  object 
 4   rating        13211 non-null  object 
 5   reviews       13211 non-null  object 
 6   cleaned       13211 non-null  object 
 7   cleaned_text  13210 non-null  object 
 8   positive      13211 non-null  float64
 9   negative      13211 non-null  float64
 10  neutral       13211 non-null  float64
 11  compound      13211 non-null  float64
 12  sentiment     13211 non-null  object 
dtypes: float64(4), object(9)
memory usage: 1.3+ MB


In [None]:
df[df['cleaned_text'].isna()]

Unnamed: 0,store,date,username,user_loc,rating,reviews,cleaned,cleaned_text,positive,negative,neutral,compound,sentiment
4295,yaya-tea-garden-new-york-3,10/6/2011,Bonnie T.,"Manhattan, NY",5 star rating,:),[],,1.0,0.0,0.0,0.46,positive


In [None]:
df.dropna(inplace=True)

In [None]:
df.head()

Unnamed: 0,store,date,username,user_loc,rating,reviews,cleaned,cleaned_text,positive,negative,neutral,compound,sentiment
0,bar-pa-tea-new-york-2,11/29/2020,Casey W.,"New York, NY",5 star rating,- absolutely one of my favorite ice cream plac...,"['absolutely', 'one', 'favorite', 'ice', 'crea...",absolutely one favorite ice cream place city r...,0.2,0.0,0.8,0.88,positive
1,bar-pa-tea-new-york-2,8/13/2020,Marie R.,"Edgewater, NJ",1 star rating,My experience was really bad. I'm usually a bu...,"['experience', 'really', 'bad', 'usually', 'fa...",experience really bad usually fanatic think pl...,0.12,0.17,0.71,-0.97,negative
2,bar-pa-tea-new-york-2,7/21/2020,Mariama K.,"Los Angeles, CA",3 star rating,Good but not amazing. Staff was not very frien...,"['good', 'amazing', 'staff', 'friendly', 'mini...",good amazing staff friendly minimal seating ge...,0.21,0.33,0.46,-0.59,negative
3,bar-pa-tea-new-york-2,6/25/2020,Veon C.,"Washington, DC",5 star rating,One of the best boba I've ever had. I usually ...,"['one', 'best', 'ever', 'usually', 'get', 'adf...",one best ever usually get adffogato believe va...,0.26,0.0,0.74,0.99,positive
4,bar-pa-tea-new-york-2,6/14/2020,Jessleen C.,"Flushing, NY",5 star rating,I saw this place everyone on my timeline and h...,"['saw', 'place', 'everyone', 'timeline', 'chec...",saw place everyone timeline check disappoint g...,0.1,0.04,0.87,0.53,positive


In [None]:
# add the stopwords to Stopword list for modeling
stop_words = stopwords.words('english')
custom = ['boba', 'bubble', 'tea']
for i in custom:
  stop_words.append(i)
print(stop_words)

['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', '

In [None]:
corpus = list(df['reviews']) #comma separated reviews in list
print(corpus[:2])

["- absolutely one of my favorite ice cream places in the city\n- recommend: swirl (black tea/oolong tea) w/ large and mini bubbles\n- black tea is definitely stronger in flavor and almost has a coffee aftertaste\n- almost always a wait\n- fresh tapioca batches throughout the day\n- I heard this place didn't survive the pandemic but I really hope they come back once the dust settles", "My experience was really bad. I'm usually a bubble tea fanatic and I thought this place was cute and it looked delicious. I went and ordered a large oolong iced tea with bubbles with a little sweet and with ice cream on top.\n\nI saw the pictures and thought by adding matcha ice cream to it would be good. It looked good when they handed it to me, it was ombré colored.\nBut when I put my straw in it was so gross. It was so bitter. I can handle bitter, but it was like a poor quality tea bitter.\n\nAs for the ice cream portion, I figured that it would taste good but it wasn't creamy at all it was icy and we

In [None]:
# clean ratings
df['rating'] = df['rating'].str.replace(" star rating","")
df['rating'] = df['rating'].astype(int)
df['rating'].value_counts()

5    6810
4    3797
3    1431
2     639
1     533
Name: rating, dtype: int64

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 13210 entries, 0 to 13210
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   store         13210 non-null  object 
 1   date          13210 non-null  object 
 2   username      13210 non-null  object 
 3   user_loc      13210 non-null  object 
 4   rating        13210 non-null  int64  
 5   reviews       13210 non-null  object 
 6   cleaned       13210 non-null  object 
 7   cleaned_text  13210 non-null  object 
 8   positive      13210 non-null  float64
 9   negative      13210 non-null  float64
 10  neutral       13210 non-null  float64
 11  compound      13210 non-null  float64
 12  sentiment     13210 non-null  object 
dtypes: float64(4), int64(1), object(8)
memory usage: 1.4+ MB


## Splitting Data
X = cleaned text \
y = review rating (1-5)

I used stratify for train/test split because we need a balanced amount of each rating for each train/test split.

After splitting the data, I applied text vectorization and SMOTE.

SMOTE (Synthetic Minority Oversampling TEchnique) consists of synthesizing elements for the minority class. It works by randomly picking a point from the minority class and computing the k-nearest neighbors for this point. The synthetic points are added between the chosen point and its neighbors.
The `ratio='minority'` will only resample the minority class rating=1. Since we want each rating class to be balanced, we won't apply this.

In [None]:
# Defining the vectorizer to extract features from text
vectorizer = TfidfVectorizer(max_features=300, min_df=7, max_df=0.8, stop_words=stop_words)

# Creating X, y variables
reviews, labels = df['cleaned_text'], df['rating']

# create train/validation/test splits
X, X_test, y, y_test = train_test_split(reviews, labels, test_size=0.2, stratify=labels, random_state=123)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, stratify=y, random_state=123)

print('Test Holdout Set:', X_test.shape, y_test.shape)
print('Full Training Set:', X.shape, y.shape)
print()
print('Train:', X_train.shape, y_train.shape)
print('Validation:', X_val.shape, y_val.shape)

Test Holdout Set: (2642,) (2642,)
Full Training Set: (10568,) (10568,)

Train: (8454,) (8454,)
Validation: (2114,) (2114,)


In [None]:
# apply preprocessing steps
X_train_prep = vectorizer.fit_transform(X_train).toarray()
X_val_prep = vectorizer.transform(X_val).toarray()
print(X_train_prep.shape, X_val_prep.shape)

# tfidf on full training set -> cv
vectorizer = TfidfVectorizer(max_features=300, min_df=7, max_df=0.8, stop_words=stop_words)
X_prep = vectorizer.fit_transform(X).toarray() # used for final prediction for best models
X_test_prep = vectorizer.transform(X_test).toarray()
print(X_prep.shape, X_test_prep.shape)

(8454, 300) (2114, 300)
(10568, 300) (2642, 300)


In [None]:
# apply smote sampling technique to remove class imbalance
smote = SMOTE(random_state=12)
X_train_sm, y_train_sm = smote.fit_resample(X_train_prep, y_train)
print(X_train_sm.shape, y_train_sm.shape)
print(y_train.value_counts())
print(y_train_sm.value_counts())

(21790, 300) (21790,)
5    4358
4    2430
3     916
2     409
1     341
Name: rating, dtype: int64
5    4358
1    4358
4    4358
3    4358
2    4358
Name: rating, dtype: int64


## Evaluating Models Baseline Performance
I'll run a few classifiers without fine-tuning to evaluate baseline performance using cross-validation. This performs train/test splits on the dataset k times and returns the average scores for evaluation. A good metric for imbalanced multiclassification is Matthew's Correlation Coefficient. MCC represents the values in a confusion matrix as a single score in the range of -1 and 1, where 1 is a perfect classifier, 0 is no better than random guessing, and -1 is the opposite of the true value.

Another great metric for evaluating imbalanced classes is the F1-score. With an imbalanced dataset, the Macro-average F1-Score will reflect the true model performance by treating each class equally regardless of the number of samples it has. The Macro-avereage F1-score is computed by calculating the F1-Score for each class and then average all the results.

Evaluating baseline performance will help me select which models to further fine-tune. 

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
models = [
    SGDClassifier(random_state=12), 
    LogisticRegression(random_state=12, max_iter=1000),
    MultinomialNB(),
    LinearSVC(random_state=12),
    RandomForestClassifier(random_state=12),
    LGBMClassifier(objective='multiclass', random_state=12),
    XGBClassifier(random_state=12)
]

In [None]:
def run_cv_models(model_list, X, y, kfold=5):
  """
  Inputs: list of models and kfold
  Outputs: cross-validation df and summary df
  """
  results = []
  for model in model_list:
    clf = make_pipeline(SMOTE(random_state=12), model)
    model_name = model.__class__.__name__
    mcc = cross_val_score(clf, X, y, scoring=make_scorer(matthews_corrcoef), cv=kfold)
    f1 = cross_val_score(clf, X, y, scoring='f1_macro', cv=kfold)

    for k_fold, (mcc, f1) in enumerate(zip(mcc, f1)):
      results.append((model_name, k_fold, mcc, f1))

  cv_df = pd.DataFrame(results, columns=['model_name', 'k_fold', 'mcc', 'f1'])
  # summary table
  mean_mcc = cv_df.groupby('model_name').mcc.mean()
  std_mcc = cv_df.groupby('model_name').mcc.std()
  mean_f1 = cv_df.groupby('model_name').f1.mean()
  std_f1 = cv_df.groupby('model_name').f1.std()

  metrics_df = pd.concat([mean_mcc, std_mcc, mean_f1, std_f1], axis= 1, ignore_index=True)
  metrics_df.columns = ['mean_mcc', 'std_mcc', 'mean_f1', 'std_f1']
  return cv_df, metrics_df

In [None]:
cv_df, summary_df = run_cv_models(models, X_prep, y)

In [None]:
cv_df

Unnamed: 0,model_name,k_fold,mcc,f1
0,SGDClassifier,0,0.282948,0.360934
1,SGDClassifier,1,0.292505,0.373661
2,SGDClassifier,2,0.28075,0.369131
3,SGDClassifier,3,0.270089,0.36302
4,SGDClassifier,4,0.281438,0.364703
5,LogisticRegression,0,0.303692,0.402552
6,LogisticRegression,1,0.291933,0.409736
7,LogisticRegression,2,0.303641,0.423564
8,LogisticRegression,3,0.291274,0.417513
9,LogisticRegression,4,0.291767,0.404479


In [None]:
summary_df

Unnamed: 0_level_0,mean_mcc,std_mcc,mean_f1,std_f1
model_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LGBMClassifier,0.294995,0.006514,0.413016,0.01263
LinearSVC,0.294868,0.009239,0.398544,0.013892
LogisticRegression,0.296462,0.006582,0.411569,0.008862
MultinomialNB,0.293376,0.016691,0.405046,0.016636
RandomForestClassifier,0.260717,0.021511,0.384385,0.016601
SGDClassifier,0.281546,0.007969,0.36629,0.005106
XGBClassifier,0.259853,0.016893,0.37764,0.010656


In [None]:
mcc_perf = px.box(cv_df[['mcc', 'model_name']],
             facet_col='model_name', 
             boxmode="overlay", 
             color='model_name',
             title='Model Baseline Performance (MCC)', 
             labels={})
mcc_perf.update_xaxes(visible=False, showticklabels=False)
for annotation in mcc_perf.layout.annotations:
    annotation.text = annotation.text.split("=")[1]
mcc_perf.show()


f1_perf = px.box(cv_df[['f1', 'model_name']],
             facet_col='model_name', 
             boxmode="overlay", 
             color='model_name',
             title='Model Baseline Performance (F1-Macro)', 
             labels={})
f1_perf.update_xaxes(visible=False, showticklabels=False)
for annotation in f1_perf.layout.annotations:
    annotation.text = annotation.text.split("=")[1]
f1_perf.show()

In [None]:
mcc_perf.write_html("mcc_baseline.html")
f1_perf.write_html("f1_baseline.html")

Logistic Regression and LightGBM performs the best but across all classifiers, the best average F1-score is between 0.36 - 0.41.

In [None]:
#cv_df.to_pickle('cv_results_df')

## Building Pipeline

1. Text pipeline for processing steps: vectorizer
2. Model pipeline: with different algorithms and hyperparameters

In [None]:
print(X_train_prep.shape, X_val_prep.shape)

(8454, 300) (2114, 300)


In [None]:
def run_grid_search(pipeline, params, X_train, y_train, X_val, y_val):
  stratified_kfold = StratifiedKFold(n_splits=3,
                                       shuffle=True,
                                       random_state=11)
  start_time = time.time()
  gs = GridSearchCV(pipeline, 
                    param_grid=params, 
                    cv=stratified_kfold, 
                    scoring='f1_macro',
                    verbose=1, 
                    n_jobs=-1)
  gs.fit(X_train, y_train)
  runtime = time.time() - start_time
  print("Runtime: %s seconds" % runtime)

  # examine the best model
  best_score = gs.best_score_   # mean score for the best estimator
  best_params = gs.best_params_
  print('Best Score:', best_score) 
  print('Best Params:', best_params)
  
  # predict using best model
  train_pred = gs.predict(X_train)
  val_pred = gs.predict(X_val)

  # print eval metrics
  train_mcc = matthews_corrcoef(y_train, train_pred)
  val_mcc = matthews_corrcoef(y_val, val_pred)
  train_f1 = f1_score(y_train, train_pred, average='macro')
  val_f1 = f1_score(y_val, val_pred, average='macro')

  print('train MCC:', train_mcc)
  print('val MCC:', val_mcc)
  print('train F1-score', train_f1)
  print('val F1-score', val_f1)
  
  print(classification_report(y_val, val_pred))

  # return best model and eval metrics
  summary_df = pd.DataFrame([[pipeline['clf'], runtime, best_score, best_params, train_mcc, val_mcc, train_f1, val_f1]], 
                            columns=["model", "runtime", "best_score", "best_params",  "train_mcc", "val_mcc", "train_f1", "val_f1"])
  return gs, summary_df

## Logistic Regression

In [None]:
# without smote
logreg_pipe = Pipeline([
    ('tfidf', TfidfVectorizer(use_idf=True)),
#('smote', SMOTE(random_state=12)),
    ('clf', LogisticRegression(random_state=1)),
    ])
logreg_params = {'clf__C': [0.01, 0.1, 1.0, 10, 100],
                 'clf__class_weight': ['balanced', None],
                 'clf__multi_class': ['ovr', 'multinomial'],
                 'clf__max_iter':[100, 500, 1000]
                 }
logreg_gs_model, logreg_gs_df = run_grid_search(logreg_pipe, logreg_params, X_train, y_train, X_val, y_val)

Fitting 3 folds for each of 60 candidates, totalling 180 fits
Runtime: 152.66362619400024 seconds
Best Score: 0.4723295184382972
Best Params: {'clf__C': 1.0, 'clf__class_weight': 'balanced', 'clf__max_iter': 100, 'clf__multi_class': 'ovr'}
train MCC: 0.6843503949088312
val MCC: 0.37746240745120896
train F1-score 0.7920008409124011
val F1-score 0.4984583323714881
              precision    recall  f1-score   support

           1       0.50      0.58      0.54        85
           2       0.34      0.44      0.38       102
           3       0.33      0.39      0.36       229
           4       0.46      0.46      0.46       608
           5       0.79      0.72      0.75      1090

    accuracy                           0.59      2114
   macro avg       0.48      0.52      0.50      2114
weighted avg       0.61      0.59      0.60      2114



In [None]:
logreg_gs_df

Unnamed: 0,model,runtime,best_score,best_params,train_mcc,val_mcc,train_f1,val_f1
0,LogisticRegression(random_state=1),152.663626,0.47233,"{'clf__C': 1.0, 'clf__class_weight': 'balanced...",0.68435,0.377462,0.792001,0.498458


In [None]:
# save logreg model without smote
# joblib.dump(logreg_gs_model.best_estimator_, 'logreg.pkl', compress = 1)

['logreg.pkl']

In [None]:
# with smote
logreg_pipe = Pipeline([
    ('tfidf', TfidfVectorizer(use_idf=True)),
    ('smote', SMOTE(random_state=12)),
    ('clf', LogisticRegression(random_state=1)),
    ])
logreg_params = {'clf__C': [0.01, 0.1, 1.0, 10, 100],
                 'clf__class_weight': ['balanced', None],
                 'clf__multi_class': ['ovr', 'multinomial'],
                 'clf__max_iter':[100, 500, 1000]
                 }
logreg_gs_model, logreg_gs_df = run_grid_search(logreg_pipe, logreg_params, X_train, y_train, X_val, y_val)

Fitting 3 folds for each of 60 candidates, totalling 180 fits
Runtime: 576.7345151901245 seconds
Best Score: 0.45980180146412764
Best Params: {'clf__C': 1.0, 'clf__class_weight': 'balanced', 'clf__max_iter': 100, 'clf__multi_class': 'ovr'}
train MCC: 0.7015687939896564
val MCC: 0.36081767488264693
train F1-score 0.8267896535120627
val F1-score 0.4847953050268082
              precision    recall  f1-score   support

           1       0.48      0.59      0.53        85
           2       0.32      0.43      0.37       102
           3       0.31      0.37      0.34       229
           4       0.45      0.43      0.44       608
           5       0.78      0.73      0.75      1090

    accuracy                           0.58      2114
   macro avg       0.47      0.51      0.48      2114
weighted avg       0.60      0.58      0.59      2114



In [None]:
logreg_gs_df

Unnamed: 0,model,runtime,best_score,best_params,train_mcc,val_mcc,train_f1,val_f1
0,LogisticRegression(random_state=1),576.734515,0.459802,"{'clf__C': 1.0, 'clf__class_weight': 'balanced...",0.701569,0.360818,0.82679,0.484795


The logistic regression model performed worst with SMOTE applied and the runtime was 4x as long.

## LightGBM

In [None]:
# lighGBM without smote
lgbm_pipe = Pipeline([
    ('tfidf', TfidfVectorizer(use_idf=True)),
    # ('smote', SMOTE(random_state=12)),
    ('clf', LGBMClassifier(objective='multiclass', random_state=12)),
    ])

param_grid = {
    'clf__reg_alpha': [0.1, 0.5],
    'clf__min_data_in_leaf': [30, 50, 100],
    'clf__lambda_l1': [0, 1, 1.5],
    'clf__lambda_l2': [0, 1]
    }
lgbm_gs_model, lgbm_gs_df  = run_grid_search(lgbm_pipe, param_grid, X_train, y_train, X_val, y_val)

Fitting 3 folds for each of 36 candidates, totalling 108 fits
Runtime: 986.9799633026123 seconds
Best Score: 0.4107158701619809
Best Params: {'clf__lambda_l1': 1.5, 'clf__lambda_l2': 0, 'clf__min_data_in_leaf': 30, 'clf__reg_alpha': 0.1}
train MCC: 0.7644863913888313
val MCC: 0.31716935241717087
train F1-score 0.8609484864052362
val F1-score 0.4299328603434146
              precision    recall  f1-score   support

           1       0.65      0.39      0.49        85
           2       0.41      0.17      0.24       102
           3       0.37      0.20      0.26       229
           4       0.44      0.39      0.41       608
           5       0.68      0.85      0.76      1090

    accuracy                           0.59      2114
   macro avg       0.51      0.40      0.43      2114
weighted avg       0.56      0.59      0.57      2114



In [None]:
# save lgbm model without smote
# joblib.dump(lgbm_gs_model.best_estimator_, 'lgbm.pkl', compress = 1)

['lgbm.pkl']

In [None]:
# lighGBM smote
lgbm_pipe = Pipeline([
    ('tfidf', TfidfVectorizer(use_idf=True)),
    ('smote', SMOTE(random_state=12)),
    ('clf', LGBMClassifier(objective='multiclass', random_state=12)),
    ])

param_grid = {
    'clf__reg_alpha': [0.1, 0.5],
    'clf__min_data_in_leaf': [30, 50, 100],
    'clf__lambda_l1': [0, 1, 1.5],
    'clf__lambda_l2': [0, 1]
    }
lgbm_gs_model, lgbm_gs_df  = run_grid_search(lgbm_pipe, param_grid, X_train, y_train, X_val, y_val)

Fitting 3 folds for each of 36 candidates, totalling 108 fits
Runtime: 4404.966264247894 seconds
Best Score: 0.4479004520666376
Best Params: {'clf__lambda_l1': 0, 'clf__lambda_l2': 0, 'clf__min_data_in_leaf': 100, 'clf__reg_alpha': 0.1}
train MCC: 0.6791590821655314
val MCC: 0.33694630391444114
train F1-score 0.805666277622229
val F1-score 0.46120660155587123
              precision    recall  f1-score   support

           1       0.53      0.49      0.51        85
           2       0.34      0.27      0.30       102
           3       0.35      0.27      0.30       229
           4       0.45      0.43      0.44       608
           5       0.72      0.78      0.75      1090

    accuracy                           0.59      2114
   macro avg       0.48      0.45      0.46      2114
weighted avg       0.57      0.59      0.58      2114

