In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer

nltk.download('stopwords')
nltk.download('wordnet')

# Load the dataset
data = pd.read_csv('balanced_dataset_50000.csv')

# Drop rows with NaN values
data = data.dropna()

# Preprocess the dataset
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = re.sub(r'\s+', ' ', text)  # Remove extra spaces
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
    text = ' '.join([WordNetLemmatizer().lemmatize(word) for word in text.split() if word not in stopwords.words('english')])
    return text

# Preprocess the dataset
X = data['comment']
y = data['label']

# Encode labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to strings to handle potential float values
X_train = X_train.astype(str)
X_test = X_test.astype(str)

# Tokenize and pad sequences
max_words = 10000  # Maximum number of words to keep based on frequency
maxlen = 100  # Maximum length of sequences

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

X_train_pad = pad_sequences(X_train_seq, maxlen=maxlen)
X_test_pad = pad_sequences(X_test_seq, maxlen=maxlen)

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\haree\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\haree\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Using BERT for Feature Extraction

In [None]:
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, classification_report
from transformers import BertTokenizer, BertModel
import torch
from torch.utils.data import DataLoader
from torch.cuda.amp import autocast

# Assume X_train, X_test, y_train, y_test are your data arrays
# Tokenize and pad sequences
maxlen = 100  # Assuming a maximum sequence length
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Encode text data with BERT
def encode_texts(texts, batch_size=8):
    tokenized_texts = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    dataset = torch.utils.data.TensorDataset(tokenized_texts['input_ids'], tokenized_texts['attention_mask'])
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    
    # Load pre-trained BERT model
    model = BertModel.from_pretrained('bert-base-uncased')
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()  # Set evaluation mode
    
    bert_outputs = []
    with torch.no_grad():
        for input_ids, attention_mask in dataloader:
            input_ids = input_ids.to(device)
            attention_mask = attention_mask.to(device)
            
            with autocast():
                outputs = model(input_ids, attention_mask=attention_mask)
            
            # Use the [CLS] token representation (first token)
            cls_token_embedding = outputs.last_hidden_state[:, 0, :]
            bert_outputs.append(cls_token_embedding.cpu().numpy())
    
    bert_embeddings = np.concatenate(bert_outputs, axis=0)
    return bert_embeddings

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# Reduce batch size during BERT encoding if needed
X_train_bert = encode_texts(X_train.tolist(), batch_size=4)
X_test_bert = encode_texts(X_test.tolist(), batch_size=4)

# Combine TF-IDF and BERT features
X_train_combined = np.hstack((X_train_tfidf.toarray(), X_train_bert))
X_test_combined = np.hstack((X_test_tfidf.toarray(), X_test_bert))

# Example of how to use the combined features for training and evaluation
# Replace this with your actual model training and evaluation code
# Assuming you have labels y_train, y_test
# Here, we just print some evaluation metrics as an example
print('Training data shape:', X_train_combined.shape)
print('Testing data shape:', X_test_combined.shape)

# Example classifier
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train_combined, y_train)

y_pred = clf.predict(X_test_combined)

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('Classification Report:')
print(classification_report(y_test, y_pred))




# Advanced Model Training and Evaluation

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, classification_report

# Random Forest Model
rf_model = RandomForestClassifier(n_estimators=200, max_depth=20, random_state=42)
rf_model.fit(X_train_combined, y_train)

# Training accuracy
train_predictions = rf_model.predict(X_train_combined)
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)

# Test accuracy and precision
test_predictions = rf_model.predict(X_test_combined)
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)

# Print results
print('Random Forest Model')
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')
print(f'Training Precision: {train_precision * 100:.2f}%')
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')
print(f'Test Precision: {test_precision * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, test_predictions))


# Random Forest Model with Metrics Calculation

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, classification_report

# Random Forest Model
rf_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
rf_model.fit(X_train_tfidf, y_train)

# Training accuracy
train_predictions = rf_model.predict(X_train_tfidf)
train_accuracy = accuracy_score(y_train, train_predictions)
train_precision = precision_score(y_train, train_predictions)

# Test accuracy and precision
test_predictions = rf_model.predict(X_test_tfidf)
test_accuracy = accuracy_score(y_test, test_predictions)
test_precision = precision_score(y_test, test_predictions)

# Print results
print('Random Forest Model')
print(f'Training Accuracy: {train_accuracy * 100:.2f}%')
print(f'Training Precision: {train_precision * 100:.2f}%')
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')
print(f'Test Precision: {test_precision * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, test_predictions))


Random Forest Model
Training Accuracy: 65.60%
Training Precision: 74.43%
Test Accuracy: 62.49%
Test Precision: 70.36%
Classification Report:
              precision    recall  f1-score   support

           0       0.59      0.81      0.68      4892
           1       0.70      0.45      0.55      5008

    accuracy                           0.62      9900
   macro avg       0.65      0.63      0.61      9900
weighted avg       0.65      0.62      0.61      9900



# SVM Model with Metrics Calculation

In [3]:
#IMPROVED



from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, classification_report

# TF-IDF Vectorization
vectorizer = TfidfVectorizer(max_features=5000, ngram_range=(1, 2))
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# SVM Model with GridSearchCV for hyperparameter tuning
param_grid = {
    'C': [0.1, 1, 10],  # Regularization parameter
    'kernel': ['linear', 'rbf']  # Kernel type
}

# Number of cross-validation folds (epochs)
num_epochs = 5

svm_model = GridSearchCV(SVC(random_state=42), param_grid, cv=num_epochs, scoring='accuracy')
svm_model.fit(X_train_tfidf, y_train)

# Best parameters and best score from grid search
print("Best Parameters: ", svm_model.best_params_)
print("Best CV Accuracy: {:.2f}%".format(svm_model.best_score_ * 100))

# Training accuracy and precision
train_predictions_svm = svm_model.predict(X_train_tfidf)
train_accuracy_svm = accuracy_score(y_train, train_predictions_svm)
train_precision_svm = precision_score(y_train, train_predictions_svm)

# Test accuracy and precision
test_predictions_svm = svm_model.predict(X_test_tfidf)
test_accuracy_svm = accuracy_score(y_test, test_predictions_svm)
test_precision_svm = precision_score(y_test, test_predictions_svm)

# Print results
print('\nSVM Model with Tuning and Epochs (Cross-Validation Folds)')
print(f'Training Accuracy: {train_accuracy_svm * 100:.2f}%')
print(f'Training Precision: {train_precision_svm * 100:.2f}%')
print(f'Test Accuracy: {test_accuracy_svm * 100:.2f}%')
print(f'Test Precision: {test_precision_svm * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, test_predictions_svm))


Best Parameters:  {'C': 1, 'kernel': 'rbf'}
Best CV Accuracy: 64.94%

SVM Model with Tuning and Epochs (Cross-Validation Folds)
Training Accuracy: 88.35%
Training Precision: 90.35%
Test Accuracy: 65.22%
Test Precision: 67.61%
Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.71      0.67      4892
           1       0.68      0.60      0.64      5008

    accuracy                           0.65      9900
   macro avg       0.65      0.65      0.65      9900
weighted avg       0.65      0.65      0.65      9900



# logistic regression 

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, classification_report
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Define a pipeline with StandardScaler and LogisticRegression
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('lr', LogisticRegression(max_iter=1000, random_state=42))
])

# Define the parameter grid for GridSearchCV
param_grid = {
    'lr__C': [0.001, 0.01, 0.1, 1, 10, 100],  # Regularization parameter
    'lr__penalty': ['l1', 'l2']  # Penalty norm
}

# Perform GridSearchCV to find the best parameters
grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train_pad, y_train)

# Print the best parameters found by GridSearchCV
print("Best parameters:", grid_search.best_params_)
print()

# Evaluate the model with best parameters
best_lr_model = grid_search.best_estimator_
y_pred_lr = best_lr_model.predict(X_test_pad)

# Print evaluation metrics in percentage format
print('Logistic Regression Model')
print(f'Accuracy: {accuracy_score(y_test, y_pred_lr) * 100:.2f}%')
print(f'Precision: {precision_score(y_test, y_pred_lr) * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, y_pred_lr))

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameters: {'lr__C': 0.001, 'lr__penalty': 'l2'}

Logistic Regression Model
Accuracy: 52.34%
Precision: 51.91%
Classification Report:
              precision    recall  f1-score   support

           0       0.54      0.26      0.35      4892
           1       0.52      0.79      0.63      5008

    accuracy                           0.52      9900
   macro avg       0.53      0.52      0.49      9900
weighted avg       0.53      0.52      0.49      9900



30 fits failed out of a total of 60.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
30 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\haree\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\haree\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1474, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\haree\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\pipeline.py", line 475, in fit
    self._final_estimator.fit(Xt, y, **last_s

# LSTM

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.metrics import accuracy_score, precision_score, classification_report

# Define and compile the LSTM model
lstm_model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128),
    LSTM(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])
lstm_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the LSTM model
history = lstm_model.fit(X_train_pad, y_train, epochs=10, batch_size=64, validation_split=0.1)

# Evaluate the LSTM model
y_pred_lstm = (lstm_model.predict(X_test_pad) > 0.5).astype("int32")

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred_lstm)
precision = precision_score(y_test, y_pred_lstm)

# Print results in percentage
print('LSTM Model')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, y_pred_lstm))


Epoch 1/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 71ms/step - accuracy: 0.5844 - loss: 0.6650 - val_accuracy: 0.6684 - val_loss: 0.6062
Epoch 2/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 71ms/step - accuracy: 0.7210 - loss: 0.5502 - val_accuracy: 0.6518 - val_loss: 0.6266
Epoch 3/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 71ms/step - accuracy: 0.7785 - loss: 0.4735 - val_accuracy: 0.6442 - val_loss: 0.6670
Epoch 4/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 71ms/step - accuracy: 0.8049 - loss: 0.4214 - val_accuracy: 0.6354 - val_loss: 0.7370
Epoch 5/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 71ms/step - accuracy: 0.8298 - loss: 0.3675 - val_accuracy: 0.6313 - val_loss: 0.8380
Epoch 6/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 71ms/step - accuracy: 0.8522 - loss: 0.3167 - val_accuracy: 0.6268 - val_loss: 0.9569
Epoch 7/10
[1m5

# NEURAL NETWORK

In [6]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.metrics import accuracy_score, precision_score, classification_report

# Define Neural Network Model
nn_model = Sequential([
    Dense(512, activation='relu', input_shape=(maxlen,)),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
nn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
history = nn_model.fit(X_train_pad, y_train, epochs=10, batch_size=64, validation_split=0.1)

# Evaluate the model
y_pred_nn = (nn_model.predict(X_test_pad) > 0.5).astype("int32")

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred_nn)
precision = precision_score(y_test, y_pred_nn)

# Print results in percentage
print('Neural Network Model')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, y_pred_nn))


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.5002 - loss: 41.9204 - val_accuracy: 0.5119 - val_loss: 0.6985
Epoch 2/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5064 - loss: 0.9433 - val_accuracy: 0.4939 - val_loss: 0.6935
Epoch 3/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5071 - loss: 0.7586 - val_accuracy: 0.5051 - val_loss: 0.6932
Epoch 4/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5095 - loss: 0.7129 - val_accuracy: 0.5033 - val_loss: 0.6927
Epoch 5/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5116 - loss: 0.7130 - val_accuracy: 0.5038 - val_loss: 0.6918
Epoch 6/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5126 - loss: 0.7043 - val_accuracy: 0.5038 - val_loss: 0.6913
Epoch 7/10
[1m557/557[0m [32m━━━━━━

# GRU MODEL

In [7]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from sklearn.metrics import accuracy_score, precision_score, classification_report

# Assuming you have X_train_pad, X_test_pad, y_train, and y_test ready

# Create the GRU model
gru_model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128),
    GRU(128, dropout=0.2, recurrent_dropout=0.2),
    Dense(1, activation='sigmoid')
])

# Compile the model
gru_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
epochs = 10  # Increased number of epochs
batch_size = 64
validation_split = 0.1

history = gru_model.fit(X_train_pad, y_train, epochs=epochs, batch_size=batch_size, validation_split=validation_split)

# Evaluate the model
y_pred_gru = (gru_model.predict(X_test_pad) > 0.5).astype("int32")

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred_gru)
precision = precision_score(y_test, y_pred_gru)

# Print results in percentage
print('GRU Model')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, y_pred_gru))


Epoch 1/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 73ms/step - accuracy: 0.5796 - loss: 0.6688 - val_accuracy: 0.6513 - val_loss: 0.6206
Epoch 2/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 72ms/step - accuracy: 0.7265 - loss: 0.5465 - val_accuracy: 0.6455 - val_loss: 0.6263
Epoch 3/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 72ms/step - accuracy: 0.7650 - loss: 2021.2390 - val_accuracy: 0.6033 - val_loss: 0.7230
Epoch 4/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 72ms/step - accuracy: 0.7468 - loss: 0.5163 - val_accuracy: 0.6045 - val_loss: 0.7281
Epoch 5/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 72ms/step - accuracy: 0.7776 - loss: 0.4743 - val_accuracy: 0.6056 - val_loss: 0.7433
Epoch 6/10
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 72ms/step - accuracy: 0.7986 - loss: 0.4480 - val_accuracy: 0.6063 - val_loss: 0.7576
Epoch 7/10
[

# DECISION TREE

In [8]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, classification_report

# Decision Tree Model
dt_model = DecisionTreeClassifier(random_state=42, max_depth=10, min_samples_split=5, min_samples_leaf=5)
dt_model.fit(X_train_pad, y_train)

# Evaluate the model
y_pred_dt = dt_model.predict(X_test_pad)

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred_dt)
precision = precision_score(y_test, y_pred_dt)

# Print results in percentage
print('Decision Tree Model')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, y_pred_dt))


Decision Tree Model
Accuracy: 54.57%
Precision: 54.12%
Classification Report:
              precision    recall  f1-score   support

           0       0.55      0.42      0.48      4892
           1       0.54      0.67      0.60      5008

    accuracy                           0.55      9900
   macro avg       0.55      0.54      0.54      9900
weighted avg       0.55      0.55      0.54      9900



# XG BOOST MODEL

In [9]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, classification_report

# XGBoost Model with initial parameters
xgb_model = XGBClassifier(random_state=42)
xgb_model.fit(X_train_pad, y_train)

# Evaluate the model
y_pred_xgb = xgb_model.predict(X_test_pad)

# Calculate metrics
accuracy = accuracy_score(y_test, y_pred_xgb)
precision = precision_score(y_test, y_pred_xgb)

# Print results in percentage
print('XGBoost Model')
print(f'Accuracy: {accuracy * 100:.2f}%')
print(f'Precision: {precision * 100:.2f}%')
print('Classification Report:')
print(classification_report(y_test, y_pred_xgb))


XGBoost Model
Accuracy: 56.60%
Precision: 56.94%
Classification Report:
              precision    recall  f1-score   support

           0       0.56      0.55      0.56      4892
           1       0.57      0.58      0.58      5008

    accuracy                           0.57      9900
   macro avg       0.57      0.57      0.57      9900
weighted avg       0.57      0.57      0.57      9900

