# Tennis Match Prediction Using Neural Networks

## Importing Required Libraries

In [35]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, SGD

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report

import itertools

import warnings
warnings.filterwarnings('ignore')

## Loading and Exploring the Dataset
This code loads the tennis match data from a CSV file and displays the first few rows to get an initial view of the dataset.

In [2]:
# Read the raw match records into a DataFrame and preview the first five rows.
df = pd.read_csv(filepath_or_buffer='matches.csv')
df.head(n=5)

Unnamed: 0,Tournament,Date,Series,Court,Surface,Round,Best of,Player_1,Player_2,Winner,Rank_1,Rank_2,Pts_1,Pts_2,Odd_1,Odd_2,Score
0,Australian Hardcourt Championships,2000-01-03,International,Outdoor,Hard,1st Round,3,Dosedel S.,Ljubicic I.,Dosedel S.,63,77,-1,-1,-1.0,-1.0,6-4 6-2
1,Australian Hardcourt Championships,2000-01-03,International,Outdoor,Hard,1st Round,3,Clement A.,Enqvist T.,Enqvist T.,56,5,-1,-1,-1.0,-1.0,3-6 3-6
2,Australian Hardcourt Championships,2000-01-03,International,Outdoor,Hard,1st Round,3,Escude N.,Baccanello P.,Escude N.,40,655,-1,-1,-1.0,-1.0,6-7 7-5 6-3
3,Australian Hardcourt Championships,2000-01-03,International,Outdoor,Hard,1st Round,3,Knippschild J.,Federer R.,Federer R.,87,65,-1,-1,-1.0,-1.0,1-6 4-6
4,Australian Hardcourt Championships,2000-01-03,International,Outdoor,Hard,1st Round,3,Fromberg R.,Woodbridge T.,Fromberg R.,81,198,-1,-1,-1.0,-1.0,7-6 5-7 6-4


## Handling Missing Values
Here we identify missing values (represented as -1 or -1.0) in the dataset and count how many rows contain missing values.

In [3]:
# Missing entries are stored as the sentinel -1 (int) / -1.0 (float).
sentinel_mask = df.isin([-1, -1.0])

# Per-column count of sentinel cells.
missing_values = sentinel_mask.sum()

# Every row that carries at least one sentinel value.
rows_with_missing_values = df[sentinel_mask.any(axis=1)]

missing_values, len(rows_with_missing_values)

(Tournament        0
 Date              0
 Series            0
 Court             0
 Surface           0
 Round             0
 Best of           0
 Player_1          0
 Player_2          0
 Winner            0
 Rank_1           14
 Rank_2           12
 Pts_1         15652
 Pts_2         15653
 Odd_1          3782
 Odd_2          3780
 Score             0
 dtype: int64,
 15822)

## Data Cleaning and Feature Engineering
In this section, we clean the data by replacing missing values, dropping unnecessary columns, encoding the winner, and creating new features based on the differences between the players' ranks, points, and odds.

In [4]:
# Replace the -1 sentinels with neutral defaults: 0 points and odds of 1.0.
# The result is assigned back to the frame instead of calling
# `df[col].replace(..., inplace=True)`: that chained in-place form acts on a
# temporary column object and does not update `df` under pandas Copy-on-Write.
df[['Pts_1', 'Pts_2']] = df[['Pts_1', 'Pts_2']].replace(-1, 0)
df[['Odd_1', 'Odd_2']] = df[['Odd_1', 'Odd_2']].replace(-1.0, 1.0)
# NOTE(review): Rank_1 / Rank_2 also contain a few -1 sentinels (see the counts
# printed by the previous cell) and are left untouched here - confirm intended.

In [5]:
# The raw score string is dropped from the frame.
df = df.drop(columns=['Score'])

# Binary target: 1 when Player_1 won the match, 0 otherwise.
player_1_won = df['Winner'].eq(df['Player_1'])
df['Winner'] = np.where(player_1_won, 1, 0)

# Head-to-head gaps, always computed as player 1 minus player 2.
df['Rank_Diff'] = df['Rank_1'].sub(df['Rank_2'])
df['Pts_Diff'] = df['Pts_1'].sub(df['Pts_2'])
df['Odds_Diff'] = df['Odd_1'].sub(df['Odd_2'])

In [None]:
# Persist the cleaned frame (before categorical encoding) without the index column.
df.to_csv(path_or_buf='matches_final.csv', index=False)

## Encoding Categorical Variables
Here we encode categorical variables using LabelEncoder and convert the date to a numeric format.

In [6]:
# Keep one fitted encoder per column so every mapping can be inverted later;
# reusing a single LabelEncoder would retain only the last column's classes.
encoders = {}
for col in ['Tournament', 'Series', 'Court', 'Surface', 'Round']:
    encoders[col] = LabelEncoder()
    df[col] = encoders[col].fit_transform(df[col])

# Both player columns draw from the same pool of names, so they must share one
# encoder. Fitting them separately gives the same player a different integer
# depending on whether they appear as Player_1 or Player_2.
le = LabelEncoder().fit(pd.concat([df['Player_1'], df['Player_2']]))
encoders['Player'] = le
df['Player_1'] = le.transform(df['Player_1'])
df['Player_2'] = le.transform(df['Player_2'])

In [7]:
df['Date'] = pd.to_datetime(df['Date'])
# Convert to whole seconds since the Unix epoch. Dividing a Timedelta by a
# one-second Timedelta is independent of the column's datetime resolution,
# whereas `astype(int) // 10**9` is only correct for nanosecond data and relies
# on the platform `int` being 64-bit.
df['Date'] = (df['Date'] - pd.Timestamp('1970-01-01')) // pd.Timedelta(seconds=1)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63751 entries, 0 to 63750
Data columns (total 19 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Tournament  63751 non-null  int64  
 1   Date        63751 non-null  int64  
 2   Series      63751 non-null  int64  
 3   Court       63751 non-null  int64  
 4   Surface     63751 non-null  int64  
 5   Round       63751 non-null  int64  
 6   Best of     63751 non-null  int64  
 7   Player_1    63751 non-null  int64  
 8   Player_2    63751 non-null  int64  
 9   Winner      63751 non-null  int64  
 10  Rank_1      63751 non-null  int64  
 11  Rank_2      63751 non-null  int64  
 12  Pts_1       63751 non-null  int64  
 13  Pts_2       63751 non-null  int64  
 14  Odd_1       63751 non-null  float64
 15  Odd_2       63751 non-null  float64
 16  Rank_Diff   63751 non-null  int64  
 17  Pts_Diff    63751 non-null  int64  
 18  Odds_Diff   63751 non-null  float64
dtypes: float64(3), int64(16)


## Feature Scaling
We use StandardScaler to standardize the numerical features (zero mean, unit variance), which generally helps the neural network train more stably.

In [9]:
NUMERIC_COLS = ['Date', 'Rank_1', 'Rank_2', 'Pts_1', 'Pts_2', 'Odd_1', 'Odd_2',
                'Rank_Diff', 'Pts_Diff', 'Odds_Diff']

# Fit the scaler on the training rows only, so that test-set statistics do not
# leak into the features. train_test_split picks rows from the number of
# samples and the seed alone, so using the same test_size / random_state as the
# split cell below yields exactly the same training rows. Keep them in sync.
train_rows, _ = train_test_split(np.arange(len(df)), test_size=0.2, random_state=42)

scaler = StandardScaler().fit(df.iloc[train_rows][NUMERIC_COLS])
df[NUMERIC_COLS] = scaler.transform(df[NUMERIC_COLS])

## Splitting the Dataset
The data is split into training and testing sets, with 80% used for training and 20% for testing.

In [10]:
# Features are every column except the target; the target is the binary Winner flag.
X, y = df.drop('Winner', axis=1), df['Winner']

# Hold out 20% of the rows for testing, with a fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

## Building and Training Initial Models
We build and train three different neural network models, each with slight variations, to compare their performance.

In [11]:
# Baseline network: two hidden ReLU layers (64 -> 32) and a sigmoid output unit.
model_1 = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model_1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_1.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.5777 - loss: 5.1280 - val_accuracy: 0.6178 - val_loss: 0.8007
Epoch 2/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.6194 - loss: 0.8294 - val_accuracy: 0.6294 - val_loss: 0.7424
Epoch 3/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6321 - loss: 0.7735 - val_accuracy: 0.6574 - val_loss: 0.6349
Epoch 4/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6312 - loss: 0.7617 - val_accuracy: 0.6641 - val_loss: 0.6268
Epoch 5/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.6328 - loss: 0.7252 - val_accuracy: 0.6527 - val_loss: 0.6207
Epoch 6/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6358 - loss: 0.6819 - val_accuracy: 0.6145 - val_loss: 0.7167
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x79d4a78023b0>

In [12]:
# Score model_1 on the held-out split (evaluate returns [loss, accuracy]).
_, accuracy = model_1.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (100 * accuracy))

# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
y_pred = model_1.predict(X_test) > 0.5

print(classification_report(y_test, y_pred))

[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6592 - loss: 0.6033
Accuracy: 66.51
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
              precision    recall  f1-score   support

           0       0.63      0.79      0.70      6343
           1       0.72      0.54      0.62      6408

    accuracy                           0.67     12751
   macro avg       0.68      0.67      0.66     12751
weighted avg       0.68      0.67      0.66     12751



In [18]:
# Same architecture as the baseline, plus L2 weight decay on the first hidden layer.
model_2 = Sequential([
    Dense(
        64,
        input_dim=X_train.shape[1],
        activation='relu',
        kernel_regularizer=l2(0.001),
    ),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model_2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_2.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.5706 - loss: 2.1208 - val_accuracy: 0.6487 - val_loss: 0.6986
Epoch 2/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6169 - loss: 0.8382 - val_accuracy: 0.6461 - val_loss: 0.6910
Epoch 3/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.6159 - loss: 0.8075 - val_accuracy: 0.6447 - val_loss: 0.6552
Epoch 4/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.6201 - loss: 0.7594 - val_accuracy: 0.6018 - val_loss: 0.7720
Epoch 5/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6325 - loss: 0.7160 - val_accuracy: 0.6599 - val_loss: 0.6359
Epoch 6/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.6300 - loss: 0.7121 - val_accuracy: 0.6266 - val_loss: 0.7185
Epoch 7/10
[1m1594/1594[0

<keras.src.callbacks.history.History at 0x79d42862a830>

In [19]:
# Score model_2 on the held-out split (evaluate returns [loss, accuracy]).
_, accuracy = model_2.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (100 * accuracy))

# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
y_pred = model_2.predict(X_test) > 0.5

print(classification_report(y_test, y_pred))

[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6335 - loss: 0.6498
Accuracy: 63.65
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.59      0.88      0.71      6343
           1       0.77      0.39      0.52      6408

    accuracy                           0.64     12751
   macro avg       0.68      0.64      0.61     12751
weighted avg       0.68      0.64      0.61     12751



In [22]:
# Baseline architecture trained with a smaller Adam learning rate (1e-4).
model_3 = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid'),
])

model_3.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model_3.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=10,
    batch_size=32,
)

Epoch 1/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.5194 - loss: 10.0115 - val_accuracy: 0.6037 - val_loss: 0.7147
Epoch 2/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.6056 - loss: 0.7094 - val_accuracy: 0.5864 - val_loss: 0.7583
Epoch 3/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.6332 - loss: 0.6555 - val_accuracy: 0.6436 - val_loss: 0.6326
Epoch 4/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.6357 - loss: 0.6501 - val_accuracy: 0.6393 - val_loss: 0.6402
Epoch 5/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6478 - loss: 0.6377 - val_accuracy: 0.6385 - val_loss: 0.6495
Epoch 6/10
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.6494 - loss: 0.6351 - val_accuracy: 0.6613 - val_loss: 0.6099
Epoch 7/10
[1m

<keras.src.callbacks.history.History at 0x79d428471bd0>

In [23]:
# Score model_3 on the held-out split (evaluate returns [loss, accuracy]).
_, accuracy = model_3.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (100 * accuracy))

# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
y_pred = model_3.predict(X_test) > 0.5

print(classification_report(y_test, y_pred))

[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.6667 - loss: 0.5986
Accuracy: 67.31
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.67      0.68      0.67      6343
           1       0.68      0.67      0.67      6408

    accuracy                           0.67     12751
   macro avg       0.67      0.67      0.67     12751
weighted avg       0.67      0.67      0.67     12751



## Hyperparameter Tuning
This section performs a grid search to find the best hyperparameters for our neural network model.

In [39]:
# Search space for the manual grid search. Every combination of these values is
# tried, and the key order fixes the order in which combinations are generated.
GRID_SEARCH_PARAMS = dict(
    neurons=[32, 64],                 # width of both hidden layers
    learning_rate=[0.001, 0.01, 0.1],
    batch_size=[32, 64],
    epochs=[10, 15, 20],
    optimizer=['adam', 'sgd'],
)

def create_model(neurons, learning_rate, optimizer):
    """Build and compile a binary classifier with two equal-width hidden layers.

    Args:
        neurons: Number of units in each of the two hidden ReLU layers.
        learning_rate: Learning rate passed to the optimizer.
        optimizer: Optimizer name, either 'adam' or 'sgd'.

    Returns:
        A compiled Sequential model with a single sigmoid output, using
        binary cross-entropy loss and the accuracy metric.

    Raises:
        ValueError: If `optimizer` is not one of the supported names.
    """
    optimizer_classes = {'adam': Adam, 'sgd': SGD}
    # Fail loudly on an unknown name rather than silently training with SGD,
    # which would make a typo such as 'Adam' look like a valid grid entry.
    if optimizer not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {optimizer!r}; expected one of {sorted(optimizer_classes)}"
        )

    model = Sequential()
    # The input width is taken from the notebook-level training features.
    model.add(Dense(neurons, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(neurons, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)

    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

# Select hyperparameters on a validation split carved out of the training data.
# Choosing them by test-set accuracy would tune the model to the test set and
# make the final test score optimistically biased.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

best_accuracy = 0
best_params = {}

# Expand the search space into one dict per hyperparameter combination.
keys, values = zip(*GRID_SEARCH_PARAMS.items())
combinations = [dict(zip(keys, v)) for v in itertools.product(*values)]

print(f"Total combinations: {len(combinations)}")

for params in combinations:
    print(f"Testing combination: {params}")

    model = create_model(
        neurons=params['neurons'],
        learning_rate=params['learning_rate'],
        optimizer=params['optimizer']
    )

    model.fit(X_fit, y_fit, epochs=params['epochs'], batch_size=params['batch_size'], verbose=0)

    # evaluate returns [loss, accuracy]; only the validation accuracy drives selection.
    val_loss, accuracy = model.evaluate(X_val, y_val, verbose=0)

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = params
        print(f"New best accuracy: {best_accuracy:.4f}")

print(f"Best Accuracy: {best_accuracy:.4f}")
print(f"Best Params: {best_params}")


Total combinations: 72
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 10, 'optimizer': 'adam'}
New best accuracy: 0.6654
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 10, 'optimizer': 'sgd'}
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 15, 'optimizer': 'adam'}
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 15, 'optimizer': 'sgd'}
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 20, 'optimizer': 'adam'}
New best accuracy: 0.6922
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 32, 'epochs': 20, 'optimizer': 'sgd'}
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 10, 'optimizer': 'adam'}
Testing combination: {'neurons': 32, 'learning_rate': 0.001, 'batch_size': 64, 'epochs': 10, 'optimizer': 'sgd'}
Testing combinati

## Training the Best Model
We create and train a model using the best hyperparameters found during the grid search.

In [40]:
# Display the highest accuracy recorded by the grid-search loop.
best_accuracy

0.6921809911727905

In [41]:
# Display the hyperparameter combination that produced best_accuracy.
best_params

{'neurons': 32,
 'learning_rate': 0.001,
 'batch_size': 32,
 'epochs': 20,
 'optimizer': 'adam'}

In [43]:
# Rebuild a fresh model from the winning architecture/optimizer settings.
model_kwargs = {name: best_params[name] for name in ('neurons', 'learning_rate', 'optimizer')}
best_model = create_model(**model_kwargs)

# Train it with the winning schedule, monitoring the held-out split each epoch.
best_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=best_params['epochs'],
    batch_size=best_params['batch_size'],
)

Epoch 1/20
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - accuracy: 0.5615 - loss: 2.5859 - val_accuracy: 0.6277 - val_loss: 0.7609
Epoch 2/20
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.6188 - loss: 0.8492 - val_accuracy: 0.6226 - val_loss: 0.7316
Epoch 3/20
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - accuracy: 0.6267 - loss: 0.7923 - val_accuracy: 0.6393 - val_loss: 0.6774
Epoch 4/20
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.6295 - loss: 0.7836 - val_accuracy: 0.6604 - val_loss: 0.6286
Epoch 5/20
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.6356 - loss: 0.7387 - val_accuracy: 0.6570 - val_loss: 0.6369
Epoch 6/20
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6270 - loss: 0.7423 - val_accuracy: 0.6298 - val_loss: 0.6727
Epoch 7/20
[1m1

<keras.src.callbacks.history.History at 0x79d37c56e500>

In [44]:
# Score best_model on the held-out split (evaluate returns [loss, accuracy]).
_, accuracy = best_model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (100 * accuracy))

# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
y_pred = best_model.predict(X_test) > 0.5

print(classification_report(y_test, y_pred))

[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6804 - loss: 0.5855
Accuracy: 68.11
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
              precision    recall  f1-score   support

           0       0.71      0.60      0.65      6343
           1       0.66      0.76      0.71      6408

    accuracy                           0.68     12751
   macro avg       0.69      0.68      0.68     12751
weighted avg       0.69      0.68      0.68     12751



## Extended Training and Final Evaluation
Finally, we continue training the best model for 50 additional epochs and evaluate its performance on the test set.

In [50]:
# Continue training the already-fitted best_model for another 50 epochs.
best_model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=best_params['batch_size'],
)

Epoch 1/50
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.6879 - loss: 0.5794 - val_accuracy: 0.6792 - val_loss: 0.5921
Epoch 2/50
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6821 - loss: 0.5822 - val_accuracy: 0.6841 - val_loss: 0.5878
Epoch 3/50
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6883 - loss: 0.5791 - val_accuracy: 0.6876 - val_loss: 0.5819
Epoch 4/50
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.6893 - loss: 0.5768 - val_accuracy: 0.6842 - val_loss: 0.5868
Epoch 5/50
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 2ms/step - accuracy: 0.6840 - loss: 0.5819 - val_accuracy: 0.6893 - val_loss: 0.5856
Epoch 6/50
[1m1594/1594[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6885 - loss: 0.5784 - val_accuracy: 0.6871 - val_loss: 0.5844
Epoch 7/50
[1m1

<keras.src.callbacks.history.History at 0x79d3f8d6e860>

In [51]:
# Re-score best_model after the extended training run (evaluate returns [loss, accuracy]).
_, accuracy = best_model.evaluate(X_test, y_test)
print('Accuracy: %.2f' % (100 * accuracy))

# Threshold the sigmoid outputs at 0.5 to obtain hard class labels.
y_pred = best_model.predict(X_test) > 0.5

print(classification_report(y_test, y_pred))

[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6878 - loss: 0.5846
Accuracy: 69.02
[1m399/399[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.68      0.70      0.69      6343
           1       0.70      0.68      0.69      6408

    accuracy                           0.69     12751
   macro avg       0.69      0.69      0.69     12751
weighted avg       0.69      0.69      0.69     12751

