In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [2]:
# Location of the VNL2023 CSV. The default is the original local path; set the
# VNL_CSV_PATH environment variable to run the notebook on another machine
# without editing this cell.
csv_file_path = os.environ.get(
    'VNL_CSV_PATH',
    r'C:\Users\kadir\Desktop\FALL-3\DATA MINING\DATAPROJECT\Dataset\Dataset\VNL2023.csv',
)

# Read the raw table into a DataFrame.
df_vnl = pd.read_csv(csv_file_path)

In [3]:
# Plain-text dump of the loaded frame; the output below abbreviates the middle
# rows and wraps the columns that do not fit on one line.
print(df_vnl)

                          Player    Country  Age  Attack  Block  Serve   Set  \
0                  Ichikawa Yuki      Japan   28   15.80   1.13   1.40  0.07   
1                    Romano Yuri      Italy   26   12.33   1.07   1.47  0.00   
2               Abdel-Aziz Nimir  Nederland   31   15.33   0.67   2.08  0.00   
3            Herrera Jaime Jesus       Cuba   28   15.00   0.92   1.75  0.00   
4                  Takahashi Ran      Japan   22   11.53   0.67   1.00  0.07   
..                           ...        ...  ...     ...    ...    ...   ...   
126               Graven Leonard    Germany   19    0.00   0.00   0.00  0.00   
127                 Balaso Fabio      Italy   28    0.00   0.00   0.00  0.00   
128      Nacsimento Maique Reis      Brazil   26    0.00   0.00   0.00  0.00   
129  Garcia Alvarez Yonder Roman       Cuba   30    0.00   0.00   0.00  0.00   
130              Salparov Teodor   Bulgaria   41    0.00   0.00   0.00  0.00   

       Dig  Recieve  Position  
0     4

In [4]:
# Set the ten column names explicitly. They are written exactly as in the header
# printed by the previous cell, including the 'Recieve' spelling.
df_vnl.columns = [
    'Player', 'Country', 'Age',
    'Attack', 'Block', 'Serve', 'Set', 'Dig', 'Recieve',
    'Position',
]

In [5]:
# Preview the first five rows (the default of head(), spelled out here).
df_vnl.head(n=5)

Unnamed: 0,Player,Country,Age,Attack,Block,Serve,Set,Dig,Recieve,Position
0,Ichikawa Yuki,Japan,28,15.8,1.13,1.4,0.07,4.8,5.6,OH
1,Romano Yuri,Italy,26,12.33,1.07,1.47,0.0,3.87,0.0,OP
2,Abdel-Aziz Nimir,Nederland,31,15.33,0.67,2.08,0.0,3.17,0.25,OP
3,Herrera Jaime Jesus,Cuba,28,15.0,0.92,1.75,0.0,3.33,0.17,OP
4,Takahashi Ran,Japan,22,11.53,0.67,1.0,0.07,6.4,5.07,OH


In [6]:
# Print the (rows, columns) tuple, then the row count and the column count,
# each on its own line.
print(df_vnl.shape, *df_vnl.shape, sep='\n')

(131, 10)
131
10


In [7]:
# Replace each text column with integer codes. fit_transform is called anew for
# every column, so once the loop ends the shared encoder was last fitted on
# 'Position' (the final column processed).
label_encoder = LabelEncoder()

for text_column in ('Player', 'Country', 'Position'):
    df_vnl[text_column] = label_encoder.fit_transform(df_vnl[text_column])

In [8]:
# Features are every column except the encoded target.
# NOTE(review): this keeps the encoded 'Player' column as a feature - confirm
# that is intended.
X = df_vnl.drop(columns='Position')
y = df_vnl['Position']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Fit the scaler on the training split only, then apply that same fitted scaler
# to both the training and the test features.
stdsc = StandardScaler().fit(X_train)
X_train_std = stdsc.transform(X_train)
X_test_std = stdsc.transform(X_test)

In [10]:
def evaluate_model(model, model_name):
    """Train ``model`` and print its test-set report and confusion matrix.

    The model is fitted on the notebook-level ``X_train_std`` / ``y_train`` and
    its predictions for ``X_test_std`` are compared against ``y_test``.
    Nothing is returned; the results are only printed.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        model_name: Label placed in front of each printed heading.
    """
    model.fit(X_train_std, y_train)
    predicted = model.predict(X_test_std)

    # Both metrics are computed before anything is printed. The sections then
    # go out in order: report heading, report, matrix heading, matrix.
    sections = (
        f"{model_name} Classification Report:",
        classification_report(y_test, predicted),
        f"\n{model_name} Confusion Matrix:",
        confusion_matrix(y_test, predicted),
    )
    for section in sections:
        print(section)

    # Trailing blank line separates consecutive model outputs.
    print()

# Compare k-nearest-neighbour classifiers for k = 3, 7 and 11, all using
# Euclidean distance.
for n_neighbors in (3, 7, 11):
    knn_classifier = KNeighborsClassifier(metric='euclidean', n_neighbors=n_neighbors)
    evaluate_model(knn_classifier, model_name=f"KNN-{n_neighbors}")


KNN-3 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.75      0.86      0.80         7
           2       0.91      0.91      0.91        11
           3       0.67      0.50      0.57         4
           4       1.00      1.00      1.00         2

    accuracy                           0.85        27
   macro avg       0.87      0.85      0.86        27
weighted avg       0.85      0.85      0.85        27


KNN-3 Confusion Matrix:
[[ 3  0  0  0  0]
 [ 0  6  1  0  0]
 [ 0  0 10  1  0]
 [ 0  2  0  2  0]
 [ 0  0  0  0  2]]

KNN-7 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.86      0.86      0.86         7
           2       0.83      0.91      0.87        11
           3       0.67      0.50      0.57         4
           4       1.00      1.00      1.00         2

    accura

In [12]:
# Three MLP models that differ only in depth: one, two and three hidden layers
# of 32 neurons each. All share the same iteration cap and random seed.
mlp_classifier_1 = MLPClassifier(hidden_layer_sizes=(32,), max_iter=10000, random_state=42)
mlp_classifier_2 = MLPClassifier(hidden_layer_sizes=(32, 32), max_iter=10000, random_state=42)
mlp_classifier_3 = MLPClassifier(hidden_layer_sizes=(32, 32, 32), max_iter=10000, random_state=42)

# Train and report each network in order of increasing depth.
for mlp_model, mlp_label in (
    (mlp_classifier_1, "MLP 32"),
    (mlp_classifier_2, "MLP 32, 32"),
    (mlp_classifier_3, "MLP 32, 32, 32"),
):
    evaluate_model(mlp_model, mlp_label)


MLP 32 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.88      1.00      0.93         7
           2       1.00      0.91      0.95        11
           3       0.75      0.75      0.75         4
           4       1.00      1.00      1.00         2

    accuracy                           0.93        27
   macro avg       0.93      0.93      0.93        27
weighted avg       0.93      0.93      0.93        27


MLP 32 Confusion Matrix:
[[ 3  0  0  0  0]
 [ 0  7  0  0  0]
 [ 0  0 10  1  0]
 [ 0  1  0  3  0]
 [ 0  0  0  0  2]]

MLP 32, 32 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.88      1.00      0.93         7
           2       1.00      0.91      0.95        11
           3       0.75      0.75      0.75         4
           4       1.00      1.00      1.00         2

   

In [15]:
# Naive Bayes model
nb_classifier = GaussianNB()

# evaluate_model() fits the classifier itself before printing its metrics, so a
# separate fit() call beforehand would only train the same model twice.
evaluate_model(nb_classifier, "Naive Bayes")

# Keep the fitted model's test-set predictions; the summary cell further down
# reads y_pred_nb.
y_pred_nb = nb_classifier.predict(X_test_std)

Naive Bayes Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.70      1.00      0.82         7
           2       0.89      0.73      0.80        11
           3       0.33      0.25      0.29         4
           4       1.00      1.00      1.00         2

    accuracy                           0.78        27
   macro avg       0.78      0.80      0.78        27
weighted avg       0.78      0.78      0.77        27




In [16]:
# Report-only variant of evaluate_model. NOTE: this rebinds the name defined in
# an earlier cell, so from here on calls no longer print a confusion matrix.
def evaluate_model(model, model_name):
    """Train ``model`` and print its test-set classification report.

    Uses the notebook-level ``X_train_std`` / ``y_train`` for fitting and
    ``X_test_std`` / ``y_test`` for the report. Nothing is returned.

    Args:
        model: Object exposing ``fit(X, y)`` and ``predict(X)``.
        model_name: Label placed in front of the printed heading.
    """
    model.fit(X_train_std, y_train)
    report_text = classification_report(y_test, model.predict(X_test_std))

    # Heading, report, then one blank line, emitted with a single print call.
    print(f"{model_name} Classification Report:", report_text, "", sep="\n")


# Re-run the KNN comparison (k = 3, 7, 11) with the evaluate_model currently in scope.
for n_neighbors in (3, 7, 11):
    knn_classifier = KNeighborsClassifier(metric='euclidean', n_neighbors=n_neighbors)
    evaluate_model(knn_classifier, model_name=f"KNN-{n_neighbors}")


# The MLP reports need test-set predictions, but no visible earlier cell assigns
# y_pred_1 / y_pred_2 / y_pred_3, so a fresh "Restart & Run All" would stop here
# with a NameError. Take them from the MLP classifiers that were already fitted
# when they were evaluated above.
y_pred_1 = mlp_classifier_1.predict(X_test_std)  # 1 hidden layer, 32 neurons
y_pred_2 = mlp_classifier_2.predict(X_test_std)  # 2 hidden layers, 32 neurons each
y_pred_3 = mlp_classifier_3.predict(X_test_std)  # 3 hidden layers, 32 neurons each

# One classification report per model, in the same order as before: the three
# MLP depths first, then Naive Bayes (y_pred_nb comes from the Naive Bayes cell).
for summary_label, summary_pred in (
    ("MLP (1 Hidden Layer)", y_pred_1),
    ("MLP (2 Hidden Layers)", y_pred_2),
    ("MLP (3 Hidden Layers)", y_pred_3),
    ("Naive Bayes", y_pred_nb),
):
    print(f"{summary_label} Classification Report:")
    print(classification_report(y_test, summary_pred))

KNN-3 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.75      0.86      0.80         7
           2       0.91      0.91      0.91        11
           3       0.67      0.50      0.57         4
           4       1.00      1.00      1.00         2

    accuracy                           0.85        27
   macro avg       0.87      0.85      0.86        27
weighted avg       0.85      0.85      0.85        27


KNN-7 Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3
           1       0.86      0.86      0.86         7
           2       0.83      0.91      0.87        11
           3       0.67      0.50      0.57         4
           4       1.00      1.00      1.00         2

    accuracy                           0.85        27
   macro avg       0.87      0.85      0.86        27
weighted avg      