# Importação das Bibliotecas

In [66]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Importação da Base de Dados

In [67]:
# Read the players table from disk (it includes the `level` column used later as
# the target), then preview its first five rows.
players = pd.read_csv(filepath_or_buffer='players_classified.csv')
players.head(n=5)

Unnamed: 0,player_id,matches_count,wins_count,easy_wins_count,defeated_count,easy_defeated_count,tied,score,player_name,email,country,last_login,level
0,DA1X9864,24.0,9.0,4.0,15.0,9.0,0.0,0.375,Trisha Kessler,trisha_kessler@gmail.com,Myanmar (Burma),2020-11-12,advanced
1,SZ0S2637,22.0,14.0,10.0,8.0,6.0,0.0,0.636364,Tangela Graver,tangela_g@gmail.com,Marshall Islands,2020-12-11,expert
2,QX0H6977,20.0,11.0,7.0,8.0,7.0,1.0,0.566667,Eliseo Ewald,eliseo_ewald@gmail.com,Italy,2020-12-20,expert
3,BK3W3327,14.0,11.0,6.0,3.0,3.0,0.0,0.785714,Bernardo Dieterich,dieterich.b@gmail.com,Cambodia,2020-10-11,intermediate
4,EG5I9716,22.0,12.0,9.0,10.0,7.0,0.0,0.545455,Maurice Wilcoxson,wilcoxson_m@gmail.com,Marshall Islands,2020-12-11,expert


In [68]:
# Print a summary of the frame: row count, columns, non-null counts and dtypes.
players.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   player_id            10000 non-null  object 
 1   matches_count        10000 non-null  float64
 2   wins_count           10000 non-null  float64
 3   easy_wins_count      10000 non-null  float64
 4   defeated_count       10000 non-null  float64
 5   easy_defeated_count  10000 non-null  float64
 6   tied                 10000 non-null  float64
 7   score                10000 non-null  float64
 8   player_name          10000 non-null  object 
 9   email                10000 non-null  object 
 10  country              10000 non-null  object 
 11  last_login           10000 non-null  object 
 12  level                10000 non-null  object 
dtypes: float64(7), object(6)
memory usage: 1015.8+ KB


In [69]:
# Count how many players fall into each `level` class (checks class balance).
players['level'].value_counts()

beginner        2828
intermediate    2560
expert          2546
advanced        2066
Name: level, dtype: int64

# Separando Colunas para o treinamento

In [70]:
# Feature columns fed to the classifier, in the order the model expects them.
variables = [
    'matches_count',
    'easy_wins_count',
    'easy_defeated_count',
    'score',
]

In [71]:
# Feature matrix: every row, only the selected feature columns.
X = players.loc[:, variables].values
# Target vector: the `level` label of each player.
y = players.loc[:, 'level'].values

# Treinamento do Modelo - Random Forest

In [72]:
# Fixed seed: the forest's bootstrap sampling and per-split feature selection are
# random, so without it the cross-validation score and the later predictions
# change on every run.
SEED = 42
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    criterion='entropy',
    random_state=SEED,
)
# 10-fold cross-validation; returns one score per fold (the classifier's default
# score, i.e. accuracy).
rf_results = cross_val_score(rf_classifier, X, y, cv=10)

# Acurácia média na validação cruzada (%)

In [73]:
# Average the per-fold scores and express the result as a percentage.
100 * np.mean(rf_results)

98.72

# Classificando novos jogadores

In [74]:
# Fit the forest on the full dataset (X, y) so it can classify the new players below.
rf_classifier.fit(X, y)

RandomForestClassifier(criterion='entropy')

Os valores em cada lista abaixo correspondem, nesta ordem, a: ['matches_count', 'easy_wins_count', 'easy_defeated_count', 'score']

In [75]:
# One row per new player; column order: matches_count, easy_wins_count,
# easy_defeated_count, score.
new_player_1 = np.array([24, 9, 4, 0.63]).reshape(1, -1)
new_player_2 = np.array([4, 0, 0, 0.23]).reshape(1, -1)

# Classify each player separately; each result is a one-element array of labels.
predict_player_1, predict_player_2 = (
    rf_classifier.predict(candidate) for candidate in (new_player_1, new_player_2)
)

In [76]:
# Report the predicted level for the first new player.
player_1_message = 'O novo jogador 1 é classificado como: {}.'.format(predict_player_1[0])
print(player_1_message)

O novo jogador 1 é classificado como: expert.


In [77]:
# Report the predicted level for the second new player.
player_2_message = 'O novo jogador 2 é classificado como: {}.'.format(predict_player_2[0])
print(player_2_message)

O novo jogador 2 é classificado como: beginner.


# Conclusão

### Podemos usar a classificação realizada com a clusterização para classificar novos jogadores.

Obs: Foi utilizado somente o algoritmo Random Forest, pois ele obteve um ótimo resultado, mas é possível testar outros algoritmos de classificação existentes para comparação, como, por exemplo, redes neurais e o algoritmo XGBoost.