# Getting Started

In [None]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import f1_score

In [None]:
# Load the top women chess players CSV into a DataFrame.
# NOTE(review): the relative '../input/...' path looks like a Kaggle input
# mount — adjust it when running the notebook elsewhere.
data = pd.read_csv('../input/top-women-chess-players/top_women_chess_players_aug_2020.csv')

In [None]:
# Notebook display: show the freshly loaded DataFrame.
data

# Preprocessing

In [None]:
# Remove the columns that are not used as model inputs.
# NOTE(review): 'Gender' is presumably constant in a women-only list — confirm.
dropped_columns = ['Fide id', 'Name', 'Gender']
data.drop(columns=dropped_columns, inplace=True)

In [None]:
# Notebook display: inspect the frame after dropping 'Fide id', 'Name' and 'Gender'.
data

## Missing Values

In [None]:
# Count the missing values in each column.
data.isnull().sum()

In [None]:
# Show the dtype of every column.
data.dtypes

In [None]:
# Columns whose missing values are filled with the column mean.
numerical_features = [
    'Year_of_birth',
    'Rapid_rating',
    'Blitz_rating',
]

In [None]:
# Mean-impute every numeric feature in one vectorised step: fillna with a
# Series of per-column means fills each column with its own mean.
column_means = data[numerical_features].mean()
data[numerical_features] = data[numerical_features].fillna(column_means)

In [None]:
# Re-count missing values per column after mean-imputing the numeric features.
data.isnull().sum()

In [None]:
# List the distinct values found in 'Title'.
data['Title'].unique()

In [None]:
# List the distinct values found in 'Inactive_flag' before filling its gaps.
data['Inactive_flag'].unique()

In [None]:
# Replace missing 'Inactive_flag' entries with the placeholder category 'wa'
# so the column has no gaps left when it is label-encoded.
data = data.assign(Inactive_flag=data['Inactive_flag'].fillna('wa'))

In [None]:
# Re-count missing values per column after filling 'Inactive_flag'.
data.isnull().sum()

In [None]:
# One indicator column per distinct title; only the 'GM' column is used later.
title_column = data['Title']
title_dummies = pd.get_dummies(title_column)
title_dummies

In [None]:
# Append the grandmaster indicator as a new last column 'GM', then discard the
# raw 'Title' column it was derived from.
data['GM'] = title_dummies['GM']
del data['Title']

In [None]:
# Notebook display: inspect the frame now that 'Title' has been replaced by 'GM'.
data

In [None]:
# Re-count missing values per column after swapping 'Title' for the 'GM' column.
data.isnull().sum()

## Encoding

In [None]:
# List the distinct 'Inactive_flag' values that are about to be label-encoded.
data['Inactive_flag'].unique()

In [None]:
# Map the 'Inactive_flag' categories to integer codes.
# The fitted encoder is kept so the codes can be mapped back to labels.
encoder = LabelEncoder()
encoder.fit(data['Inactive_flag'])
data['Inactive_flag'] = encoder.transform(data['Inactive_flag'])

In [None]:
# Notebook display: inspect the frame after label-encoding 'Inactive_flag'.
data

In [None]:
# List the distinct values found in 'Federation'.
data['Federation'].unique()

In [None]:
# 'Federation' is not used as a feature; remove it from the frame in place.
del data['Federation']

In [None]:
# Notebook display: inspect the frame after dropping 'Federation'.
data

In [None]:
# Split the frame into the feature matrix (everything except 'GM') and the
# target vector (the 'GM' indicator).
X = data.drop(columns=['GM'])
y = data['GM']

In [None]:
# Notebook display: inspect the feature matrix (every column except 'GM').
X

In [None]:
# Min-max scale every feature and rebuild a DataFrame carrying the original
# column names (the rebuilt frame gets a fresh default index).
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set ranges leak into the scaling; consider fitting on the training
# rows only.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(X)
X = pd.DataFrame(scaled_values, columns=X.columns)

In [None]:
# Notebook display: inspect the features after min-max scaling.
X

# Training

In [None]:
# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so the metrics printed below are reproducible
# from run to run. stratify=y keeps the GM / non-GM ratio the same in both
# splits, so an unlucky shuffle cannot leave the test set short of positives.
# NOTE(review): stratify needs at least two rows of each class in y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42, stratify=y
)

In [None]:
# Fit a logistic-regression classifier with default settings on the training
# split. fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression().fit(X_train, y_train)
model

In [None]:
# Score the fitted model on the held-out test split and report it.
accuracy = model.score(X_test, y_test)
print(f"Model Accuracy: {accuracy}")

In [None]:
# Predicted labels for the held-out test features; used for the F1 score.
y_pred = model.predict(X_test)

In [None]:
# F1 of the test-set predictions against the true labels.
f1 = f1_score(y_test, y_pred)
print(f"Model F1 Score: {f1}")

In [None]:
# Share of grandmasters in the test set: the baseline to read the accuracy
# and F1 against. Numerator and denominator must both come from y_test;
# dividing the test-set count by len(y) (the full dataset) understated the
# share by roughly the test fraction.
# Note: the printed value is a fraction in [0, 1], not multiplied by 100.
print(f"Percent Grandmaster: {y_test.sum() / len(y_test)}")