In [None]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.base import clone
from sklearn.ensemble import VotingClassifier,RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Location of the raw CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/gender-classification-dataset/gender_classification_v7.csv'
# Load the raw table; later cells pull the 'gender' column out of it as the label.
data = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the freshly loaded frame.
data.head()

In [None]:
# Shuffle all rows (frac=1 keeps every row, only the order changes).
# A fixed random_state makes the row order -- and everything derived from it
# further down, such as the order in which class labels are first seen --
# identical on every Restart & Run All.
data = data.sample(frac=1, random_state=42)
data.head()

In [None]:
# Remove the 'gender' column from the feature frame and keep it as the label Series.
# pop() mutates `data` in place, so this cell raises KeyError if it is run a second
# time without reloading the CSV.
labels=data.pop('gender')
data.head()

In [None]:
# Distinct class names, in order of first appearance in the (shuffled) label Series.
targets = labels.unique()
# Map each class name to an integer code. The names are sorted first so the
# code assigned to a class does not depend on which row happened to come first
# after the shuffle; key=str keeps the sort well-defined even if a non-string
# value (e.g. NaN) is present among the labels.
encoded_targets = {
    target: code for code, target in enumerate(sorted(targets, key=str))
}
print(encoded_targets)

In [None]:
# Inverse lookup: integer code -> original class name.
decode_targets = {code: target for target, code in encoded_targets.items()}
print(decode_targets)

In [None]:
# Replace each class name with its integer code.
# Series.map yields NaN for any value that is not a key of the dict, which is
# exactly what happens if this cell is executed twice (the labels are already
# integers by then). Fail loudly instead of silently producing NaN targets, and
# only rebind `labels` once the mapping is known to be complete.
_encoded_labels = labels.map(encoded_targets)
if _encoded_labels.isna().any():
    raise ValueError(
        'labels contain values that are not keys of encoded_targets; '
        'was this cell run more than once?'
    )
labels = _encoded_labels

In [None]:
# Show the label Series after it has been mapped through encoded_targets.
print(labels)

In [None]:
# Names of the remaining feature columns (the label column was popped earlier).
col_names = data.columns.tolist()
print(col_names)

In [None]:
# Feature columns stored as float64; these are the ones rescaled in the next step.
# The column dtype is checked directly instead of inspecting the value stored
# under index label 0, so the result does not depend on which labels the
# (shuffled) index happens to contain.
numeric_col_names = [
    col_name for col_name in col_names if data[col_name].dtype == np.float64
]
print(numeric_col_names)

In [None]:
# Rescale every float feature to the [0, 1] range.
# MinMaxScaler scales each column independently, so one fit over all numeric
# columns yields the same values as re-fitting the scaler once per column, and it
# leaves `scaler` fitted on all of those columns rather than only the last one.
# NOTE(review): the scaler is fitted on the full frame, before the train/test
# split further down, so test rows contribute to the min/max statistics.
scaler = MinMaxScaler(feature_range=(0, 1))
if numeric_col_names:  # fit_transform rejects a selection with zero columns
    data[numeric_col_names] = scaler.fit_transform(data[numeric_col_names])
data.head()

In [None]:
# Feature matrix as an independent NumPy array (one row per sample).
X = data.to_numpy(copy=True)
print(X)

In [None]:
# Sanity check: show the container type of `labels` before converting it to an array.
print(type(labels))

In [None]:
# Target vector as an independent NumPy array, aligned row-for-row with X.
y = labels.to_numpy(copy=True)

In [None]:
# Hold out 20% of the samples for the final evaluation.
# random_state pins the partition so results are comparable between runs;
# stratify=y keeps the class proportions the same in both partitions
# (this requires at least two samples per class).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Report the shape of each split, in the order: train X, test X, train y, test y.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
# Hyper-parameter grid explored by the grid search.
# n_estimators: 90, 110, ..., 190 trees.
# max_depth: None (unbounded) plus a few bounded depths. The earlier candidates
# (200, 240, ..., 360) are presumably far deeper than any tree grown on this
# data, which would make them all equivalent to "unbounded" and the search five
# times more expensive for no gain -- TODO confirm against the fitted trees.
parameters = {
    'n_estimators': list(range(90, 200, 20)),
    'max_depth': [None, 5, 10, 15, 20],
}

In [None]:
# Base estimator for the grid search, created with library defaults.
# NOTE(review): no random_state is passed, so fitted forests differ between runs.
model=RandomForestClassifier()

In [None]:
# Grid search over `parameters` for the base random forest.
# refit=True re-trains the best configuration once the search has finished;
# verbose=3 prints progress for the individual fits.
clf = GridSearchCV(
    estimator=model,
    param_grid=parameters,
    refit=True,
    verbose=3,
)

In [None]:
# Run the grid search on the training split only; the test split is not touched here.
clf.fit(X_train,y_train)

In [None]:
# Keep only the winning estimator from the grid search under the name `clf`.
# The isinstance guard makes the cell safe to re-run: after the first execution
# `clf` is already the best estimator (which has no best_estimator_ attribute),
# so the rebinding is skipped instead of raising AttributeError. An unfitted
# search object still raises, as before.
if isinstance(clf, GridSearchCV):
    clf = clf.best_estimator_
print(clf)

In [None]:
# Soft-voting ensemble of ten forests that share the tuned hyper-parameters.
# Each member is an explicit unfitted clone of `clf` with its own random_state,
# so the members are guaranteed to differ from one another and the ensemble is
# reproducible, instead of relying on ten references to one estimator whose
# only source of diversity is unseeded randomness.
vote_clf = VotingClassifier(
    estimators=[
        (f'r{i}', clone(clf).set_params(random_state=i))
        for i in range(0, 10)
    ],
    voting='soft',
    verbose=True
)

In [None]:
# Train the voting ensemble on the training split.
vote_clf.fit(X_train,y_train)

In [None]:
# Predict class codes for the held-out test split.
preds=vote_clf.predict(X_test)

In [None]:
# Confusion matrix of true vs. predicted codes on the test split.
cm = confusion_matrix(y_true=y_test, y_pred=preds)
# Accuracy = correctly classified samples (the diagonal) over all samples.
correct = np.trace(cm)
total = np.sum(cm)
acc = correct / total
print(f'The test set accuracy is {acc*100}')