# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, accuracy_score, confusion_matrix, roc_curve, auc, classification_report
from sklearn.svm import SVC
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler

# Loading Dataset

In [None]:
# Shell check: list the expected input CSV path before trying to load it.
!ls ../input/voicegender/voice.csv

In [None]:
# Read the voice CSV once; `data` holds the parsed file and `df` is the
# frame the rest of the notebook works on.
voice_csv = "../input/voicegender/voice.csv"
data = pd.read_csv(voice_csv)
df = pd.DataFrame(data)
df

# Data Pre-processing

In [None]:
# Count missing values per column (`isna` is the same check as `isnull`).
df.isna().sum()

In [None]:
# Number of rows per distinct value of the `label` column.
df['label'].value_counts()

The classes are also balanced.

In [None]:
# Print column names, non-null counts and dtypes of the working frame.
df.info()

Since `label` has dtype object, we need to convert it into numerical data.

In [None]:
# Replace the string labels with integer codes produced by LabelEncoder.
la = LabelEncoder()
encoded_label = la.fit_transform(df['label'])
df['label'] = encoded_label

In [None]:
# Confirm the encoding on the frame that was actually modified (`df`).
# Inspecting the raw `data` frame only reflects the change when pandas happens
# to share storage between `data` and `df`, which is version-dependent.
df.info()

In [None]:
# Display the `label` column after encoding.
df['label']

The label is now converted to a numerical data type, so we can apply our classifier.

In [None]:
# Pairwise Pearson correlation between all columns (pandas' default method,
# spelled out here for clarity).
corr = df.corr(method="pearson")
corr

# Data Visualization

In [None]:
# Draw the annotated correlation heatmap on an explicitly created wide axes.
fig, ax = plt.subplots(figsize=(36, 12))
sns.heatmap(corr, annot=True, ax=ax)

In [None]:
# Correlation of every column with `label`, strongest positive first.
corr['label'].sort_values(ascending=False)

Since only a few features have a positive correlation with the label, I will keep all of them.

# Splitting data into training and testing

In [None]:
# Build the feature matrix by removing the unused columns and the target.
# NOTE(review): presumably the dropped features are the ones without a
# positive correlation with `label` - confirm against the table above.
dropped_columns = [
    'meanfun', 'Q25', 'meanfreq', 'centroid', 'median', 'maxdom', 'mindom',
    'dfrange', 'meandom', 'mode', 'maxfun', 'minfun', 'label',
]
x = df.drop(columns=dropped_columns)
x.describe()

In [None]:
# Standardize every feature column and keep the original column names.
# NOTE(review): the scaler is fit on all rows, including those that later end
# up in the test split; consider fitting on the training rows only.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(x)
x = pd.DataFrame(scaled_values, columns=x.columns)
x

In [None]:
# Select the target by name rather than by position, so the notebook keeps
# working if columns are added to or reordered in `df`.
y = df['label']
y

In [None]:
# Sanity check: shape of the target vector.
y.shape

In [None]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=0,
)

In [None]:
# Baseline support-vector classifier with default hyperparameters.
cla = SVC()
cla.fit(X=xtrain, y=ytrain)

In [None]:
# Show the hyperparameters the baseline classifier is using.
cla.get_params()

# Data Training Using GridSearch

In [None]:
# Fitting Data to find best parameters
# Only hyperparameters that can change the predictions are searched.
# `degree` is used only by the 'poly' kernel, `break_ties` only matters with
# more than two classes, `verbose` only toggles logging, and `probability`
# only enables predict_proba (at the cost of an extra internal cross-validation
# per fit). Searching over them multiplied the number of fits by 16 without
# producing any new model.
param = {
    'C': [1, 2],
    'kernel': ['rbf', 'sigmoid'],
}
# GridSearchCV clones `cla`, so the already-fitted baseline is not modified.
grid = GridSearchCV(estimator=cla, param_grid=param, cv=5)
grid.fit(xtrain, ytrain)

In [None]:
# Finding best parameters
# `best_params_` is the parameter combination GridSearchCV selected as best.
grid.best_params_

# Applying Classifier

In [None]:
# Data fitting using parameters found from GridSearch
# Take the hyperparameters directly from the search result instead of copying
# them by hand, so this cell cannot drift out of sync with the grid search.
best_params = dict(grid.best_params_)
# libsvm logging does not influence the fitted model; keep the output quiet.
best_params['verbose'] = False
cla2 = SVC(random_state=0, **best_params)
cla2.fit(xtrain, ytrain)

# Checking Accuracy

In [None]:
# Predict the held-out rows and score them. Arguments follow scikit-learn's
# (y_true, y_pred) convention; accuracy is symmetric, so the value is the same.
ytest_predict = cla2.predict(xtest)
accuracy_score(ytest, ytest_predict)

In [None]:
# ROC Curve

# A ROC curve needs a continuous score per sample so the decision threshold
# can be swept. Hard 0/1 predictions collapse the curve to a single operating
# point and make the AUC meaningless as a ranking metric. For a binary SVC,
# decision_function returns the signed distance to the separating hyperplane
# (positive values favour class 1).
ytest_scores = cla2.decision_function(xtest)
fpr, tpr, thresholds = roc_curve(ytest, ytest_scores, pos_label=1)
plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.show()
# Measure the area under the curve.  The closer to 1, the "better" the predictions.
print("AUC of the predictions: {0}".format(auc(fpr, tpr)))

# Measure the Accuracy Score (computed from the hard class predictions)
print("Accuracy score of the predictions: {0}".format(accuracy_score(ytest, ytest_predict)))


In [None]:
# Confusion matrix

# Rows are the true classes and columns the predicted classes, both in the
# order given by `labels`.
confu = confusion_matrix(ytest, ytest_predict, labels=[0, 1])
# fmt='d' prints the counts as plain integers; seaborn's default annotation
# format would show larger counts in scientific notation.
ax = sns.heatmap(confu, annot=True, fmt='d')
ax.set(xlabel='Predicted label', ylabel='True label');

In [None]:
# Per-class precision, recall and F1 on the held-out test set.
report = classification_report(ytest, ytest_predict)
print("Classification Report for our model is ")
print(report)