## Importing the necessary libraries

In [None]:
import pandas as pd # pandas is used for data analysis; its two core data structures are
# 1. Series and 2. DataFrames
import numpy as np # numpy is used to perform a number of mathematical operations on arrays
import matplotlib.pyplot as plt #it is a plotting library used for 2D graphics in python.
import seaborn as sns #used for making statistical graphics. it is built on top of matplotlib and closely integrated with pandas data structures.
#%matplotlib inline #called magic function. used to export visualizations in matplotlib to jupyter notebook
import warnings # we import warning to suppress all the warnings
warnings.filterwarnings('ignore')

## Reading the dataset

In [None]:
# Load 'voice.csv' (path is relative to the current working directory)
# into a pandas DataFrame.
df = pd.read_csv('voice.csv')
# Preview the first rows to check that the file was parsed as expected.
df.head()

## Data exploration

In [None]:
# Column names, non-null counts, dtypes and memory usage of the frame.
df.info()

In [None]:
# Summary statistics per column (pandas describes numeric columns by default).
df.describe()

In [None]:
# Boolean frame of the same shape as df: True wherever a value is missing.
df.isnull()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Report the frame's dimensions and how many rows carry each label value.
male_mask = df.label == 'male'
female_mask = df.label == 'female'
total_rows = len(df)

print('shape of data', df.shape)
print('Total number of labels : {}'.format(total_rows))
# Summing a boolean mask counts its True entries, i.e. the matching rows.
print('Number of male : {}'.format(male_mask.sum()))
print('Number of female : {}'.format(female_mask.sum()))

In [None]:
# Print the DataFrame itself.
print(df)

In [None]:
# Show the Python type of the object bound to df.
print(type(df))

In [None]:
# Select only the rows whose `label` column equals 'male'.
df[df.label == 'male']

## Splitting the data

In [None]:
# Feature matrix: every column of df except the last one.
feature_col_count = df.shape[1] - 1
x = df.iloc[:, :feature_col_count]
print(x.shape)

In [None]:
# Target vector: the last column of df
# (presumably the `label` column -- confirm against voice.csv).
target_col_pos = df.shape[1] - 1
y = df.iloc[:, target_col_pos]
print(y)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
split_parts = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=100,
)
x_train, x_test, y_train, y_test = split_parts

## Data Preprocessing

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Encode the string class labels as integers.
gender_encoder = LabelEncoder()
y = gender_encoder.fit_transform(y)
# The train/test split was taken before this cell runs, so y_train and
# y_test still hold the raw labels. Encode them with the same fitted encoder;
# otherwise the encoding never reaches any of the models trained below.
# gender_encoder.classes_ maps the integer codes back to the original labels.
y_train = gender_encoder.transform(y_train)
y_test = gender_encoder.transform(y_test)
y

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features (zero mean, unit variance per column).
# The train/test split was taken before this cell runs, so scaling only `x`
# would leave x_train / x_test -- the arrays the models are actually fitted
# and evaluated on -- unscaled. Fit the scaler on the training split only
# (so no test-set statistics leak into training) and apply that same
# transformation to both splits.
scaler = StandardScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)
# Keep `x` scaled as before, now using the training-set statistics.
x = scaler.transform(x)

## Using SVM model

In [None]:
# we will be using SVM model 
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.metrics import classification_report,confusion_matrix

In [None]:
# Fit a support vector classifier with scikit-learn's default settings on the
# training split (fit() returns the estimator itself), then predict labels
# for the held-out test split.
svc_model = SVC().fit(x_train, y_train)
y_pred = svc_model.predict(x_test)

## Accuracy of SVM

In [None]:
# Fraction of test samples the SVM classified correctly.
# accuracy_score takes (y_true, y_pred); pass the ground truth first so the
# call matches the API and the other accuracy cells in this notebook.
print('Accuracy is : ')
print(metrics.accuracy_score(y_test, y_pred))

In [None]:
# Confusion matrix for the SVM predictions: rows are the true classes,
# columns are the predicted classes.
print(confusion_matrix(y_test,y_pred))

In [None]:
# Per-class precision, recall, F1-score and support for the SVM predictions.
print(classification_report(y_test,y_pred))

## Using GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Hyper-parameter candidates for the SVC grid search: every combination of
# one `C` value with one `gamma` value is tried.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
}

In [None]:
# Cross-validated search over param_grid for an SVC. refit=True retrains the
# best parameter combination on the whole training split once the search is
# done; verbose=2 logs progress for each fit.
grid = GridSearchCV(
    estimator=SVC(),
    param_grid=param_grid,
    refit=True,
    verbose=2,
)
grid.fit(x_train, y_train)

In [None]:
# Predict on the held-out test split. Because the search was created with
# refit=True, predict() uses the estimator refitted with the best parameters.
grid_predictions = grid.predict(x_test)

## Accuracy of GridSearchCV

In [None]:
# Fraction of test samples the tuned SVC classified correctly.
grid_accuracy = metrics.accuracy_score(y_test, grid_predictions)
print('Accuracy score : ')
print(grid_accuracy)

## Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree that chooses splits by entropy; the fixed random_state keeps
# the fitted tree reproducible between runs.
tree_settings = {'criterion': 'entropy', 'random_state': 0}
classifier = DecisionTreeClassifier(**tree_settings)
classifier.fit(x_train, y_train)

In [None]:
# Predict test-set labels with the decision tree.
# NOTE: this rebinds y_pred, replacing the SVM predictions that were stored
# under the same name earlier in the notebook.
y_pred = classifier.predict(x_test)

## Accuracy of Decision Tree

In [None]:
# Fraction of test samples the decision tree classified correctly.
tree_accuracy = metrics.accuracy_score(y_test, y_pred)
print('Accuracy score : ')
print(tree_accuracy)