In [60]:
%%time
'''
Logistic Regression
Voice Classification
Features 20
Author: Kushal Sharma
'''
import pandas as pd 
import numpy as np
from sklearn.utils import shuffle 
import random

#Dataset:
mainData = pd.DataFrame(pd.read_csv("voice.csv"))
# mainData = mainData.sample(frac=1).reset_index(drop=True)
mainData = shuffle(mainData)
train,cv,test = np.split(mainData,[int(0.6*len(mainData)),int(0.8*len(mainData))])

trainFeatures = train.select_dtypes(include=['float64'])
trainLabel = train['label']
trainFeatures = trainFeatures.T
testFeatures = test.select_dtypes(include=['float64'])
testLabel = test['label']
testFeatures = testFeatures.T
cvFeatures = cv.select_dtypes(include=['float64'])
cvLabel = cv['label']
cvFeatures = cvFeatures.T

featureCount = len(trainFeatures)

trainLabel = trainLabel.replace(['male','female'],[0,1])
testLabel = testLabel.replace(['male','female'],[0,1])
cvLabel = cvLabel.replace(['male','female'],[0,1])

#Normalising features
for i in range(0,len(trainFeatures)):
    trainFeatures.iloc[i] = trainFeatures.iloc[i]/max(trainFeatures.iloc[i])
for i in range(0,len(testFeatures)):
    testFeatures.iloc[i] = testFeatures.iloc[i]/max(testFeatures.iloc[i])
for i in range(0,len(cvFeatures)):
    cvFeatures.iloc[i] = cvFeatures.iloc[i]/max(cvFeatures.iloc[i])

#Global Variables:
weights = [5]*featureCount

def sigmoid(weights,features,label):
    z = 0
    for i in range(1,len(weights)):
        z = z + weights[i]*features.iloc[i]
    z = z+weights[0]
    return 1/(1+np.exp(-z))

def cost(weights,features,label):
    predicted = sigmoid(weights,features,label)
    costClass1 = -label*np.log(predicted)
    costClass2 = (1-label)*np.log(1-predicted)
    cost = costClass1-costClass2
    cost = np.mean(cost)
    return cost

def derivative(weights,features,label,featureIteration):
    predictions = sigmoid(weights,features,label)
    derivative = np.dot((features.iloc[featureIteration]).T,  predictions - label)
    derivative = np.mean(derivative)
    return derivative

def gradientDescent(weights,features,label,iters,lr):
    weightsNew = [0]*len(weights)
    for i in range(0,iters):
        for n in range(0,len(weights)):
            weightsNew[n] = weights[n] - lr*derivative(weights,features,label,n)
        weights = weightsNew
    return weights

def train(iters,lr):
    global weights,trainFeatures,trainLabel
    trainedWeights = (gradientDescent(weights,trainFeatures,trainLabel,iters,lr))
    trainedWeights = gradientDescent(weights,trainFeatures,trainLabel,iters,lr)
    from sklearn.metrics import accuracy_score
    predicted = sigmoid(trainedWeights,trainFeatures,trainLabel)
    for i in range(0,len(predicted)):
        if predicted.iloc[i]>0.5:
            predicted.iloc[i] = 1
        else:
            predicted.iloc[i] = 0
    accuracy = (accuracy_score(trainLabel,predicted)*100)
    print("=================================================")
    print("---------------Training Accuracy-----------------")
    print("               ",accuracy,"%","                  ")
    print("=================================================")
    

########## Training Area(Do not Enter) #############
######## TRESSPASSERS WILL BE SHOT AT SIGHT ########
train(100,0.01)
####################################################




---------------Training Accuracy-----------------
                95.7368421053 %                   
CPU times: user 1min 22s, sys: 144 ms, total: 1min 23s
Wall time: 1min 23s


In [20]:
import pandas as pd

mainData = pd.DataFrame(pd.read_csv("voice.csv"))

mainData

Unnamed: 0,meanfreq,sd,median,Q25,Q75,IQR,skew,kurt,sp.ent,sfm,...,centroid,meanfun,minfun,maxfun,meandom,mindom,maxdom,dfrange,modindx,label
0,0.059781,0.064241,0.032027,0.015071,0.090193,0.075122,12.863462,274.402906,0.893369,0.491918,...,0.059781,0.084279,0.015702,0.275862,0.007812,0.007812,0.007812,0.000000,0.000000,male
1,0.066009,0.067310,0.040229,0.019414,0.092666,0.073252,22.423285,634.613855,0.892193,0.513724,...,0.066009,0.107937,0.015826,0.250000,0.009014,0.007812,0.054688,0.046875,0.052632,male
2,0.077316,0.083829,0.036718,0.008701,0.131908,0.123207,30.757155,1024.927705,0.846389,0.478905,...,0.077316,0.098706,0.015656,0.271186,0.007990,0.007812,0.015625,0.007812,0.046512,male
3,0.151228,0.072111,0.158011,0.096582,0.207955,0.111374,1.232831,4.177296,0.963322,0.727232,...,0.151228,0.088965,0.017798,0.250000,0.201497,0.007812,0.562500,0.554688,0.247119,male
4,0.135120,0.079146,0.124656,0.078720,0.206045,0.127325,1.101174,4.333713,0.971955,0.783568,...,0.135120,0.106398,0.016931,0.266667,0.712812,0.007812,5.484375,5.476562,0.208274,male
5,0.132786,0.079557,0.119090,0.067958,0.209592,0.141634,1.932562,8.308895,0.963181,0.738307,...,0.132786,0.110132,0.017112,0.253968,0.298222,0.007812,2.726562,2.718750,0.125160,male
6,0.150762,0.074463,0.160106,0.092899,0.205718,0.112819,1.530643,5.987498,0.967573,0.762638,...,0.150762,0.105945,0.026230,0.266667,0.479620,0.007812,5.312500,5.304688,0.123992,male
7,0.160514,0.076767,0.144337,0.110532,0.231962,0.121430,1.397156,4.766611,0.959255,0.719858,...,0.160514,0.093052,0.017758,0.144144,0.301339,0.007812,0.539062,0.531250,0.283937,male
8,0.142239,0.078018,0.138587,0.088206,0.208587,0.120381,1.099746,4.070284,0.970723,0.770992,...,0.142239,0.096729,0.017957,0.250000,0.336476,0.007812,2.164062,2.156250,0.148272,male
9,0.134329,0.080350,0.121451,0.075580,0.201957,0.126377,1.190368,4.787310,0.975246,0.804505,...,0.134329,0.105881,0.019300,0.262295,0.340365,0.015625,4.695312,4.679688,0.089920,male
