# Import Statements

In [1]:
import pandas as pd
import numpy as np
import random
import math
import matplotlib.pyplot as plt

# Train and Validation Split

In [2]:
# Pass the percentage of rows to hold out for validation, e.g. 20 for 20 %.
def train_split(df,test_per,seed=0):
    """Split ``df`` into a training frame and a validation frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to split. Rows are picked by index label, so labels are
        expected to be unique.
    test_per : float
        Percentage of rows to hold out, e.g. ``20`` for 20 %.
    seed : int, optional
        Seed of the private random generator. The default ``0`` selects the
        same rows as ``random.seed(0)`` followed by ``random.sample``.

    Returns
    -------
    tuple
        ``(train_df, valid_df)``; ``valid_df`` holds the sampled rows and
        ``train_df`` everything else.
    """
    indices=df.index.tolist()
    test_size=round(len(df)*(test_per/100))
    # A private generator keeps the split reproducible without reseeding the
    # module-level ``random`` state that other code may rely on.
    rng=random.Random(seed)
    test_indices=rng.sample(indices,k=test_size)
    valid_df=df.loc[test_indices]
    train_df=df.drop(test_indices)
    return train_df,valid_df

# Hypothesis

In [3]:
def cal_hypothesis(x,B):
    """Logistic hypothesis h(x) = 1 / (1 + e^(-x.B)) for a single sample.

    Parameters
    ----------
    x : array-like with a ``dot`` method
        Feature vector of one sample.
    B : array-like
        Coefficient vector; ``x.dot(B)`` must yield a single value that
        ``float()`` can convert.

    Returns
    -------
    float
        Value in [0, 1].
    """
    t=float(x.dot(B))
    # Only ever exponentiate a non-positive number: math.exp raises
    # OverflowError for arguments above ~709, which the naive form hits
    # whenever x.B is strongly negative.
    if t >= 0:
        return 1/(1+math.exp(-t))
    exp_t=math.exp(t)
    return exp_t/(1+exp_t)

# Cost function

In [4]:
def cost_function(X,Y,B):
    """Mean logistic (cross-entropy) cost.

    J = (-1/m) * sum( y_i*log(h(x_i)) + (1-y_i)*log(1-h(x_i)) )
    with h(x) = 1 / (1 + e^(-x.B)).

    Parameters
    ----------
    X : array-like, one row per sample
    Y : array-like, one target per row of ``X``
    B : array-like, one coefficient per column of ``X``

    Returns
    -------
    float
        The cost J.

    Raises
    ------
    ValueError
        If ``Y`` is empty (the mean over zero samples is undefined).
    """
    X=np.asarray(X,dtype=float)
    Y=np.asarray(Y,dtype=float).ravel()
    if len(Y) == 0:
        raise ValueError("cost_function needs at least one sample")
    z=X.dot(np.asarray(B,dtype=float).ravel())
    # -[y*log(h) + (1-y)*log(1-h)] simplifies to log(1 + e^z) - y*z.
    # logaddexp(0, z) evaluates log(1 + e^z) without overflow, so a saturated
    # hypothesis no longer produces log(0) -> inf/nan.
    return float(np.mean(np.logaddexp(0,z)-Y*z))

# Gradient Descent

In [5]:
def gradient_descent(X,Y,B,alpha,prev_cost,no_iters,tol=0.000001):
    """Fit logistic-regression coefficients by repeated gradient steps.

    Update rule per coefficient: Bj = Bj - alpha * sum_i((h(x_i) - y_i) * x_ij)

    Parameters
    ----------
    X : array-like, shape (samples, coefficients)
    Y : array-like, one target per row of ``X``
    B : mutable sequence of coefficients; updated IN PLACE and also returned
    alpha : float
        Step size. The gradient is a plain sum over samples (not divided by
        the sample count), so the effective step grows with the data size.
    prev_cost : float
        Cost of the incoming ``B``; used for the first convergence check.
    no_iters : int
        Maximum number of passes. Zero or negative performs no update.
    tol : float, optional
        Stop as soon as the cost changes by less than this between passes.

    Returns
    -------
    tuple
        ``(B, cost)`` where ``cost`` is the cost after the last update, or
        ``prev_cost`` when no update was made.
    """
    X=np.asarray(X)
    Y=np.asarray(Y)
    # Iterative on purpose: one recursion level per pass runs into Python's
    # recursion limit (1000 by default) for long, non-converging fits.
    for _ in range(int(no_iters)):
        for j in range(len(B)):
            # NOTE(review): the residual is recomputed for every coefficient,
            # so B[j] already sees the updated B[0..j-1] of this pass (not a
            # simultaneous update). Kept to preserve existing results.
            residual=sigmoid_all(X.dot(B))-Y
            B[j]-=alpha*residual.dot(X[:,j])
        cost=cost_function(X,Y,B)
        if abs(prev_cost-cost) < tol:
            return B,cost
        prev_cost=cost
    return B,prev_cost

# Predict

In [6]:
def predict_prob(x,B):
    """Return the hypothesis value for one sample.

    Thin wrapper: hands ``x`` and ``B`` to ``cal_hypothesis`` and returns its
    result unchanged.
    """
    return cal_hypothesis(x,B)

In [7]:
def predict_ans(x,B,threshold):
    """Turn the predicted probability of one sample into a 0/1 answer.

    Parameters
    ----------
    x : feature vector of one sample (passed on to ``predict_prob``)
    B : coefficient vector (passed on to ``predict_prob``)
    threshold : float
        Decision boundary; a probability equal to it counts as positive.

    Returns
    -------
    int
        ``1`` when the probability is >= ``threshold``, otherwise ``0``.
    """
    # Single expression: the old trailing ``return 0`` could never be reached.
    return 1 if predict_prob(x,B) >= threshold else 0

In [8]:
def sigmoid_all(z):
    """Element-wise logistic function 1 / (1 + e^(-z)).

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    numpy.ndarray or NumPy scalar
        Same shape as ``z``; a scalar input gives a scalar result.
    """
    z=np.asarray(z,dtype=float)
    # exp(-|z|) lies in (0, 1], so it can never overflow; the naive
    # exp(-z) overflows (with a RuntimeWarning) for strongly negative z.
    e=np.exp(-np.abs(z))
    out=np.where(z >= 0, 1/(1+e), e/(1+e))
    # [()] turns a 0-d result back into a scalar and leaves arrays untouched.
    return out[()]

In [9]:
def confusion_mat(X,Y,B,threshold):
    """Score binary predictions against the actual labels.

    Each sample is predicted with ``predict_ans(X[i], B, threshold)`` and
    tallied into a confusion matrix, from which the measures are derived.

    Parameters
    ----------
    X : sequence of feature vectors
    Y : sequence of actual values, one per entry of ``X``
    B : coefficient vector
    threshold : float
        Used both as the decision boundary for the prediction and to
        binarise the actual value.

    Returns
    -------
    list
        ``[accuracy, misclassification, precision, recall, f1score]``.
        A measure whose denominator is zero is reported as ``0``; with no
        samples at all every measure is ``0``.
    """
    total=len(X)
    if total == 0:
        # Nothing to score; follow the "undefined -> 0" convention used for
        # precision/recall below instead of dividing by zero.
        return [0,0,0,0,0]

    TP=TN=FP=FN=0
    for i in range(total):
        # NOTE(review): the actual value is binarised with a strict ``>``
        # while predict_ans uses ``>=`` on the probability - confirm intended.
        actual=1 if Y[i] > threshold else 0
        predicted=predict_ans(X[i],B,threshold)
        if predicted == 1:
            if actual == 1:
                TP+=1
            else:
                FP+=1
        elif predicted == 0:
            if actual == 1:
                FN+=1
            else:
                TN+=1

    accuracy=(TN+TP)/total
    misclassification=(FN+FP)/total
    precision=TP/(TP+FP) if TP+FP > 0 else 0
    recall=TP/(TP+FN) if TP+FN > 0 else 0
    if precision != 0 and recall != 0:
        # harmonic mean of precision and recall
        f1score=2/((1/precision)+(1/recall))
    else:
        f1score=0
    return [accuracy,misclassification,precision,recall,f1score]

In [10]:
def cal_accuracy(Y,predictions):
    """Fraction of positions at which the prediction equals the label.

    ``Y`` and ``predictions`` are indexed by position 0..len(Y)-1; the result
    is the number of equal pairs divided by ``len(Y)``.
    """
    n_samples=len(Y)
    hits=sum(1 for idx in range(n_samples) if Y[idx] == predictions[idx])
    return hits/n_samples

In [11]:
def plot_graph(actual_list,predicted_list,title):
    """Scatter predicted (blue) and actual (red) values against sample position.

    Both series share the x positions 0..len(actual_list)-1. The figure is
    shown with ``plt.show()``; nothing is returned.
    """
    positions=list(range(len(actual_list)))
    marker_area=np.pi

    plt.xlabel("Students")
    plt.ylabel("Chance of Admit")
    plt.title(title)

    # Predicted values are drawn first, actual values on top of them.
    for series,colour in ((predicted_list,'blue'),(actual_list,'red')):
        plt.scatter(positions, series, s=marker_area, c=colour, alpha=0.5)

    plt.show()

# Helper Functions

In [12]:
# Build the design matrix X (one row per sample) and the label vector Y from df
def to_arrays(df,label):
    """Convert a data frame into a design matrix ``X`` and a target vector ``Y``.

    ``X`` has one row per row of ``df`` and ``df.shape[1] - 1`` columns:
    column 0 is the constant 1 (intercept term) and the remaining columns are
    the frame's columns 1 .. second-to-last, each passed through
    ``mean_normalize``. The first and the last frame column are not used as
    features. ``Y`` is ``df[label]`` as a NumPy array.
    """
    n_samples,n_columns=df.shape
    column_names=list(df.columns)

    # Start from all ones: row 0 is then already the intercept term X0 = 1.
    features=np.ones((n_columns-1,n_samples))
    # NOTE(review): the first column is skipped as a serial number and the
    # last as the label - confirm this matches the layout of the input file.
    for row,name in enumerate(column_names[1:n_columns-1],start=1):
        features[row]=mean_normalize(df[name].values)

    targets=np.array(df[label].values)
    return features.T,targets

In [13]:
def mean_normalize(x):
    """Standardise a 1-D sequence: (x - mean) / standard deviation.

    Parameters
    ----------
    x : array-like of numbers

    Returns
    -------
    numpy.ndarray
        Float array of the same length. A column without any spread
        (standard deviation 0) is only centred, giving zeros, instead of
        being divided by zero.
    """
    x=np.asarray(x,dtype=float)
    # NOTE(review): the mean propagates NaN while nanstd ignores it, so any
    # NaN in ``x`` makes the whole result NaN - confirm that is intended.
    mean=np.mean(x)
    std=np.nanstd(x)
    centred=x-mean
    if std == 0:
        # Constant feature: 0/0 would turn every entry into NaN and poison
        # every dot product the feature takes part in.
        return centred
    return centred/std

# Main

In [14]:
def main(file="wine-quality/data.csv",label='quality',no_classes=11):
    """Train and evaluate a one-vs-all logistic-regression classifier.

    Steps: read the CSV, hold out 20 % of the rows for validation, fit one
    binary classifier per class with gradient descent, predict the class with
    the highest probability for every validation row and print the accuracy.

    Parameters
    ----------
    file : str, optional
        Path of the CSV file to read.
    label : str, optional
        Name of the target column.
    no_classes : int, optional
        Number of classes; class ids are 0 .. no_classes-1 (default 11,
        i.e. classes 0-10).
    """
    df=pd.read_csv(file)

    train_df,valid_df=train_split(df,20)

    #Training
    train_X,train_Y=to_arrays(train_df,label)
    # One coefficient per design-matrix column (intercept included); taken
    # from the data so it cannot drift out of sync with the input file.
    no_coefficients=train_X.shape[1]

    alpha=0.003
    no_iters=1000

    ##One VS All
    print("One Vs All")
    #B(beta): one row of coefficients per class, initialized to zero
    B=np.zeros(shape=(no_classes,no_coefficients))

    for c in range(no_classes):
        # binary target: 1 for samples of class c, 0 for everything else
        y_new=(train_Y == c).astype(float)

        init_J=cost_function(train_X,y_new,B[c])
        print("Initial_cost- ",init_J)

        B[c],final_J=gradient_descent(train_X,y_new,B[c],alpha,init_J,no_iters)
        print("Final_cost- ",final_J)

    #Prediction
    # NOTE(review): to_arrays normalizes the validation rows with their own
    # mean/std rather than the training statistics - confirm this is intended.
    valid_X,valid_Y=to_arrays(valid_df,label)

    classProbs=sigmoid_all(valid_X @ B.T)

    # predicted class = index of the most probable classifier per row
    prediction=np.argmax(classProbs,axis=1)

    acc=cal_accuracy(valid_Y,prediction)
    print("Accuracy-",acc)

    # TODO: a One-vs-One variant was sketched here but never implemented.

In [15]:
# Run the pipeline only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

One Vs All
Initial_cost-  0.6931471805599391
Final_cost-  0.00030601885892336946
Initial_cost-  0.6931471805599391
Final_cost-  0.00030601885892336946
Initial_cost-  0.6931471805599391
Final_cost-  0.00030601885892336946
Initial_cost-  0.6931471805599391
Final_cost-  0.023416731702795276
Initial_cost-  0.6931471805599391
Final_cost-  0.11954174787867652
Initial_cost-  0.6931471805599391
Final_cost-  0.513664579402055
Initial_cost-  0.6931471805599391
Final_cost-  0.9268563650318428
Initial_cost-  0.6931471805599391
Final_cost-  0.39966462684962967
Initial_cost-  0.6931471805599391
Final_cost-  0.12651762413367557
Initial_cost-  0.6931471805599391
Final_cost-  0.007264291499559555
Initial_cost-  0.6931471805599391
Final_cost-  0.00030601885892336946
Accuracy- 0.4886621315192744
