In [2]:
import pandas as pd
import numpy as np

In [17]:
def sg(z):
    return 1 / (1 + np.exp(-z))

def loss_gradient(beta, x, y):
    z = np.dot(x,beta)
    h = sg(z)
    loss = (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()
    gdn = np.dot(x.T, (y - h)) / y.size
    grad = gdn + 300 / (2 * y.size) * np.concatenate(([0],beta[1:])).T
    return loss, grad

In [23]:
def model_fit(x, y, total_iterations=5000):
    final_beta = []
    label = np.unique(y)
    all_loss = np.zeros(total_iterations)
    
    for c in label:
        Y = np.where(y == c, 1, 0)
        beta = np.zeros(x.shape[1])
        for i in range(total_iterations):
            all_loss[i], grad = loss_gradient(beta, x, Y)
            beta += 0.01 * grad   
        final_beta.append(beta)
    return final_beta, label, all_loss

In [20]:
def predict_prob(label, beta,x):   
    preds = [np.argmax(
        [sg(i @ j) for j in final_beta]
    ) for i in x]
    return [label[p] for p in preds]

def accuracy(label, beta, x, y):
    acc = predict_prob(label, beta, x)
    predicted = []
    actual = []
    types = {0:"ale",1:"stout",2:"lager"}
    for i in acc:
        predicted.append(types[i])
    for i in y:
         actual.append(types[i])
       

    x = zip(predicted,actual)
    f.write("\n \npredicted actual \n \n")
    f.write('\n'.join('{} {}'.format(i[0],i[1]) for i in x))

    return (acc == y).mean()
   

In [21]:
#Pre processing training data set beer_training.txt
read_file = pd.read_csv (r'./beer.txt',delimiter='\t', header = None)
read_file.columns = ['calorific_value', 'nitrogen', 'turbidity', 'style', 'alcohol', 'sugars', 'bitterness', 'beer_id', 'colour', 'degree_of_fermentation']
read_file.to_csv (r'./beer_training.csv', index=None)
df = pd.read_csv("./beer_training.csv",sep=",")
df = df.drop("beer_id", axis=1)
first_col = df.pop("style")
df.insert(8, "style", first_col)
df.to_csv('./beer_training.csv', index=None)
df['style'] = df['style'].astype('category').cat.codes
df

Unnamed: 0,calorific_value,nitrogen,turbidity,alcohol,sugars,bitterness,colour,degree_of_fermentation,style
0,41.721239,0.503276,2.628182,4.015385,16.73,10.452789,13.44,55.337143,0
1,42.429204,0.525512,1.776364,4.092308,16.72,10.999526,12.24,58.380000,0
2,45.880531,0.443233,2.628182,4.276923,16.68,13.456368,10.92,58.382857,0
3,45.305310,0.471668,1.806364,4.126154,18.84,9.202737,10.92,58.525714,0
4,38.977876,0.392846,2.272727,4.015385,16.77,9.457895,10.56,58.900000,0
...,...,...,...,...,...,...,...,...,...
149,39.951327,0.272587,5.217273,4.412308,16.68,6.008368,6.84,74.757143,2
150,40.039823,0.347919,2.859091,4.178462,17.58,7.340842,8.88,74.887143,2
151,43.977876,0.266770,3.775455,4.347692,19.10,7.356000,7.32,75.894286,2
152,45.349558,0.255530,1.301818,4.160000,18.17,3.243579,10.68,76.182857,2


In [24]:
with open('result.txt','w') as f:
    
    for i in range(0,10):
        data = np.array(df)
        np.random.shuffle(data)
        num_train = int(.66 * len(data))  
        x_train, y_train = data[:num_train, :-1], data[:num_train, -1]
        x_test, y_test = data[num_train:, :-1], data[num_train:, -1]
        final_beta, classes, losses = model_fit(x_train, y_train)
        print(f"Train Accuracy for run {i}: {accuracy(classes, final_beta, x_train, y_train):.3f}")
        print(f"Test Accuracy for run {i} : {accuracy(classes, final_beta, x_test, y_test):.3f}")

Train Accuracy for run 0: 0.752
Test Accuracy for run 0 : 0.755
Train Accuracy for run 1: 0.772
Test Accuracy for run 1 : 0.736
Train Accuracy for run 2: 0.752
Test Accuracy for run 2 : 0.642
Train Accuracy for run 3: 0.762
Test Accuracy for run 3 : 0.698
Train Accuracy for run 4: 0.802
Test Accuracy for run 4 : 0.642
Train Accuracy for run 5: 0.782
Test Accuracy for run 5 : 0.717
Train Accuracy for run 6: 0.743
Test Accuracy for run 6 : 0.792
Train Accuracy for run 7: 0.743
Test Accuracy for run 7 : 0.755
Train Accuracy for run 8: 0.752
Test Accuracy for run 8 : 0.717
Train Accuracy for run 9: 0.723
Test Accuracy for run 9 : 0.792
