In [199]:
# softmax regression
import numpy as np
import math
import random

z = np.array([5,4,3,6,6])

def softmax(z):
    
    # z--> linear part.
    
    # subtracting the max of z for numerical stability.
    exp = np.exp(z - np.max(z))
    
    # Calculating softmax for all examples.
    for i in range(len(z)):
        exp[i] /= np.sum(exp[i])
        
    return exp

def softmax_stable(Z):
    e_Z = np.exp(Z - np.max(Z, axis = 0, keepdims = True))
    A = e_Z / e_Z.sum(axis = 0)
    return A

def number_classes(y):
    c = 0
    lst_class = []
    dict = {}
    for i in range(len(y)):
        if y[i] not in lst_class:
            c += 1
            lst_class.append(y[i])
    return lst_class, c

def one_hot(y, c):
    
    # y--> label/ground truth.
    # c--> Number of classes.
    
    # A zero matrix of size (m, c)
    y_hot = np.zeros((len(y), c))
    
    # Putting 1 for column where the label is,
    # Using multidimensional indexing.
    y_hot[np.arange(len(y)), y] = 1
    
    return y_hot

def predict(X, w, b):
    
    # X --> Input.
    # w --> weights.
    # b --> bias.
    
    # Predicting
    z = X@w + b
    y_hat = softmax(z)
    
    # Returning the class with highest probability.
    return np.argmax(y_hat, axis=1)

def accuracy(y, y_hat):
    return str(np.sum(y==y_hat)/len(y)*100) +"%"

print(softmax(z))
print(softmax_stable(z))

[1. 1. 1. 1. 1.]
[0.14409682 0.05301026 0.01950138 0.39169577 0.39169577]


In [200]:
def predict_top(x, w, b, n):
    z = x@w + b
    y_hot = softmax_stable(z)
    
    #sort the y_hot
    sort = np.sort(y_hot)
    sort_convert = sort[::-1]
    
    top_n = sort_convert[:n]
    top_n_index = []
    for j in range(len(top_n)):
        for i in range(len(y_hot)):
            if y_hot[i] == top_n[j]:
                top_n_index.append(i)
    
    top_list = {}
    for i in range(n):
        top_list[(dict_convert[classes[top_n_index[i]]])] = str(sort_convert[i] * 100) + "%"
    return top_n, top_n_index, top_list

In [201]:
import pandas as pd

df = pd.read_csv("out.csv")
df_raw = df.iloc[:,1:]

In [202]:
X = df_raw.iloc[:,:-1].to_numpy()
y = df_raw.iloc[:,-1].to_numpy()

In [203]:
#create dictionary of classes

dict = {}
count = 0
for x in y:
    if x not in dict:
        dict[x] = count
        count += 1

In [204]:
#create dictionary of convert classes
dict_convert = {}
for x in dict:
    dict_convert[dict[x]] = x

In [205]:
#change text to integer
for i in range(len(y)):
    y[i] = dict[y[i]]

In [206]:
y = y.astype(np.int32)

In [207]:
classes, c = number_classes(y)
y_hot = one_hot(y, c)

In [208]:
loss = -np.mean(np.log(y_hot[np.arange(len(y)), y]))

In [209]:
y_hot

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [210]:
def fit(X, y, lr, c, epochs):
    
    # X --> Input.
    # y --> true/target value.
    # lr --> Learning rate.
    # c --> Number of classes.
    # epochs --> Number of iterations.
    
        
    # m-> number of training examples
    # n-> number of features 
    m, n = X.shape
    
    # Initializing weights and bias randomly.
    w = np.random.random((n, c))
    b = np.random.random(c)
    # Empty list to store losses.
    losses = []
    
    # Training loop.
    for epoch in range(epochs):
        
        # Calculating hypothesis/prediction.
        z = X@w + b
        y_hat = softmax(z)
        
        # One-hot encoding y.
        y_hot = one_hot(y, c)
        
        # Calculating the gradient of loss w.r.t w and b.
        w_grad = (1/m)*np.dot(X.T, (y_hat - y_hot)) 
        b_grad = (1/m)*np.sum(y_hat - y_hot)
        
        # Updating the parameters.
        w = w - lr*w_grad
        b = b - lr*b_grad
        
        # Calculating loss and appending it in the list.
        loss = -np.mean(np.log(y_hat[np.arange(len(y)), y]))
        losses.append(loss)
        # Printing out the loss at every 100th iteration.
        if epoch%100==0:
            print('Epoch {epoch}==> Loss = {loss}'
                  .format(epoch=epoch, loss=loss))
    return w, b, losses

In [211]:
#train vs sample data
import numpy as np
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [219]:
# Training
w, b, l = fit(X_train, y_train, lr=0.5, c=len(classes), epochs=10000)

Epoch 0==> Loss = 19.588297100373808


  loss = -np.mean(np.log(y_hat[np.arange(len(y)), y]))


Epoch 100==> Loss = 0.01063797805145097
Epoch 200==> Loss = 0.007767469665437666
Epoch 300==> Loss = 0.006518366738953934
Epoch 400==> Loss = 0.005735388506923986
Epoch 500==> Loss = 0.005179770887277138
Epoch 600==> Loss = 0.004757636979415175
Epoch 700==> Loss = 0.00442189316686973
Epoch 800==> Loss = 0.004145919340566325
Epoch 900==> Loss = 0.003913511131367227
Epoch 1000==> Loss = 0.003714161216757258
Epoch 1100==> Loss = 0.003540696810812607
Epoch 1200==> Loss = 0.00338802071911251
Epoch 1300==> Loss = 0.003252391365811723
Epoch 1400==> Loss = 0.0031309868189938247
Epoch 1500==> Loss = 0.003021628732626871
Epoch 1600==> Loss = 0.0029226009246481207
Epoch 1700==> Loss = 0.0028325263541913452
Epoch 1800==> Loss = 0.0027502815730238305
Epoch 1900==> Loss = 0.0026749360748922124
Epoch 2000==> Loss = 0.002605708639993917
Epoch 2100==> Loss = 0.002541935495253689
Epoch 2200==> Loss = 0.0024830467876951123
Epoch 2300==> Loss = 0.0024285489570947207
Epoch 2400==> Loss = 0.0023780113284902

In [213]:
# Accuracy for training set.
train_preds = predict(X_train, w, b)
accuracy(y_train, train_preds)

# Accuracy for test set.
# Flattening and normalizing.
test_preds = predict(X_test, w, b)
accuracy(y_test, test_preds)

'99.9515503875969%'

In [214]:
x = X[5115]
predict_top(x, w, b, 5)

(array([1.00000000e+00, 1.26853142e-13, 4.08548538e-14, 1.92079102e-14,
        1.12305135e-14]),
 [41, 37, 35, 33, 39],
 {'Covid': '99.99999999997522%',
  'Acne': '1.2685314159533862e-11%',
  'Arthritis': '4.0854853761290825e-12%',
  'Hypoglycemia': '1.920791016937742e-12%',
  'Psoriasis': '1.123051350896803e-12%'})

In [216]:
def create_random_x():
    n = random.randint(10,30)
    random_index = random.sample(range(1, len(X[5] - 1)), n)
    zero_matrix = np.zeros(len(X[5]))
    for i in random_index:
        zero_matrix[i] = 1
    return n, zero_matrix

In [218]:
n, k = create_random_x()
n, k
predict_top(k, w, b, 5)

(array([0.14905524, 0.11857408, 0.10086413, 0.08105034, 0.06697267]),
 [18, 31, 10, 14, 27],
 {'Typhoid': '14.9055239648949%',
  'Hypothyroidism': '11.857408249118642%',
  'Hypertension ': '10.086412606554193%',
  'Jaundice': '8.105034113143612%',
  'Pneumonia': '6.697267069648662%'})