In [2]:
# softmax regression
import numpy as np
import math
import random

z = np.array([5,4,3,6,6])

def softmax(z):
    
    # z--> linear part.
    
    # subtracting the max of z for numerical stability.
    exp = np.exp(z - np.max(z))
    
    # Calculating softmax for all examples.
    for i in range(len(z)):
        exp[i] /= np.sum(exp[i])
        
    return exp

def softmax_stable(Z):
    e_Z = np.exp(Z - np.max(Z, axis = 0, keepdims = True))
    A = e_Z / e_Z.sum(axis = 0)
    return A

def number_classes(y):
    c = 0
    lst_class = []
    dict = {}
    for i in range(len(y)):
        if y[i] not in lst_class:
            c += 1
            lst_class.append(y[i])
    return lst_class, c

def one_hot(y, c):
    
    # y--> label/ground truth.
    # c--> Number of classes.
    
    # A zero matrix of size (m, c)
    y_hot = np.zeros((len(y), c))
    
    # Putting 1 for column where the label is,
    # Using multidimensional indexing.
    y_hot[np.arange(len(y)), y] = 1
    
    return y_hot

def predict(X, w, b):
    
    # X --> Input.
    # w --> weights.
    # b --> bias.
    
    # Predicting
    z = X@w + b
    y_hat = softmax(z)
    
    # Returning the class with highest probability.
    return np.argmax(y_hat, axis=1)

def accuracy(y, y_hat):
    return str(np.sum(y==y_hat)/len(y)*100) +"%"

print(softmax(z))
print(softmax_stable(z))

[1. 1. 1. 1. 1.]
[0.14409682 0.05301026 0.01950138 0.39169577 0.39169577]


In [3]:
def predict_top(x, w, b, n):
    z = x@w + b
    y_hot = softmax_stable(z)
    
    #sort the y_hot
    sort = np.sort(y_hot)
    sort_convert = sort[::-1]
    
    top_n = sort_convert[:n]
    top_n_index = []
    for j in range(len(top_n)):
        for i in range(len(y_hot)):
            if y_hot[i] == top_n[j]:
                top_n_index.append(i)
    
    top_list = {}
    for i in range(n):
        top_list[(dict_convert[classes[top_n_index[i]]])] = str(sort_convert[i] * 100) + "%"
    return top_n, top_n_index, top_list

In [4]:
import pandas as pd

df = pd.read_csv("out.csv")
df_raw = df.iloc[:,1:]

In [5]:
X = df_raw.iloc[:,:-1].to_numpy()
y = df_raw.iloc[:,-1].to_numpy()

In [6]:
#create dictionary of classes

dict = {}
count = 0
for x in y:
    if x not in dict:
        dict[x] = count
        count += 1

In [7]:
#create dictionary of convert classes
dict_convert = {}
for x in dict:
    dict_convert[dict[x]] = x

In [8]:
#change text to integer
for i in range(len(y)):
    y[i] = dict[y[i]]

In [9]:
y = y.astype(np.int32)

In [10]:
classes, c = number_classes(y)
y_hot = one_hot(y, c)

In [11]:
loss = -np.mean(np.log(y_hot[np.arange(len(y)), y]))

In [12]:
y_hot

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.]])

In [13]:
def fit(X, y, lr, c, epochs):
    
    # X --> Input.
    # y --> true/target value.
    # lr --> Learning rate.
    # c --> Number of classes.
    # epochs --> Number of iterations.
    
        
    # m-> number of training examples
    # n-> number of features 
    m, n = X.shape
    
    # Initializing weights and bias randomly.
    w = np.random.random((n, c))
    b = np.random.random(c)
    # Empty list to store losses.
    losses = []
    
    # Training loop.
    for epoch in range(epochs):
        
        # Calculating hypothesis/prediction.
        z = X@w + b
        y_hat = softmax(z)
        
        # One-hot encoding y.
        y_hot = one_hot(y, c)
        
        # Calculating the gradient of loss w.r.t w and b.
        w_grad = (1/m)*np.dot(X.T, (y_hat - y_hot)) 
        b_grad = (1/m)*np.sum(y_hat - y_hot)
        
        # Updating the parameters.
        w = w - lr*w_grad
        b = b - lr*b_grad
        
        # Calculating loss and appending it in the list.
        loss = -np.mean(np.log(y_hat[np.arange(len(y)), y]))
        losses.append(loss)
        # Printing out the loss at every 100th iteration.
        if epoch%100==0:
            print('Epoch {epoch}==> Loss = {loss}'
                  .format(epoch=epoch, loss=loss))
    return w, b, losses

In [14]:
#train vs sample data
import numpy as np
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

In [15]:
# Training
w, b, l = fit(X_train, y_train, lr=0.5, c=len(classes), epochs=10000)

Epoch 0==> Loss = 19.51958168265143


  loss = -np.mean(np.log(y_hat[np.arange(len(y)), y]))


Epoch 100==> Loss = 0.010982989765766353
Epoch 200==> Loss = 0.008157752047960348
Epoch 300==> Loss = 0.006954883799946346
Epoch 400==> Loss = 0.006220916203022421
Epoch 500==> Loss = 0.00570184899307882
Epoch 600==> Loss = 0.005303172953060633
Epoch 700==> Loss = 0.004980455235638506
Epoch 800==> Loss = 0.0047096327329077025
Epoch 900==> Loss = 0.004476372673514642
Epoch 1000==> Loss = 0.004271538535679611
Epoch 1100==> Loss = 0.004089005050687278
Epoch 1200==> Loss = 0.003924502436988381
Epoch 1300==> Loss = 0.0037749583660765355
Epoch 1400==> Loss = 0.0036381007146445883
Epoch 1500==> Loss = 0.003512206754388157
Epoch 1600==> Loss = 0.003395939664905003
Epoch 1700==> Loss = 0.0032882394944502287
Epoch 1800==> Loss = 0.003188248808019038
Epoch 1900==> Loss = 0.0030952605376239587
Epoch 2000==> Loss = 0.0030086802265342025
Epoch 2100==> Loss = 0.002927998038185063
Epoch 2200==> Loss = 0.002852767870926776
Epoch 2300==> Loss = 0.002782591945556029
Epoch 2400==> Loss = 0.002717109686112

In [16]:
# Accuracy for training set.
train_preds = predict(X_train, w, b)
accuracy(y_train, train_preds)

# Accuracy for test set.
# Flattening and normalizing.
test_preds = predict(X_test, w, b)
accuracy(y_test, test_preds)

'99.9515503875969%'

In [17]:
x = X[5115]
predict_top(x, w, b, 5)

(array([1.00000000e+00, 1.28953552e-17, 4.90982970e-19, 4.85313488e-19,
        3.56477959e-19]),
 [41, 35, 4, 0, 10],
 {'Covid': '100.0%',
  'Arthritis': '1.2895355175031855e-15%',
  'Drug Reaction': '4.909829702074874e-17%',
  'Fungal infection': '4.8531348784232774e-17%',
  'Hypertension ': '3.564779591314934e-17%'})

In [18]:
def create_random_x():
    n = random.randint(10,30)
    random_index = random.sample(range(1, len(X[5] - 1)), n)
    zero_matrix = np.zeros(len(X[5]))
    for i in random_index:
        zero_matrix[i] = 1
    return n, zero_matrix

In [19]:
n, k = create_random_x()
n, k
predict_top(k, w, b, 5)

(array([0.26402925, 0.12136285, 0.1189559 , 0.11545562, 0.10302569]),
 [31, 25, 13, 4, 41],
 {'Hypothyroidism': '26.40292494295153%',
  'Tuberculosis': '12.136285338762296%',
  'Paralysis (brain hemorrhage)': '11.89558971231476%',
  'Drug Reaction': '11.545562201626975%',
  'Covid': '10.302569435421411%'})

In [20]:
#train model with sklearn
import numpy as np 
from mnist import MNIST
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import accuracy_score

# train
logreg = linear_model.LogisticRegression(C=1e5, solver = 'lbfgs', multi_class = 'multinomial')
logreg.fit(X_train, y_train)

# test
y_pred = logreg.predict(X_test)
print ("Accuracy: %.2f %%" %(100*accuracy_score(y_test, y_pred.tolist())))

Accuracy: 100.00 %


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
