# NMF and Perceptron for Twitter Data Classification

In [4]:
import numpy as np
from scipy.io import loadmat
import matplotlib.pyplot as plt
from scipy import linalg as LA
import scipy.io.wavfile as wav
import librosa
from IPython.display import Audio


def B_update(B, T, X):
    """Apply one multiplicative update to ``B`` and renormalise its columns.

    ``B`` is multiplied element-wise by ``(X / (B @ T)) @ T.T`` and every
    column of the result is then rescaled so that it sums to one.

    Args:
        B: 2-D array, current left factor; its rows match the rows of ``X``.
        T: 2-D array, current right factor; its columns match those of ``X``.
        X: 2-D array that the product ``B @ T`` approximates.

    Returns:
        A new array with the shape of ``B``; the argument is not modified.
    """
    # Element-wise ratio of the data to its current reconstruction. The tiny
    # constant keeps the division finite where the reconstruction is zero.
    ratio = np.divide(X, np.dot(B, T) + 1e-20)
    B = B * np.dot(ratio, np.transpose(T))

    # Column sums, kept 2-D so they broadcast over the rows. Summing directly
    # avoids allocating a (rows x rows) matrix of ones and multiplying by it
    # just to obtain the same totals.
    col_sums = np.sum(B, axis=0, keepdims=True)
    return np.divide(B, col_sums + 1e-20)


def T_update(B, T, X):
    """Apply one multiplicative update to ``T`` and renormalise its columns.

    ``T`` is multiplied element-wise by ``B.T @ (X / (B @ T))`` and every
    column of the result is then rescaled so that it sums to one.

    Args:
        B: 2-D array, left factor (held fixed here); rows match ``X``'s rows.
        T: 2-D array, current right factor; its columns match those of ``X``.
        X: 2-D array that the product ``B @ T`` approximates.

    Returns:
        A new array with the shape of ``T``; the argument is not modified.
    """
    # Element-wise ratio of the data to its current reconstruction. The tiny
    # constant keeps the division finite where the reconstruction is zero.
    ratio = np.divide(X, np.dot(B, T) + 1e-20)
    T = np.dot(np.transpose(B), ratio) * T

    # Column sums, kept 2-D so they broadcast over the rows. Summing directly
    # replaces a multiplication by a square matrix of ones and no longer
    # needs to index B[0] to find the size.
    col_sums = np.sum(T, axis=0, keepdims=True)
    return np.divide(T, col_sums + 1e-20)


def cost_fun(X, X_hat):
    """Return the mean absolute element-wise difference of ``X`` and ``X_hat``."""
    return np.mean(np.absolute(X - X_hat))


def W_init(rows, col):
    """Draw a random ``(rows, col)`` weight matrix.

    Entries come from NumPy's global standard-normal generator and are
    scaled by ``sqrt(2 / (rows + col))``.
    """
    scale = np.sqrt(2 / (rows + col))
    return scale * np.random.randn(rows, col)


def softmax(x):
    """Return the softmax of ``x`` taken along axis 0.

    Each column of a 2-D input is mapped to non-negative values that sum to
    one.

    Args:
        x: Array-like of scores; normalisation runs over the first axis.

    Returns:
        Array of the same shape as ``x`` holding the normalised exponentials.
    """
    x = np.asarray(x)
    if x.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return np.exp(x)

    # Subtracting the per-column maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the ratio
    # from becoming NaN) when the scores are large.
    shifted = x - np.max(x, axis=0, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / np.sum(exp_scores, axis=0, keepdims=True)

def perceptron(X, Y, l_rate, epochs, tol):
    """Train a single-layer softmax classifier with batch gradient descent.

    Args:
        X: 2-D array of shape (n_features, n_samples); one sample per column.
        Y: 2-D array of shape (n_classes, n_samples) holding the targets
            that the predicted probabilities are compared against.
        l_rate: Step size applied to both gradients.
        epochs: Maximum number of full-batch iterations.
        tol: Training stops early once the mean cross-entropy loss falls
            below this value.

    Returns:
        Tuple ``(weights, biases)`` with shapes (n_features, n_classes) and
        (n_classes, 1).
    """
    n_features, n_samples = len(X), len(X[0])
    n_classes = len(Y)

    weights = W_init(n_features, n_classes)
    # Size the bias from the Y argument; reading a module-level test-label
    # matrix here would tie the function to one particular script.
    biases = np.random.randn(n_classes, 1)

    for _ in range(epochs):
        # Forward pass: class scores per sample, then column-wise softmax.
        out_prob = softmax(np.dot(weights.T, X) + biases)

        # Mean cross-entropy per sample, normalised by the same count as the
        # gradients below. The small constant keeps log() finite when a
        # probability underflows to zero.
        loss = -np.sum(Y * np.log(out_prob + 1e-20)) / n_samples
        if np.abs(loss) < tol:
            break

        # Gradient of the loss with respect to the scores is (prob - target).
        residual = out_prob - Y
        dW = np.dot(X, residual.T) / n_samples
        db = (np.sum(residual, axis=1) / n_samples).reshape(-1, 1)

        weights -= l_rate * dW
        biases -= l_rate * db

    return weights, biases


if __name__ == '__main__':
    # Pull the train/test data matrices and their label matrices out of the
    # MATLAB file.
    file = loadmat('/content/twitter.mat')
    Xtr, Xte = file["Xtr"], file["Xte"]
    Ytr, Yte = file["YtrMat"], file["YteMat"]
    K = 50  # inner dimension shared by the factors B (rows x K) and T (K x cols)

    # Seeded generator so the random factor initialisation is repeatable.
    seed_value = 1
    rng = np.random.default_rng(seed_value)
    lower_bound, upper_bound = 0.0, 1.0

    # The draw order (B, then T, then TY) fixes the values that each factor
    # receives from the seeded generator.
    B = rng.uniform(lower_bound, upper_bound, size=(len(Xtr), K))
    T = rng.uniform(lower_bound, upper_bound, size=(K, len(Xtr[0])))
    TY = rng.uniform(lower_bound, upper_bound, size=(K, len(Xte[0])))

    tol, max_iteration = 1e-2, 600

    # Stage 1: alternate the B and T updates on the training matrix until the
    # mean absolute reconstruction error drops below tol.
    for i in range(max_iteration):
        B = B_update(B, T, Xtr)
        T = T_update(B, T, Xtr)
        S_hat = B.dot(T)
        error = cost_fun(Xtr, S_hat)
        if error < tol:
            break

    # Stage 2: keep B fixed and fit only TY to the test matrix.
    for i in range(max_iteration):
        TY = T_update(B, TY, Xte)
        Y_hat = B.dot(TY)
        error2 = cost_fun(Xte, Y_hat)
        if error2 < tol:
            break

    # Stage 3: train the classifier on the training factor T, then score the
    # test factor TY with the learned weights.
    weights, bias = perceptron(T, Ytr, l_rate=0.1, epochs=2000, tol=1e-3)

    out = weights.T.dot(TY) + bias
    out_prob = softmax(out)
    pred = np.argmax(out_prob, axis=0)
    label = np.argmax(Yte, axis=0)

    # Percentage of test columns whose predicted class index matches the label.
    correct = np.sum(pred == label)
    accuracy = correct / len(Yte[0]) * 100

    print("Classification accuracy is: ", accuracy)

Classification accuracy is:  56.994818652849744
