# Normalize inputs to SVD using TF-IDF
Natalia Vélez, July 2020

In [1]:
import numpy as np

In [11]:
def tf(row):
    """Return the term frequencies of one row of counts.

    Each entry is divided by the row total, so the result sums to 1.

    Parameters
    ----------
    row : array_like
        1-D vector of counts (one document when used via ``tf_idf``).

    Returns
    -------
    numpy.ndarray
        ``row / sum(row)``; all zeros when the row total is zero.
    """
    total = np.sum(row)
    if total == 0:
        # An empty document would otherwise compute 0/0 and fill the row
        # with NaN, which then propagates through any downstream
        # factorization. A zero vector is the natural frequency here.
        return np.zeros(np.shape(row), dtype=float)
    return row / total

def idf(col):
    """Return the inverse document frequency of one column, broadcast to its shape.

    The weight is ``log(N / (df + 1)) + 1``, where ``N`` is the length of
    ``col`` and ``df`` is the number of strictly positive entries in it.
    The same scalar weight is repeated for every entry so the result can be
    multiplied element-wise with a term-frequency matrix.

    Parameters
    ----------
    col : numpy.ndarray
        1-D vector of counts for a single term (one entry per document when
        used via ``tf_idf``).

    Returns
    -------
    numpy.ndarray
        Array with the shape of ``col``, filled with the IDF weight.
    """
    n_docs = len(col)
    # The +1 keeps the denominator non-zero for terms that never occur.
    smoothed_doc_freq = np.sum(col > 0) + 1
    weight = np.log(n_docs / smoothed_doc_freq) + 1

    return np.full(col.shape, weight)

def tf_idf(m):
    """Return the TF-IDF weighted version of the matrix ``m``.

    ``tf`` is applied to every row (axis 1) and ``idf`` to every column
    (axis 0); the two resulting matrices are multiplied element-wise.

    Parameters
    ----------
    m : numpy.ndarray
        2-D count matrix. Rows are normalized by ``tf`` and columns are
        weighted by ``idf``.

    Returns
    -------
    numpy.ndarray
        Matrix of the same shape as ``m`` holding the TF-IDF weights.
    """
    return np.multiply(
        np.apply_along_axis(tf, 1, m),   # per-row term frequencies
        np.apply_along_axis(idf, 0, m),  # per-column IDF weights
    )

# Example 1: The TF-IDF example from Wikipedia

In [3]:
# Toy count matrix with 2 rows and 6 columns; tf() is applied to its rows
# and idf() to its columns in the cells that follow.
bag_o_words = np.array([[1,1,2,1,0,0],[1,1,0,0,2,3]])
bag_o_words.shape

(2, 6)

In [4]:
# Term frequency: divide every row (axis 1) by its own total.
word_tf = np.apply_along_axis(tf, 1, bag_o_words)
word_tf

array([[0.2       , 0.2       , 0.4       , 0.2       , 0.        ,
        0.        ],
       [0.14285714, 0.14285714, 0.        , 0.        , 0.28571429,
        0.42857143]])

In [5]:
# Inverse document frequency: one weight per column (axis 0), repeated
# down the column so the result has the same shape as bag_o_words.
word_idf = np.apply_along_axis(idf, 0, bag_o_words)
word_idf

array([[0.59453489, 0.59453489, 1.        , 1.        , 1.        ,
        1.        ],
       [0.59453489, 0.59453489, 1.        , 1.        , 1.        ,
        1.        ]])

In [6]:
# Element-wise product of the TF and IDF matrices gives the TF-IDF weights.
np.multiply(word_tf, word_idf)

array([[0.11890698, 0.11890698, 0.4       , 0.2       , 0.        ,
        0.        ],
       [0.08493356, 0.08493356, 0.        , 0.        , 0.28571429,
        0.42857143]])

In [12]:
# Same computation in a single call through the tf_idf() wrapper.
tf_idf(bag_o_words)

array([[0.11890698, 0.11890698, 0.4       , 0.2       , 0.        ,
        0.        ],
       [0.08493356, 0.08493356, 0.        , 0.        , 0.28571429,
        0.42857143]])