In [4]:
"""Volume 2: Non-negative Matrix Factorization."""

import numpy as np
import cvxpy as cp
from matplotlib import pyplot as plt
import os
from imageio import imread
import warnings
warnings.filterwarnings("ignore")

from sklearn.decomposition import NMF
from sklearn.metrics import mean_squared_error as mse

In [32]:
#Problems 1-2
class NMFRecommender:

    def __init__(self,random_state=15,rank=3,maxiter=200,tol=1e-3):
        """The parameter values for the algorithm"""
        self.random_state = random_state
        self.rank = rank
        self.maxiter = maxiter
        self.tol = tol
        
    
    def initialize_matrices(self,m,n):
        """randomly initialize the W and H matrices,"""
        np.random.seed(self.random_state)
        self.m = m
        self.n = n
        # Set k using the rank and generate rand W & H
        k = self.rank
        self.W = np.random.random((m, k))
        self.H = np.random.random((k, n))
        return self.W, self.H
      
    def fit(self,V):
        """Fits W and H weight matrices using CVXPY"""
        # Initialize matrices
        m, n = V.shape
        self.initialize_matrices(m, n)

        iters = 0
        w1 = cp.Variable((self.m, self.rank), nonneg = True)
        h1 = cp.Variable((self.rank, self.n), nonneg = True)

        # First run for optimizing W
        obj_w = cp.Minimize(cp.norm(V - w1 @ self.H, 'fro'))
        prob = cp.Problem(obj_w)
        prob.solve()

        # First run for optimizing H
        obj_h = cp.Minimize(cp.norm(V - self.W @ h1, 'fro'))
        prob = cp.Problem(obj_h)
        prob.solve()

        while np.linalg.norm(self.W - w1.value, 'fro') < self.tol and np.linalg.norm(self.H - h1.value, 'fro') < self.tol:
            self.W = w1.value
            self.H = h1.value
            
            # Break if past max iters
            iters += 1
            if iters > self.maxiter:
                break

            w1 = cp.Variable((self.m, self.rank), nonneg = True)
            h1 = cp.Variable((self.rank, self.n), nonneg = True)

            # Run Optimizing W
            obj_w = cp.Minimize(cp.norm(V - w1 @ self.H, 'fro'))
            prob = cp.Problem(obj_w)
            prob.solve()
            self.W = w1.value

            # Run Optimizing H
            obj_h = cp.Minimize(cp.norm(V - self.W @ h1, 'fro'))
            prob = cp.Problem(obj_h)
            prob.solve()
            self.H = h1.value

        return self.W, self.H



    def reconstruct(self):
        """Reconstruct V matrix for comparison against the original V"""
        return self.W @ self.H


In [25]:
def prob3():
    """Run NMF recommender on the grocery store example"""
    V = np.array(
        [
            [0, 1, 0, 1, 2, 2],
            [2, 3, 1, 1 ,2, 2],
            [1, 1, 1, 0, 1, 1],
            [0, 2, 3, 4, 1, 1],
            [0, 0, 0, 0, 1, 0]
        ]
    )
    nmf = NMFRecommender()
    W, H = nmf.fit(V)
    num_of_people = np.sum(H[1, :] - H[0, :] > 0)
    return W, H, num_of_people

In [33]:
W, H, num = prob3()
print(W @ H)
print(num)

[[0.27069309 0.28570643 0.15361423 0.14090288 0.36825654 0.90334652]
 [0.42528749 0.34493434 0.43260642 0.31309811 0.61766068 0.74393628]
 [0.22988559 0.29853625 0.21860497 0.20066576 0.4090827  0.41317508]
 [0.51754975 0.77705221 0.58834932 0.55656116 1.0592767  0.52578995]
 [0.84922211 0.89267417 0.8779088  0.71088018 1.39585751 1.34869212]]
5


In [3]:

#get data
def get_faces(path="./faces94"):
    """Traverse the specified directory to obtain one image per subdirectory.
    Flatten and convert each image to grayscale.

    Parameters:
        path (str): The directory containing the dataset of images.

    Returns:
        ((mn,k) ndarray) An array containing one column vector per
            subdirectory. k is the number of people, and each original
            image is mxn.
    """
    # Traverse the directory and get one image per subdirectory.
    faces = []
    for (dirpath, dirnames, filenames) in os.walk(path):
        for fname in filenames:
            if fname[-3:]=="jpg":       # Only get jpg images.
                # Load the image, convert it to grayscale,
                # and flatten it into a vector.
                faces.append(np.ravel(imread(dirpath+"/"+fname, as_gray=True)))
                break
    # Put all the face vectors column-wise into a matrix.
    return np.transpose(faces)

def show(image, m=200, n=180, plt_show=False):
    """Plot the flattened grayscale 'image' of width 'w' and height 'h'.

    Parameters:
        image ((mn,) ndarray): A flattened image.
        m (int): The original number of rows in the image.
        n (int): The original number of columns in the image.
        plt_show (bool): if True, call plt.show() at the end
    """
    #scale image
    image = image / 255
    #reshape image
    image = np.reshape(image,(m,n))
    #show image
    plt.imshow(image,cmap = "gray")
    
    if plt_show:
        plt.show()


In [4]:
def prob4():
    """
        Gridsearch over rank, alpha and l1_ratio values to reconstruct 
        image using NMF. Plot all reconstructed images.
    """
    raise NotImplementedError("Problem 3 incomplete")


In [None]:
def prob5():
    '''
        find the 10 basis faces with the largest coefficients 
        corresponding to the the second and twelfth face in the dataset. 
        Plot these basis faces along with the original image using 
        subplots
    '''
    raise NotImplementedError("Problem 4 incomplete")