In [None]:
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Selects which low-rank reconstruction the loop further down applies.
# The loop compares this value against "svd" (make_recom_svd) and "nmf" (make_recom_nmf).
LOWRANK_APPROX = "svd" # one of: "svd", "nmf"

In [None]:
# Base directory that the data path below is built from.
ROOT = ".."
# Load the example table; column 0 of the CSV is used as the row index.
g_df = pd.read_csv(f"{ROOT}/data/recommend/example.csv", index_col=[0])
# Show the raw values of the loaded table as a heatmap.
sns.heatmap(g_df.values)

In [None]:
# plot_svd_sdiag lives in the project-local recomm_misc module (not visible here);
# presumably it plots the singular values of the data matrix -- TODO confirm.
from recomm_misc import plot_svd_sdiag
plot_svd_sdiag(g_df.values)

In [None]:
from sklearn.decomposition import NMF

def make_recom_svd(df, nrank):
    """Line up candidates by SVD (singular value decomposition).

    The data matrix is factorised as ``U @ diag(s) @ Vt`` and rebuilt from
    only its ``nrank`` largest singular values.

    Args:
        df (pd.DataFrame): data
        nrank (int): the maximum rank to reconstruct data. Values larger
            than the number of singular values (``min(df.shape)``) are
            clamped to it, which yields the full reconstruction.

    Returns:
        pd.DataFrame: reconstructed data, with the same index and columns
        as ``df``.

    Raises:
        ValueError: if ``nrank`` is negative.
    """
    if nrank < 0:
        raise ValueError(f"nrank must be non-negative, got {nrank}")

    X = df.values
    # Thin SVD: only the first min(m, n) singular vectors are computed, which
    # is all a truncated reconstruction can ever use.
    u, sdiag, vt = np.linalg.svd(X, full_matrices=False)

    # Clamp so that asking for more components than exist does not fail.
    k = min(nrank, sdiag.shape[0])

    # Scaling the columns of U by the singular values is equivalent to
    # multiplying by the diagonal sigma matrix, without materialising it.
    recom_svd = (u[:, :k] * sdiag[:k]) @ vt[:k, :]
    return pd.DataFrame(recom_svd, index=df.index, columns=df.columns)


def make_recom_nmf(df, nrank, random_state=1):
    """Line up candidates by NMF (non-negative matrix factorization).

    Fits ``X ~ W @ H`` with ``nrank`` components and returns the product
    ``W @ H`` as the reconstructed data.

    Args:
        df (pd.DataFrame): data. Note: scikit-learn's NMF expects
            non-negative values.
        nrank (int): the maximum rank to reconstruct data (passed to NMF as
            ``n_components``).
        random_state (int): seed for the random initialisation
            (``init='random'``), so repeated runs give the same result.
            Defaults to 1.

    Returns:
        pd.DataFrame: reconstructed data, with the same index and columns
        as ``df``.
    """
    X = df.values
    model = NMF(n_components=nrank, init='random', random_state=random_state)
    W = model.fit_transform(X)
    H = model.components_

    # Plain ndarray product; to inspect the approximation error, compare the
    # returned frame with ``df`` at the call site.
    recom_nmf = W @ H
    return pd.DataFrame(recom_nmf, index=df.index, columns=df.columns)


def make_recom_correlation(df, nrank=None):
    """Line up candidates by correlation.

    Per the original author's note: if X is indexed as
    [material, structuretype], then (X.T @ X)[structuretype, structuretype]
    should give the correlation between structure types, and multiplying by
    X once more gives a [material, structuretype] matrix again:

        recom = X[material, structuretype]
                @ (X.T @ X)[structuretype, structuretype]

    The code evaluates ``X @ X.T @ X``, which is the same product by
    associativity. Because the result is of order X^3, it is min-max
    normalised to the range [0, 1].

    Args:
        df (pd.DataFrame): data
        nrank (int): not used (dummy, kept so the signature matches the
            other make_recom_* functions). Defaults to None.

    Returns:
        pd.DataFrame: reconstructed data, with the same index and columns
        as ``df``. If every entry of the product is identical (e.g. all-zero
        input) the result is all zeros; an empty input yields an empty frame.
    """
    X = df.values

    recom = X @ X.T @ X

    # Nothing to normalise; max()/min() would raise on a zero-size array.
    if recom.size == 0:
        return pd.DataFrame(recom, index=df.index, columns=df.columns)

    # The product is of order X^3, so rescale it to [0, 1].
    vmax = recom.max()
    vmin = recom.min()
    span = vmax - vmin
    if span == 0:
        # Constant matrix: min-max scaling would divide by zero and give NaN.
        recom = np.zeros(recom.shape, dtype=float)
    else:
        recom = (recom - vmin) / span

    return pd.DataFrame(recom, index=df.index, columns=df.columns)

In [None]:
from recomm_misc import plot_2df
# Map every supported LOWRANK_APPROX value to its reconstruction function.
_RECOM_FUNCS = {"svd": make_recom_svd, "nmf": make_recom_nmf}

# Fail early on an unsupported setting; otherwise g_df_tranform would be left
# unassigned (or stale from an earlier run) when it is passed to plot_2df.
if LOWRANK_APPROX not in _RECOM_FUNCS:
    raise ValueError(
        f"LOWRANK_APPROX must be one of {sorted(_RECOM_FUNCS)}, got {LOWRANK_APPROX!r}"
    )
_make_recom = _RECOM_FUNCS[LOWRANK_APPROX]

# Reconstruct the data at several ranks and hand each result, together with
# the original data, to plot_2df (project-local helper; presumably draws the
# two frames for comparison -- TODO confirm).
for _nrank in [1, 3, 5, 7]:
    g_df_tranform = _make_recom(g_df, _nrank)
    plot_2df(g_df, g_df_tranform, _nrank, threshold=0.2)

![](image_text/160_10.png)