In [1]:
%matplotlib inline

import os, re, glob
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy.linalg import svd
from os.path import join as opj
from kneed import KneeLocator

import matplotlib.pyplot as plt
import seaborn as sns

# Global seaborn plot settings ('white' style, 'talk' context); set once here
# so they are in effect before any figure is created further down.
sns.set_style('white')
sns.set_context('talk')

Load the saved SVD factors (U, s, Vh):

In [2]:
# Read the three SVD factors from their plain-text files, in U, s, Vh order.
U, s, Vh = (
    np.loadtxt(path)
    for path in ('outputs/svd/U.txt', 'outputs/svd/s.txt', 'outputs/svd/Vh.txt')
)

# Sanity check: one shape per line.
print(U.shape, s.shape, Vh.shape, sep='\n')

(8492, 2991)
(2991,)
(2991, 2991)


Plot the truncation threshold (the knee of the singular-value curve):

In [None]:
# Locate the knee of the (decreasing, convex) singular-value curve; its index
# becomes the truncation threshold r.
knee = KneeLocator(range(len(s)), s, curve='convex', direction='decreasing')
if knee.knee is None:
    # Without a knee, r would be None and the arithmetic/indexing below
    # (r+200, var_exp[r]) would fail with an opaque TypeError. Fail clearly.
    raise ValueError('KneeLocator found no knee in the singular values')
r = int(knee.knee)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

# Left panel: singular values with the knee marked
ax1.plot(s)
ax1.axvline(r, linestyle='--', color='#aaaaaa')
ax1.text(r+200, np.max(s)/2, 'i = %i' % r, size=18)
ax1.set(xlabel = 'i', ylabel = 'Singular values')

# Right panel: cumulative fraction of variance explained.
# The variance carried by a component is proportional to its *squared*
# singular value, so the cumulative sum is taken over s**2, not s.
s_sq = s**2
var_exp = np.cumsum(s_sq)/np.sum(s_sq)
ax2.plot(var_exp)
# var_exp[r] is the cumulative share through index r (inclusive).
ax2.axhline(var_exp[r], linestyle='--', color='gray')
ax2.axvline(r, linestyle='--', color='gray')
ax2.text(r+200, var_exp[r]-0.1, '%0.2f' % var_exp[r], size=18)
ax2.set(xlabel = 'i', ylabel = 'Variance explained')

sns.despine()

# Save through the explicit figure handle rather than pyplot's "current figure".
fig.savefig('outputs/svd/truncate_threshold.png')

Truncate the SVD factors to the first r components:

In [4]:
# Keep only the leading r components of each factor:
# first r columns of U, first r singular values, first r rows of Vh.
U_hat, s_hat, Vh_hat = U[:, :r], s[:r], Vh[:r, :]

# Report the truncated shapes, one per line.
print(U_hat.shape, s_hat.shape, Vh_hat.shape, sep='\n')

(8492, 95)
(95,)
(95, 2991)
