In [1]:
# Code to enable this notebook to import from libraries
import os
import sys
# Directory two levels above the current working directory; it is appended to
# sys.path so that the project packages imported below (e.g. `scripts`) can be
# resolved -- presumably that directory is the repository root (TODO confirm).
# The relative path is built from components instead of a '..\..' literal:
# the backslash is a path separator on Windows only, and '\.' is an invalid
# string escape that recent Python versions warn about.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
if module_path not in sys.path:
    # Guarded so re-running the cell does not add duplicate entries.
    sys.path.append(module_path)

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
import seaborn as sns
import csv
from scripts.mockUtilities import *
from scripts.utilities import *

In [3]:
import pandas as pd
from IPython.display import display, HTML
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.cm as cm
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import pywt
import pywt.data
from sklearn.cluster import SpectralClustering
from matplotlib.colors import ListedColormap
from scipy.sparse import csr_matrix
from scipy.io import mmread
from sklearn.mixture import GaussianMixture
%matplotlib inline

In [4]:
# Set seed for reproducibility
# This seeds NumPy's legacy global random state only.
# NOTE(review): confirm that run_wavifm's random initialisations (and any other
# stochastic step in this notebook) draw from this global state; if they use
# their own generator they need a separate seed to be reproducible.
np.random.seed(100)

## Loading data

In [5]:
# Loading data
# Read the expression table (first CSV column is the row index) and keep the
# untouched frame in `raws`; `df` / `grid_rna` is a relabelled copy.
raws = pd.read_csv('std_raws.csv', index_col=0)

# Row-major coordinates of a grid_size x grid_size grid: row k of the table is
# labelled (k // grid_size, k % grid_size).
grid_size = 32
flat_positions = np.arange(grid_size * grid_size)
y_index = flat_positions // grid_size
x_index = flat_positions % grid_size

df = raws.copy()
# Replace the original column labels with positional gene names.
df.columns = [f'gene_indicator_{i}' for i in range(df.shape[1])]
# Assigning the index raises if the table does not have grid_size**2 rows.
df.index = pd.MultiIndex.from_arrays(
    [y_index, x_index],
    names=['y_index', 'x_index'],
)
grid_rna = df

## Processing data

In [6]:
# Problem dimensions used by the PCA and WaviFM sections below.
N_GENES = raws.shape[1]  # number of columns in the loaded table
# Derive the grid height from `grid_size` (set when the data was laid out on
# the grid) instead of repeating the literal 32, so the two cannot drift apart.
N_Y_INDICES = grid_size
# The grid is square, so the width equals the height.
N_X_INDICES = N_Y_INDICES

In [7]:
# Collapse every row of grid_rna into a single list stored in a 'feature'
# column, then turn the (y_index, x_index) index levels into ordinary columns.
per_spot_features = grid_rna.apply(lambda spot: spot.tolist(), axis=1)
result_rna_unnormalised = per_spot_features.to_frame(name='feature').reset_index()

In [8]:
# Build the frame handed to PCA / WaviFM: same layout as the unnormalised frame,
# with 'feature' holding the rows of `raws` as plain Python lists.
result_rna = result_rna_unnormalised.copy()
matrix_standardized = raws.to_numpy()
standardized_features = matrix_standardized.tolist()
result_rna['feature'] = standardized_features

# Sanity check: every column should have mean ~0 and (population) std ~1.
# The mean is compared against zero, where a relative tolerance has no effect
# (rtol * |0| == 0), so the 0.001 tolerance must be given as `atol`.
assert np.allclose(matrix_standardized.mean(axis=0), 0.0, atol=0.001)
# For the std the reference value is 1, so a relative tolerance is meaningful.
assert np.allclose(matrix_standardized.std(axis=0), 1.0, rtol=0.001)

In [9]:
# Column-wise standardisation check on the matrix fed to the models.
# A relative tolerance against a reference of zero collapses to the default
# absolute tolerance, so the mean check uses `atol`; the std check compares
# against 1, where `rtol` behaves as intended.
assert np.allclose(matrix_standardized.mean(axis=0), 0.0, atol=0.001)
assert np.allclose(matrix_standardized.std(axis=0), 1.0, rtol=0.001)

In [10]:
# Number of latent factors; used as the PCA `n_components` and passed to
# run_wavifm as `n_factors`, so both methods use the same factor count.
N_FACTORS = 9 # M in thesis

## Run PCA

In [11]:
# Fit PCA on the per-spot feature vectors, using the same number of components
# as the factor model.
n_components = N_FACTORS
feats = np.vstack(result_rna['feature'].to_numpy())  # one row per spot
pca = PCA(n_components=n_components)
flattened_L_pca = pca.fit_transform(feats)  # spot x component scores
F_pca = pca.components_ # F matrix, factor vs feature

# Scatter each component's scores onto the (y, x) grid in one vectorised
# assignment per component instead of a Python-level iterrows() loop.
# Scores are looked up by the frame's index labels, exactly as the row-wise
# loop did, so the spot -> grid-cell mapping is unchanged.
spot_rows = result_rna.index.to_numpy()
y_idx = result_rna['y_index'].to_numpy()
x_idx = result_rna['x_index'].to_numpy()
L_pca = [np.zeros((N_Y_INDICES, N_X_INDICES)) for _ in range(n_components)]
for component_index in range(n_components):
    L_pca[component_index][y_idx, x_idx] = flattened_L_pca[spot_rows, component_index]

# Reconstruction of the data from the retained components, transposed.
LF_pca = pca.inverse_transform(flattened_L_pca).transpose()

## Run WaviFM

In [12]:
from IPython.display import display, HTML
import matplotlib.cm as cm
from matplotlib.colors import ListedColormap
from sklearn.mixture import GaussianMixture
from scripts.cavi_plot_utilities import *
from scripts.cavi import *
import copy
from scripts.cavi_utilities import *
from scripts.cavi_evaluation import *
from scripts.utilities import *
from scripts.run_wavifm import *

%matplotlib inline

In [13]:
# Passed to run_wavifm as `n_length_scales`.
N_LENGTH_SCALES = 3 # D in thesis
# Total number of grid positions (width x height of the spot grid).
n_spots = N_X_INDICES*N_Y_INDICES

In [14]:
# Settings for the WaviFM fit, gathered in one place so they are easy to audit.
# NOTE(review): `n_init` presumably is the number of random restarts and
# `relative_elbo_threshold` the convergence tolerance -- confirm against
# scripts.run_wavifm.
wavifm_settings = dict(
    n_length_scales=N_LENGTH_SCALES,
    n_factors=N_FACTORS,
    n_x_indices=N_X_INDICES,
    n_y_indices=N_Y_INDICES,
    max_iterations=1000,
    relative_elbo_threshold=0.0001,
    n_init=10,
)
results = run_wavifm(result_rna=result_rna, **wavifm_settings)
# Only the fitted parameters are used by the rest of the notebook.
param_results = results["parameters"]

Initialisation 1:
	ELBO = -429264.1018705442
	#Iterations = 167
	Time taken (s) = 128.62983441352844
Initialisation 2:
	ELBO = -421105.83209152013
	#Iterations = 50
	Time taken (s) = 40.12436509132385
Initialisation 3:
	ELBO = -421030.0976647729
	#Iterations = 107
	Time taken (s) = 84.61430740356445
Initialisation 4:
	ELBO = -421006.38653507724
	#Iterations = 41
	Time taken (s) = 32.96919131278992
Initialisation 5:
	ELBO = -421156.76573171944
	#Iterations = 67
	Time taken (s) = 53.268216133117676
Initialisation 6:
	ELBO = -420799.03267785226
	#Iterations = 61
	Time taken (s) = 47.78884291648865
Initialisation 7:
	ELBO = -421185.7022203619
	#Iterations = 49
	Time taken (s) = 38.864450216293335
Initialisation 8:
	ELBO = -442898.8812193268
	#Iterations = 143
	Time taken (s) = 106.66841530799866
Initialisation 9:
	ELBO = -421301.7044612562
	#Iterations = 48
	Time taken (s) = 38.080350160598755
Initialisation 10:
	ELBO = -421009.3611918543
	#Iterations = 118
	Time taken (s) = 94.49440002441

## Processing results

In [15]:
# Evaluate each posterior-mean helper on the fitted parameters, in the order
# L, F, pi, eta, and bind the results to their respective names.
L_means, F_means, pi_means, eta_means = [
    posterior_mean(param_results)
    for posterior_mean in (
        variational_approx_posterior_mean_L,
        variational_approx_posterior_mean_F,
        variational_approx_posterior_mean_pi,
        variational_approx_posterior_mean_eta,
    )
]

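Get the factor activities in the spatial domain: reshape the flattened wavelet-domain means into square arrays, then apply the inverse wavelet transform.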

In [16]:
def square_flattened_matrix(flattened_matrix):
    """Reshape a flattened square matrix back into its 2-D ``(n, n)`` form.

    Parameters
    ----------
    flattened_matrix : array-like
        Holds ``n * n`` values; they are laid out row by row in the result.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n, n)`` containing the same values.

    Raises
    ------
    ValueError
        If the number of values is not a perfect square.
    """
    flattened_matrix_arr = np.array(flattened_matrix)
    n_values = flattened_matrix_arr.size
    # Round rather than truncate, then verify, so a non-square input fails with
    # an explicit message instead of an obscure reshape error.
    n = int(round(np.sqrt(n_values)))
    if n * n != n_values:
        raise ValueError(
            f"cannot form a square matrix from {n_values} values: "
            "the number of values must be a perfect square"
        )
    return flattened_matrix_arr.reshape((n, n))

In [17]:
# Work on a deep copy so the posterior means in L_means stay untouched.
L_means_formatted = copy.deepcopy(L_means)
mean_values = copy.deepcopy(param_results["mu_L"])
n_factors, n_features = np.array(param_results["mu_F"]).shape

for factor in range(n_factors):
    # The nesting (levels, then entries per level) is taken from mu_L.
    for level, level_entries in enumerate(param_results["mu_L"][factor]):
        for entry in range(len(level_entries)):
            # Every flattened coefficient vector becomes a square 2-D array.
            L_means_formatted[factor][level][entry] = square_flattened_matrix(
                L_means[factor][level][entry]
            )
    # The first level is stored as a bare 2-D array rather than a one-element
    # container -- presumably the layout pywt.waverec2 expects for the
    # approximation coefficients (TODO confirm).
    L_means_formatted[factor][0] = square_flattened_matrix(L_means[factor][0][0])

In [18]:
# Inverse Wavelet Transform on the factor loadings
idwt_L_means = [None]*n_factors

for l in range(n_factors):
    print(f"factor {l}")
    idwt_L_means[l] = pywt.waverec2(L_means_formatted[l], 'haar')

factor 0
factor 1
factor 2
factor 3
factor 4
factor 5
factor 6
factor 7
factor 8


## Exporting results

In [19]:
# Export inferred factor activities
# Each collection is unpacked into positional arguments, so np.savez stores its
# elements under the automatic names arr_0, arr_1, ... in the archive.
for npz_path, arrays in (
    ('idwt_L_meansWaviFM.npz', idwt_L_means),
    ('F_meansWaviFM.npz', F_means),
):
    np.savez(npz_path, *arrays)