# **PCA** using Stan
---

In [1]:
# Load packages used in this notebook
import os
import json
import shutil
import urllib.request
import pandas as pd
import numpy as np
# Mount Google Drive at /content/drive/ so that files stored under
# /content/drive/MyDrive/ can be read by path from this runtime.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
# Install package CmdStanPy
!pip install --upgrade cmdstanpy



In [3]:
# Install pre-built CmdStan binary
# (faster than compiling from source via install_cmdstan() function)
tgz_file = 'colab-cmdstan-2.23.0.tar.gz'
tgz_url = 'https://github.com/stan-dev/cmdstan/releases/download/v2.23.0/colab-cmdstan-2.23.0.tar.gz'
# Directory the archive is expected to unpack to (assumed from the release
# naming; it is the directory CMDSTAN is pointed at in this notebook).
cmdstan_dir = 'cmdstan-2.23.0'

# Download only when the archive is not already present. The download goes to
# a temporary name first so that an interrupted transfer never leaves a
# truncated file that a later run would mistake for a complete archive.
if not os.path.exists(tgz_file):
    partial_file = tgz_file + '.part'
    urllib.request.urlretrieve(tgz_url, partial_file)
    os.replace(partial_file, tgz_file)

# Unpack whenever the install directory is missing, independently of whether
# the archive had to be downloaded in this run.
if not os.path.isdir(cmdstan_dir):
    shutil.unpack_archive(tgz_file)

In [4]:
# Specify CmdStan location via environment variable
# (a relative path, so it is interpreted against the current working directory)
os.environ['CMDSTAN'] = './cmdstan-2.23.0'
# Check CmdStan path
from cmdstanpy import CmdStanModel, cmdstan_path
# Bare last expression: the notebook displays the path CmdStanPy reports.
cmdstan_path()

'cmdstan-2.23.0'

In [5]:
# Read the tab-separated results table from the mounted Drive folder.
df = pd.read_csv(
    '/content/drive/MyDrive/SEARCH/ST001932_AN003142_Results.txt',
    sep="\t",
)

In [6]:
# Keep only the columns whose label matches 'metabolite' or '00_1', use the
# 'metabolite_name' column as the index, then transpose so that the former
# columns become rows (presumably one row per sample -- TODO confirm).
dtf = (
    df.filter(regex='metabolite|00_1')
    .set_index('metabolite_name')
    .T
)

In [7]:
# Path to the Stan program file on the mounted Drive
# (despite the variable name, this holds a file path, not the model source text).
model_string = '/content/drive/MyDrive/Github_rep/colab_python/stan_compute/ppca.stan'

In [8]:
# Input data for the Stan program, keyed by variable name.
# NOTE(review): N and D are hard-coded; presumably they must agree with the
# dimensions of x as declared in ppca.stan -- confirm against dtf.shape.
idata = {
    'N': 1796,
    'D': 7055,
    'M': 2,
    'x': np.array(dtf),
}

In [10]:
# Compile model ppca.stan
# (model_string holds the path to the .stan file; the recorded log shows an
# existing, newer executable being reused instead of recompiling)
ppca_model = CmdStanModel(stan_file = model_string)

# Condition data
# Runs the sampler on idata with a fixed seed; the result object is kept in ppca_fit.
# NOTE(review): the recorded run was stopped by a KeyboardInterrupt, and the log
# reports an error when invoking the executable located on the Drive mount --
# consider whether the model should be compiled from local disk; confirm.
ppca_fit = ppca_model.sample(data = idata, seed = 123)

DEBUG:cmdstanpy:found newer exe file, not recompiling
DEBUG:cmdstanpy:input tempfile: /tmp/tmpejw4sufw/y26hi7gh.json
DEBUG:cmdstanpy:cmd: /content/drive/MyDrive/Github_rep/colab_python/stan_compute/ppca info
cwd: None
DEBUG:cmdstanpy:Command ['/content/drive/MyDrive/Github_rep/colab_python/stan_compute/ppca', 'info']
	error during processing Machine is not on the network
20:27:33 - cmdstanpy - INFO - CmdStan start processing
INFO:cmdstanpy:CmdStan start processing


chain 1 |          | 00:00 Status

chain 2 |          | 00:00 Status

chain 3 |          | 00:00 Status

chain 4 |          | 00:00 Status

DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: 1
DEBUG:cmdstanpy:CmdStan args: ['/content/drive/MyDrive/Github_rep/colab_python/stan_compute/ppca', 'id=1', 'random', 'seed=123', 'data', 'file=/tmp/tmpejw4sufw/y26hi7gh.json', 'output', 'file=/tmp/tmpejw4sufw/ppcahlwpsb19/ppca-20230719202733_1.csv', 'method=sample', 'algorithm=hmc', 'adapt', 'engaged=1']
DEBUG:cmdstanpy:idx 1
DEBUG:cmdstanpy:running CmdStan, num_threads: 1
DEBUG:cmdstanpy:CmdStan args: ['/content/drive/MyDrive/Github_rep/colab_python/stan_compute/ppca', 'id=2', 'random', 'seed=123', 'data', 'file=/tmp/tmpejw4sufw/y26hi7gh.json', 'output', 'file=/tmp/tmpejw4sufw/ppcahlwpsb19/ppca-20230719202733_2.csv', 'method=sample', 'algorithm=hmc', 'adapt', 'engaged=1']


KeyboardInterrupt: ignored