# Test Python installation
All modules should be pre-installed if you are running a container of the analysisrmp image.

In [None]:
import pandas as pd
import numpy as np
from contrastive import CPCA
import umap as u
from scipy import stats
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
def read_pickle_file(file):
    """Load a pickled object from ``file`` and return it.

    This is a thin wrapper around ``pandas.read_pickle``; ``file`` is passed
    through unchanged and the unpickled object is returned as-is.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source.
    """
    return pd.read_pickle(file)

def contrastive_pca(background, foreground, alpha = np.log10(0.5), n = 50):
    """Perform a contrastive PCA to maximize variance in foreground data and
    minimize variance of background data, for a given tradeoff parameter
    alpha, and return the best n axes.

    Parameters
    ----------
    background : array-like, 2-D
        Background data set (rows are observations, columns are features).
    foreground : array-like, 2-D
        Foreground data set; must have the same number of columns as
        ``background``.
    alpha : float
        Tradeoff value handed to ``CPCA.fit_transform`` as ``alpha_value``
        (manual alpha selection).
        NOTE(review): the default ``np.log10(0.5)`` is negative (about
        -0.30); confirm that a negative contrast strength is intended.
    n : int
        Number of components requested from ``CPCA``.

    Returns
    -------
    The value returned by ``CPCA.fit_transform`` for the foreground data.

    Raises
    ------
    ValueError
        If either input is not at least 2-D, or if the two inputs do not
        have the same number of columns.
    """
    background_data = np.array(background)
    foreground_data = np.array(foreground)
    # Validate explicitly instead of with `assert`: assertions are stripped
    # when Python runs with -O, which would let mismatched inputs through.
    if background_data.ndim < 2 or foreground_data.ndim < 2:
        raise ValueError(
            "background and foreground must be 2-D (observations x features); "
            "got %d-D and %d-D" % (background_data.ndim, foreground_data.ndim)
        )
    if foreground_data.shape[1] != background_data.shape[1]:
        raise ValueError(
            "foreground and background must have the same number of columns; "
            "got %d and %d" % (foreground_data.shape[1], background_data.shape[1])
        )
    mdl = CPCA(n_components=n)
    return mdl.fit_transform(
        foreground_data,
        background_data,
        alpha_selection='manual',
        alpha_value=alpha,
    )

def umap(dataFrame, labels = None, n = 2, neighbors = 5, min_dist = 0.3, metric = 'correlation', state = 42):
    """Perform a Uniform Manifold Approximation and Projection (UMAP).

    Parameters
    ----------
    dataFrame : array-like
        Data to embed; converted with ``np.array`` before fitting.
    labels : array-like or None
        Optional target values. When given (and non-empty) they are passed
        as ``y`` to ``fit_transform``; otherwise the embedding is fitted on
        the data alone.
    n : int
        Passed to ``UMAP`` as ``n_components`` (cast to ``int``).
    neighbors : int
        Passed to ``UMAP`` as ``n_neighbors`` (cast to ``int``).
    min_dist : float
        Passed to ``UMAP`` as ``min_dist``.
    metric : str
        Passed to ``UMAP`` as ``metric``.
    state : int
        Passed to ``UMAP`` as ``random_state`` (cast to ``int``).

    Returns
    -------
    The value returned by ``UMAP.fit_transform``.
    """
    data = np.array(dataFrame)
    reducer = u.UMAP(
        n_neighbors=int(neighbors),
        min_dist=min_dist,
        metric=metric,
        n_components=int(n),
        random_state=int(state),
    )
    # A plain `if labels:` raises "truth value of an array is ambiguous" for
    # multi-element NumPy arrays and pandas objects. Test explicitly instead,
    # still treating None, empty input and falsy scalars as "no labels".
    y = None if labels is None else np.array(labels)
    has_labels = y is not None and y.size > 0 and (y.ndim > 0 or bool(y))
    if has_labels:
        # Diagnostic output kept from the original implementation.
        print(type(y))
        print(y.shape)
        return reducer.fit_transform(data, y)
    return reducer.fit_transform(data)

def umap_fit(dataFrame, labels = None, n = 2, neighbors = 5, min_dist = 0.3, metric = 'correlation', state = 42):
    """Fit a UMAP model to data without returning the embedding.

    Parameters
    ----------
    dataFrame : array-like
        Data to fit; converted with ``np.array`` before fitting.
    labels : array-like or None
        Optional target values. When given (and non-empty) they are passed
        as ``y`` to ``fit``; otherwise the model is fitted on the data alone.
    n : int
        Passed to ``UMAP`` as ``n_components`` (cast to ``int``).
    neighbors : int
        Passed to ``UMAP`` as ``n_neighbors`` (cast to ``int``).
    min_dist : float
        Passed to ``UMAP`` as ``min_dist``.
    metric : str
        Passed to ``UMAP`` as ``metric``.
    state : int
        Passed to ``UMAP`` as ``random_state`` (cast to ``int``).

    Returns
    -------
    The value returned by ``UMAP.fit``, which can be handed to
    ``umap_transform`` to embed further data.
    """
    data = np.array(dataFrame)
    reducer = u.UMAP(
        n_neighbors=int(neighbors),
        min_dist=min_dist,
        metric=metric,
        n_components=int(n),
        random_state=int(state),
    )
    # A plain `if labels:` raises "truth value of an array is ambiguous" for
    # multi-element NumPy arrays and pandas objects. Test explicitly instead,
    # still treating None, empty input and falsy scalars as "no labels".
    y = None if labels is None else np.array(labels)
    has_labels = y is not None and y.size > 0 and (y.ndim > 0 or bool(y))
    if has_labels:
        # Diagnostic output kept from the original implementation.
        print(type(y))
        print(y.shape)
        return reducer.fit(data, y)
    return reducer.fit(data)

def umap_transform(umapObject, dataFrame):
    """Project data with an already-fitted UMAP model.

    ``dataFrame`` is converted to a NumPy array and handed to the
    ``transform`` method of ``umapObject``; the result of that call is
    returned unchanged.
    """
    as_array = np.array(dataFrame)
    embedded = umapObject.transform(as_array)
    return embedded

def uncorrelate(dataFrame, orderCol = None, threshold = 0.8):
    """Return the columns of 'dataFrame' that are never pairwise-correlated
    more than 'threshold', prioritizing columns by a given order 'orderCol'
    (defaults to left to right).

    (!) For use with reticulate, note that orderCol should be using 0-based
    indices.

    Columns are visited greedily in priority order: the first remaining
    column is kept, then every later column whose Pearson correlation with
    it is not below 'threshold' is discarded, and the process repeats on
    what is left.

    NOTE(review): the comparison uses the signed correlation coefficient, so
    strongly anti-correlated columns are kept; confirm whether the absolute
    value was intended.

    Parameters
    ----------
    dataFrame : array-like, 2-D
        Data whose columns are filtered.
    orderCol : sequence of int or None
        0-based column indices in priority order. None, an empty sequence
        or a falsy scalar selects all columns from left to right.
    threshold : float
        Columns are kept only while their correlation with every previously
        kept column is strictly below this value.

    Returns
    -------
    list
        Indices (NumPy int32 scalars) of the kept columns, in priority order.
    """
    data = np.array(dataFrame)
    # `if not orderCol:` raised "truth value of an array is ambiguous" when
    # the order was given as a multi-element NumPy array; test explicitly
    # while still treating None / empty / falsy scalar as "use the default".
    use_default_order = (
        orderCol is None
        or np.size(orderCol) == 0
        or (np.ndim(orderCol) == 0 and not orderCol)
    )
    if use_default_order:
        orderCol = range(data.shape[1])
    # Columns still to be examined, in priority order
    remaining = np.array(orderCol, dtype=np.int32)
    # Columns retained so far
    kept = []
    while remaining.size > 0:
        reference, remaining = remaining[0], remaining[1:]
        kept.append(reference)
        reference_column = data[:, reference]
        # Positions (within `remaining`) of columns that are sufficiently
        # uncorrelated with the column just kept.
        survivors = [
            position
            for position, feature in enumerate(remaining)
            if stats.pearsonr(reference_column, data[:, feature])[0] < threshold
        ]
        remaining = remaining[survivors]
    return kept


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import umap
%matplotlib inline

In [None]:
# Smoke test: embed 800 random 4-component points with UMAP and plot the
# 2-D result, colouring each point by its own 4 input values.
sns.set(style='white', context='poster', rc={'figure.figsize':(14,10)})
np.random.seed(42)
data = np.random.rand(800, 4)
fit = umap.UMAP()
# Do not store the embedding in `u`: that name is the `import umap as u`
# alias used by the umap() and umap_fit() helpers defined earlier, and
# overwriting it would break them for the rest of the session.
embedding = fit.fit_transform(data)
plt.scatter(embedding[:,0], embedding[:,1], c=data)
plt.title('UMAP embedding of random colours');

In [None]:
# List the installed packages with their pinned versions, for comparison
# with the reference list below.
!pip freeze

You should see the following:

alembic==1.0.8   
asn1crypto==0.24.0    
async-generator==1.10  
attrs==19.1.0   
backcall==0.1.0  
beautifulsoup4==4.7.1  
bleach==3.1.0  
blinker==1.4  
bokeh==1.0.4  
certifi==2019.3.9  
certipy==0.1.3  
cffi==1.12.3  
chardet==3.0.4  
Click==7.0  
cloudpickle==0.8.1  
conda==4.6.14  
contrastive==1.0.0  
cryptography==2.6.1  
cycler==0.10.0  
Cython==0.29.7  
cytoolz==0.9.0.1  
dask==1.1.5  
decorator==4.4.0  
defusedxml==0.5.0  
dill==0.2.9  
distributed==1.28.0  
entrypoints==0.3  
fastcache==1.1.0  
gmpy2==2.0.8  
h5py==2.9.0  
heapdict==1.0.0  
idna==2.8  
imageio==2.5.0  
ipykernel==5.1.0  
ipython==7.5.0  
ipython-genutils==0.2.0  
ipywidgets==7.4.2  
jedi==0.13.3  
Jinja2==2.10.1  
jsonschema==3.0.1  
jupyter-client==5.2.4  
jupyter-core==4.4.0  
jupyterhub==1.0.0  
jupyterlab==0.35.5  
jupyterlab-server==0.2.0  
kiwisolver==1.1.0  
llvmlite==0.27.1  
locket==0.2.0  
Mako==1.0.7  
MarkupSafe==1.1.1  
matplotlib==3.0.3  
mistune==0.8.4  
mpmath==1.1.0  
msgpack==0.6.1  
nbconvert==5.5.0  
nbformat==4.4.0  
networkx==2.3  
notebook==5.7.8  
numba==0.42.1  
numexpr==2.6.9  
numpy==1.15.4  
oauthlib==3.0.1  
olefile==0.46  
packaging==19.0  
pamela==1.0.0  
pandas==0.24.2  
pandocfilters==1.4.2  
parso==0.4.0  
partd==0.3.9  
patsy==0.5.1  
pexpect==4.7.0  
pickleshare==0.7.5  
Pillow==6.0.0  
prometheus-client==0.6.0  
prompt-toolkit==2.0.9  
protobuf==3.7.1  
psutil==5.6.2  
ptyprocess==0.6.0  
pycosat==0.6.3  
pycparser==2.19  
pycurl==7.43.0.2  
Pygments==2.4.0  
PyJWT==1.7.1  
pyOpenSSL==19.0.0  
pyparsing==2.4.0  
pyrsistent==0.15.2  
PySocks==1.6.8  
python-dateutil==2.8.0  
python-editor==1.0.4  
pytz==2019.1  
PyWavelets==1.0.3  
PyYAML==5.1  
pyzmq==18.0.1  
requests==2.21.0  
rpy2==2.9.4  
ruamel-yaml==0.15.71    
scikit-image==0.14.2  
scikit-learn==0.20.3  
scipy==1.2.1  
seaborn==0.9.0  
Send2Trash==1.5.0  
six==1.12.0  
sklearn==0.0  
sortedcontainers==2.1.0  
soupsieve==1.9.1  
SQLAlchemy==1.3.3  
statsmodels==0.9.0  
sympy==1.3  
tblib==1.3.2  
terminado==0.8.2  
testpath==0.4.2  
toolz==0.9.0  
tornado==6.0.2  
traitlets==4.3.2  
tzlocal==1.5.1  
umap-learn==0.3.6  
urllib3==1.24.2  
vincent==0.4.4  
wcwidth==0.1.7  
webencodings==0.5.1  
widgetsnbextension==3.4.2  
xlrd==1.2.0  
zict==0.1.4  