In [None]:
!pip install plotly

In [None]:
import os

import matplotlib.pyplot as plt
import numba
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sns
import sklearn.datasets
import umap
from mpl_toolkits.mplot3d import Axes3D

%matplotlib inline

In [None]:
# Global seaborn look: white style and a 10x10 default figure size
sns.set(
    style="white",
    rc={"figure.figsize": (10, 10)},
)

The dataset was processed as described in German, Vulliard et al. and exported by adding the following block to `LT_ARPC1B.ipynb`:

    write.csv(file = "Tab/Export_ARPC1B_filtered.csv",
    x = data.frame(Row = as.factor(LT$Metadata_Row[fieldToKeep]), 
                   Col = as.factor(LT$Metadata_Column[fieldToKeep]),
                   URL = as.factor(LT$URL_Actin[fieldToKeep]),
                   Coating = as.factor(LT$Coating[fieldToKeep]),
                   Donor = as.factor(LT$Donor[fieldToKeep]),
                   transformedLT))

In [None]:
# Number of points in the reference dataset (R)
NR = 3000
# Number of points in each control dataset (negative N and positive P)
N = 1000
# Seed NumPy's global random generator once, up front, so that the uniform
# samples drawn further down are identical on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

## Load and visualize original dataset

In [None]:
# Read the exported table; the first six CSV columns become the row MultiIndex
X = pd.read_csv(
    "Export_ARPC1B_filtered.csv",
    index_col=[0, 1, 2, 3, 4, 5],
)

In [None]:
# Size of the loaded table as (rows, columns); the index levels are not counted as columns
X.shape

In [None]:
# Fit the UMAP embedding of the feature table with a fixed random_state
plane_mapper = umap.UMAP(
    n_epochs=200,
    min_dist=1,
    spread=10,
    random_state=42,
).fit(X)

In [None]:
# Encode each row's donor as an integer category code and use it as the point colour
y = (
    X.index
    .get_level_values('Donor')
    .astype('category')
    .values
    .codes
)
# The transposed embedding unpacks into its two coordinate arrays
plt.scatter(*plane_mapper.embedding_.T, c=y, cmap='Spectral')

In [None]:
# Same embedding as a plotly scatter, coloured by donor label
dfX = pd.DataFrame({
    'x': plane_mapper.embedding_[:, 0],
    'y': plane_mapper.embedding_[:, 1],
    'c': X.index.get_level_values('Donor'),
})
fig = px.scatter(dfX, x='x', y='y', color='c')
fig.show()

## Dataset 1 - Reference R_ARPC1B

In [None]:
# Split the embedding into its two coordinate arrays
x, y = plane_mapper.embedding_.T
# Bounding box of the whole embedding (vectorised min/max instead of the
# Python builtins, which iterate element by element over the array)
rangeX = (x.min(), x.max())
rangeY = (y.min(), y.max())
# Rows belonging to the reference donor "ND1"; the mask is computed once and reused
isRef = X.index.get_level_values('Donor') == "ND1"
if not isRef.any():
    # Fail with an explicit message rather than an opaque empty-sequence error
    raise ValueError('no rows with Donor == "ND1"; cannot compute the reference range')
xRef = x[isRef]
yRef = y[isRef]
# Bounding box of the reference donor's points in the embedding
rangeRefX = (xRef.min(), xRef.max())
rangeRefY = (yRef.min(), yRef.max())

In [None]:
# We generate random points corresponding to a reference distribution
refPts = np.random.uniform([rangeRefX[0], rangeRefY[0]], 
                           [rangeRefX[1], rangeRefY[1]], [NR,2])

In [None]:
# We then convert back these points to the original dimensions
trRefPts = plane_mapper.inverse_transform(refPts)
trRefPts.shape

In [None]:
# Wrap the reconstructed reference points in a DataFrame and export them
# as a bare comma-separated matrix (no header row, no index column).
trRefPts = pd.DataFrame(trRefPts)
# to_csv does not create missing folders, so make sure the output directory exists
os.makedirs("Data", exist_ok=True)
trRefPts.to_csv("Data/matR_ARPC1B.csv", sep=',', header=False, index=False)

## Dataset 2 - Negative control N

In [None]:
# We sample random points from the reference distribution
nPts = np.random.uniform([rangeRefX[0], rangeRefY[0]], 
                         [rangeRefX[1], rangeRefY[1]], [N,2])

In [None]:
# We then convert back these points to the original dimensions
trNPts = plane_mapper.inverse_transform(nPts)
trNPts.shape

In [None]:
# Wrap the reconstructed negative-control points in a DataFrame and export them
# as a bare comma-separated matrix (no header row, no index column).
trNPts = pd.DataFrame(trNPts)
# to_csv does not create missing folders, so make sure the output directory exists
os.makedirs("Data", exist_ok=True)
trNPts.to_csv("Data/matN_ARPC1B.csv", sep=',', header=False, index=False)

## Dataset 3 - Positive control P

In [None]:
# We sample from the whole morphological space
pPts = np.random.uniform([rangeX[0], rangeY[0]], 
                         [rangeX[1], rangeY[1]], [N,2])

In [None]:
# We then convert back these points to the original dimensions
trPPts = plane_mapper.inverse_transform(pPts)
trPPts.shape

In [None]:
# Wrap the reconstructed positive-control points in a DataFrame and export them
# as a bare comma-separated matrix (no header row, no index column).
trPPts = pd.DataFrame(trPPts)
# to_csv does not create missing folders, so make sure the output directory exists
os.makedirs("Data", exist_ok=True)
trPPts.to_csv("Data/matP_ARPC1B.csv", sep=',', header=False, index=False)