In [1]:
import os
import numpy as np
import scanpy as sc
import pandas as pd
import glob

path = "/home/olle/PycharmProjects/LODE/workspace/feature_statistics/cell_data"

cell_pd = pd.read_csv(os.path.join(path, "feature_statistics.csv"))



In [2]:
# create adata object
var_names = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15"]

X = np.array(cell_pd[var_names])

obs_id = cell_pd.id
obs_cls = cell_pd.id.str.split("-", expand=True)[0]


adata = sc.AnnData(X=X)
adata.obs["obs_id"] = obs_id.values.tolist()
adata.obs["obs_cls"] = obs_cls.values.tolist()
adata.var["var_name"] = var_names

In [None]:
#### Add train test split to obs

In [3]:
test_image_path = "/home/olle/PycharmProjects/LODE/workspace/feature_statistics/cell_data/OCT2017/test"

test_ids_list = [i.split("/")[-1] for i in glob.glob(test_image_path + "/*/*")]

all_ids = pd.DataFrame(adata.obs["obs_id"])
test_ids = pd.DataFrame(test_ids_list)
test_paths = pd.DataFrame([test_ids_list, glob.glob(test_image_path + "/*/*")]).T

data_split_pd = pd.merge(all_ids, test_ids, left_on="obs_id", right_on=0, how="left")

data_split_pd["split"] = "train"
data_split_pd["split"][~data_split_pd[0].isna()] = "test"

adata.obs["split"] = data_split_pd.split.values.tolist()

# create adata test object
adata_test = adata[adata.obs.split == "test"]

#### add image path to adata object

In [4]:
adata_test.obs["img_path"] = test_image_path + "/" + adata_test.obs.obs_cls + "/" + adata_test.obs.obs_id

Trying to set attribute `.obs` of view, copying.


### Preprocessing

In [None]:
# outlier filtering, remove any images with less 2 then 1000

In [6]:
sc.pl.highest_expr_genes(adata_test, log=True)

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
sc.pl.violin(adata_test, var_names, log=True)

#### filter_data

In [None]:
adata = adata[adata.X[:, 2] > 1000]

In [None]:
# Log and norm data

In [None]:
sc.pp.log1p(adata_test)

In [None]:
sc.pp.highly_variable_genes(adata_test, min_mean=0.000125, max_mean=10000, min_disp=0.5)
sc.pl.highly_variable_genes(adata_test)


In [None]:
sc.pp.scale(adata_test, max_value=10)


In [None]:
### PCA analysis

In [None]:
sc.tl.pca(adata_test,n_comps=10, svd_solver='arpack')


In [None]:
sc.pl.pca(adata_test, color='obs_cls')


In [None]:
sc.pl.pca_variance_ratio(adata_test, log=True)


In [None]:
##### compte neighbour hood graph

In [None]:
sc.pp.neighbors(adata_test, n_neighbors=100, n_pcs=40)
sc.tl.umap(adata_test)

In [None]:
import matplotlib.pyplot as plt
plt.figure(figsize=(20,10))
sc.pl.umap(adata_test, color=['obs_cls', "1", "3", "4", "7", "5", "8", "9", "10", "13"])

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.cbook import get_sample_data
import cv2
import matplotlib.patches as mpatches
%matplotlib widget


def main(data):
    x = data.obsm["X_umap"][:, 0]
    y = data.obsm["X_umap"][:, 1]

    color_map = {'NORMAL': 'tab:blue', 'DRUSEN': 'tab:green', "CNV": 'tab:orange', 'DME': 'tab:red'}

    colors = [color_map[class_] for class_ in adata_test.obs.obs_cls.values.tolist()]
    
    fig, ax = plt.subplots(figsize=(20,10))
    imscatter(x, y, data.obs.img_path.values.tolist(), zoom=0.05, ax=ax)
    ax.scatter(x, y, c=colors)
    plt.legend(np.unique(color_map.keys()))
    
    patches = []
    for key in color_map.keys():
        color = color_map[key]
        patches.append(mpatches.Patch(color=color, label=key))
    
    plt.legend(handles=patches)
    #plt.savefig("test.png")
    plt.show()

def imscatter(x, y, image_path, ax=None, zoom=1):
    if ax is None:
        ax = plt.gca()
    x, y = np.atleast_1d(x, y)
    artists = []
    iter_ = 0
    for x0, y0 in zip(x, y):
        if iter_ % 10 == 0:
            image = plt.imread(image_path[iter_])
            image = np.stack((image,) * 3, axis = -1)
            im = OffsetImage(image, zoom=zoom)
            ab = AnnotationBbox(im, (x0, y0), xycoords='data', frameon=False)
            artists.append(ax.add_artist(ab))
            
        iter_ += 1
    ax.update_datalim(np.column_stack([x, y]))
    ax.autoscale()
    return artists

main(adata_test)

In [None]:
df = pd.read_csv(
"https://raw.githubusercontent.com/plotly/datasets/master/tips.csv"
)
# Matplotlib Scatter Plot
plt.scatter('total_bill', 'tip',data=df)
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.show()

In [None]:
color=['obs_cls', "1", "3", "4", "7", "5", "8", "9", "13"]

In [None]:
#### Clustering

In [None]:
sc.tl.leiden(adata_test)
sc.tl.louvain(adata_test)
sc.tl.paga(adata_test)

In [None]:
sc.pl.umap(adata, projection="2d", color=['obs_cls'])

In [None]:
#### Feature expression significance

In [None]:
sc.tl.rank_genes_groups(adata, 'obs_cls', method='t-test')
sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False)

In [None]:
sc.pl.violin(adata, ["3", "5", "13"], groupby='obs_cls')


In [None]:
sc.pl.violin(adata, [ "7", "8", "9"], groupby='obs_cls')

In [None]:
#### Self labeling

In [None]:
confusion_matrix = np.zeros((num_classes, num_classes))

# For each class.
for class_idx in range(num_classes):
    # Consider 10 examples.
    example_idxs = class_idx_to_test_idxs[class_idx][:10]
    for y_test_idx in example_idxs:
        # And count the classes of its near neighbours.
        for nn_idx in near_neighbours[y_test_idx][:-1]:
            nn_class_idx = y_test[nn_idx]
            confusion_matrix[class_idx, nn_class_idx] += 1

# Display a confusion matrix.
labels = [
    "Airplane",
    "Automobile",
    "Bird",
    "Cat",
    "Deer",
    "Dog",
    "Frog",
    "Horse",
    "Ship",
    "Truck",
]
disp = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix, display_labels=labels)
disp.plot(include_values=True, cmap="viridis", ax=None, xticks_rotation="vertical")
plt.show()

In [None]:
sc.pl.dotplot(adata, adata.var_names, groupby='obs_cls');

In [None]:
sc.pl.dotplot(adata, adata.var_names, groupby='leiden');

In [None]:
# normalize
X_norm = sc.pp.normalize_total(adata, target_sum=1, inplace=False)['X']

In [None]:
X_norm.shape