In [None]:
import numpy as np
from sklearn.datasets import load_digits
from sklearn.datasets import make_blobs
from sklearn.manifold import trustworthiness
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import itertools
import time
import csv
import umap
from scipy.spatial.distance import euclidean
import math

%matplotlib inline

In [None]:
def generateAlea(nbr_dimensions, nbr_variables, random_state=None):
    """Generate a random synthetic dataset with ``make_blobs``.

    Parameters
    ----------
    nbr_dimensions : int
        Number of features (columns); forwarded as ``n_features``.
    nbr_variables : int
        Number of samples (rows); forwarded as ``n_samples``. Note that,
        despite the name, this is the row count of the result.
    random_state : int, RandomState instance or None, default=None
        Forwarded to ``make_blobs``. The default keeps the original unseeded
        behaviour; pass an int to get the same dataset on every call.

    Returns
    -------
    pandas.DataFrame
        The generated samples, one row per sample. The cluster labels that
        ``make_blobs`` also returns are discarded.
    """
    X, _ = make_blobs(
        n_samples=nbr_variables,
        n_features=nbr_dimensions,
        random_state=random_state,
    )
    return pd.DataFrame(X)

def localCriterion(X, X_embedded, nbr_variables):
    """Local quality criterion: trustworthiness of the embedding.

    Parameters
    ----------
    X : array-like
        Data in the original space.
    X_embedded : array-like
        The same points in the embedded space.
    nbr_variables : int
        Used only to size the neighbourhood: ``int(nbr_variables / 3)``
        neighbours are considered.

    Returns
    -------
    The value returned by ``sklearn.manifold.trustworthiness``.
    """
    # Neighbourhood size is one third of nbr_variables, truncated to an int.
    neighbourhood_size = int(nbr_variables / 3)
    return trustworthiness(X, X_embedded, n_neighbors=neighbourhood_size)

def to_distance_matrix(X):
    """Return the pairwise Euclidean distances between the rows of ``X``.

    Despite the name, the result is not a square matrix but a flat list with
    one distance per unordered pair ``(i, j)``, ``i < j``, in the order
    (0, 1), (0, 2), ..., (0, n-1), (1, 2), ...

    Parameters
    ----------
    X : array-like of shape (n_points, n_features)
        One point per row. The input is converted with ``np.asarray``, so a
        pandas DataFrame is handled row-wise as well.

    Returns
    -------
    list of float
        ``n_points * (n_points - 1) / 2`` distances; an empty list when there
        are fewer than two points.

    Raises
    ------
    ValueError
        Raised by ``pdist`` when two or more points are given but the input
        is not 2-dimensional.
    """
    # Local import: the notebook's import cell only brings in `euclidean`.
    from scipy.spatial.distance import pdist

    points = np.asarray(X)
    # pdist rejects empty / non-2-D input, so handle "no pairs" explicitly.
    if len(points) < 2:
        return []
    # pdist yields the pairs in the same (i < j, row-major) order as the
    # nested loops it replaces, without a Python-level call per pair.
    return pdist(points, metric="euclidean").tolist()

def globalCriterion(X, X_embedded):
    """Global quality criterion: ``1 - r**2`` between pairwise distances.

    The pairwise distances of the original data and of the embedding are
    computed with ``to_distance_matrix`` and correlated with ``np.corrcoef``;
    ``r`` is the off-diagonal entry of the resulting correlation matrix.

    Parameters
    ----------
    X : array-like
        Data in the original space.
    X_embedded : array-like
        The same points in the embedded space.

    Returns
    -------
    ``1 - r**2``; smaller values mean the two sets of distances are more
    strongly linearly related.
    """
    original_distances = to_distance_matrix(X)
    embedded_distances = to_distance_matrix(X_embedded)
    # Entry [0][1] of the correlation matrix is the coefficient between the
    # first and the second input vector.
    r = np.corrcoef(original_distances, embedded_distances)[0][1]
    return 1 - r ** 2

In [None]:
# Create (or truncate) the results file and write the column header.
# newline='' is what the csv module expects from the file object it writes to;
# without it, extra blank rows can appear on platforms that translate "\n".
with open('/home/selen/Documents/BioInfo_Master/dea/output.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["metrics", "min_param", "nvoisins","dimensions", "variables", "time", "criterion1", "criterion2"])

In [None]:
# Parameter grid for the UMAP benchmark; every combination is run.
# UMAP's name for the Bray-Curtis distance is "braycurtis" (no hyphen).
# NOTE(review): the data are standardised before embedding, so Bray-Curtis is
# applied to values that can be negative -- confirm this is intended.
metrics = ["euclidean", "braycurtis"]
min_param = [0.1, 0.25, 0.5, 0.8]          # values passed as UMAP min_dist
# NOTE(review): 70 and 150 exceed the smaller sample counts below (10, 25, 75);
# check how UMAP treats n_neighbors >= number of samples.
nVoisins_param = [3, 5, 10, 25, 70, 150]   # values passed as UMAP n_neighbors
nbr_dimensions = [5, 10, 25, 50, 75]       # number of features of the synthetic data
nbr_variables = [10, 25, 75, 200]          # number of samples of the synthetic data
nbr_repets = 1                             # UMAP fits per parameter combination

In [None]:
# Benchmark sweep: for every parameter combination, generate a synthetic
# dataset, embed it with UMAP `nbr_repets` times and append one result row.
# newline='' is what the csv module expects from the file object it writes to.
with open('/home/selen/Documents/BioInfo_Master/dea/output.csv', 'a', newline='') as f:
    writer = csv.writer(f)
    for combination in itertools.product(metrics, min_param, nVoisins_param, nbr_dimensions, nbr_variables):
        # Name the fields once instead of indexing the tuple everywhere.
        metric, min_dist, n_neighbors, n_dims, n_samples = combination

        df = generateAlea(n_dims, n_samples)
        start_time = time.time()

        # Standardisation does not depend on the repetition, so do it once
        # (still inside the timed region, as before).
        X = StandardScaler().fit_transform(df)

        for i in range(nbr_repets):
            reducer = umap.UMAP(
                n_neighbors=n_neighbors,
                min_dist=min_dist,
                n_components=2,
                metric=metric,
            )
            X_embedded = reducer.fit_transform(X)

            # Only the values from the last repetition are written out.
            T = localCriterion(X, X_embedded, n_samples)
            R = globalCriterion(X, X_embedded)

        # Mean wall-clock time per repetition. The timed region covers the
        # scaling, the UMAP fit and both criteria. Dividing by the actual
        # repetition count replaces a hard-coded 2 that halved the result.
        t = (time.time() - start_time) / nbr_repets
        writer.writerow([metric, min_dist, n_neighbors, n_dims, n_samples, t, T, R])