In [2]:
import pandas as pd
from scipy import sparse as sps
import os
import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import matplotlib.pyplot as plt
from scipy.cluster import hierarchy
%matplotlib inline
from scipy.stats import wasserstein_distance, energy_distance
from scipy.spatial.distance import squareform
import math
from sklearn.neighbors import radius_neighbors_graph, kneighbors_graph
import time
import sys

In [3]:
def readin_clones(o):
    """Load the cell-by-clone membership table and build index lookups for it.

    Parameters
    ----------
    o : str, path or file-like
        Tab-separated table handed to ``pd.read_csv``. The first column is
        used as the row index (one row per cell); every other column is a clone.

    Returns
    -------
    cellid_to_idx : dict
        Row label -> row position.
    cloneid_to_idx : dict
        Column label -> column position, for the retained clones only.
    clonegrouping_spmtx : scipy.sparse.csc_matrix
        The retained table as a sparse matrix. Rows are cells, columns are
        clones; entry (i, j) == 1 means cell i belongs to clone j.
    """
    membership = pd.read_csv(o, sep='\t', index_col=0)

    # Keep only the clone columns whose total is strictly positive.
    populated = membership.columns[membership.sum(axis=0) > 0]
    membership = membership[populated]

    cellid_to_idx = {cell_id: row for row, cell_id in enumerate(membership.index)}
    cloneid_to_idx = {clone_id: col for col, clone_id in enumerate(membership.columns)}

    clonegrouping_spmtx = sps.csc_matrix(membership.to_numpy())
    return cellid_to_idx, cloneid_to_idx, clonegrouping_spmtx

def readin_coords(o):
    """Read a tab-separated coordinate table and return its values as an array.

    The first column of the file is used as the row index and is therefore
    not part of the returned array; every remaining column becomes one
    column of the result.
    """
    return pd.read_csv(o, sep='\t', index_col=0).to_numpy()

def readin_metad(o):
    """Read a tab-separated metadata table and return its ``time`` column.

    The first column of the file is used as the row index. The result is a
    1-D numpy array with one entry per row; a ``KeyError`` is raised by
    pandas if the table has no ``time`` column.
    """
    return pd.read_csv(o, sep='\t', index_col=0)['time'].to_numpy()

def readin(data_folder):
    """Load the three input tables found in ``data_folder`` and print a summary.

    Reads ``clones.tsv.gz``, ``coordinates.tsv.gz`` and ``metadata.tsv.gz``
    from ``data_folder`` through ``readin_clones``, ``readin_coords`` and
    ``readin_metad`` respectively.

    Returns
    -------
    tuple
        ``(cellid_to_idx, cloneid_to_idx, clonegrouping_spmtx, coords_mtx,
        time_vec)`` exactly as produced by the three readers.
    """
    clone_file, coord_file, metad_file = (
        os.path.join(data_folder, file_name)
        for file_name in ("clones.tsv.gz", "coordinates.tsv.gz", "metadata.tsv.gz")
    )

    cellid_to_idx, cloneid_to_idx, clonegrouping_spmtx = readin_clones(clone_file)
    coords_mtx = readin_coords(coord_file)
    time_vec = readin_metad(metad_file)

    # NOTE(review): nothing here checks that the three tables list the cells
    # in the same order; downstream code indexes all of them by row position.
    print("Number of cells: ", len(cellid_to_idx))
    print("Number of clones: ", len(cloneid_to_idx))
    print("Number of dimensions: ", coords_mtx.shape[1])
    print("Time Steps: ", np.unique(time_vec))

    return cellid_to_idx, cloneid_to_idx, clonegrouping_spmtx, coords_mtx, time_vec

In [4]:
def getdistance_wasserstein(clonegrouping_spmtx, coords_mtx, time_vec, choice="wasserstein"):
    """Pairwise clone distances from per-time-step 1-D distribution distances.

    For every pair of clones and every time step at which both clones have
    at least one cell, the chosen 1-D distance is computed independently on
    each coordinate dimension and averaged over dimensions. That value is
    scaled by the mean of the two clones' fractions of cells falling in the
    time step, and the scaled values are summed over time steps.

    Parameters
    ----------
    clonegrouping_spmtx : scipy sparse matrix
        Cell-by-clone membership; the nonzero rows of column ``j`` are the
        cells of clone ``j``.
    coords_mtx : numpy.ndarray
        Cell-by-dimension coordinates, row-aligned with ``clonegrouping_spmtx``.
    time_vec : numpy.ndarray
        One time label per cell, row-aligned with ``coords_mtx``.
    choice : {"wasserstein", "energy"}
        Selects ``scipy.stats.wasserstein_distance`` or
        ``scipy.stats.energy_distance``.

    Returns
    -------
    numpy.ndarray
        Condensed distance vector (``squareform`` of the symmetric matrix).

    Raises
    ------
    ValueError
        If ``choice`` is not one of the supported names. This is checked
        before any work is done.

    Notes
    -----
    Pairs that never share a time step keep a distance of 0; they are
    non-informative rather than identical (the original author noted these
    values arguably should be infinite).
    """
    metrics = {"wasserstein": wasserstein_distance, "energy": energy_distance}
    if choice not in metrics:
        raise ValueError(
            "unsupported choice %r; expected one of %s" % (choice, sorted(metrics))
        )
    metric = metrics[choice]

    time_steps = np.unique(time_vec)
    num_clones = clonegrouping_spmtx.shape[1]
    num_dim = coords_mtx.shape[1]
    distance_matrix = np.zeros((num_clones, num_clones))

    # Slice every clone once: its size and, per time step (in the order of
    # `time_steps`), the coordinates of its cells observed at that step.
    clone_sizes = []
    clone_coords_by_step = []
    for clone in range(num_clones):
        cells = clonegrouping_spmtx[:, clone].nonzero()[0]
        clone_coords = coords_mtx[cells]
        clone_times = time_vec[cells]
        clone_sizes.append(cells.shape[0])
        clone_coords_by_step.append([clone_coords[clone_times == t] for t in time_steps])

    for i in range(num_clones):
        for j in range(i):
            dist = 0
            for coords_i, coords_j in zip(clone_coords_by_step[i], clone_coords_by_step[j]):
                n_i, n_j = coords_i.shape[0], coords_j.shape[0]
                # Nothing to compare unless both clones are present at this step.
                if n_i == 0 or n_j == 0:
                    continue

                i_weight = n_i / clone_sizes[i]
                j_weight = n_j / clone_sizes[j]
                dists = [metric(coords_i[:, d], coords_j[:, d]) for d in range(num_dim)]
                dist += np.mean(dists) * np.mean([i_weight, j_weight])
            distance_matrix[i][j] = dist

    # Only the lower triangle was filled; mirror it before condensing.
    return squareform(distance_matrix + distance_matrix.transpose())

from itertools import combinations

# Scratch example: eight 1-D points split into three index groups, used to
# try out edge-list filtering on a radius-neighbors graph.
colored = np.array([0, 1, 2])
empty = np.array([3, 4, 5])
cross = np.array([6, 7])
X = [[0], [0], [1], [0], [1], [1], [0], [1]]

# Every ordered (row, col) index pair that could exist within / between groups.
in_colored = np.array(np.meshgrid(colored, colored)).T.reshape(-1, 2)
in_cross = np.array(np.meshgrid(cross, cross)).T.reshape(-1, 2)
in_both = np.array(np.meshgrid(colored, cross)).T.reshape(-1, 2)

# A is a csr matrix
A = radius_neighbors_graph(X, 0.001, mode='connectivity', include_self=False)
print(A.toarray())

# (row, col) edge list of the nonzero entries of A.
nz = A.nonzero()
coords = np.stack(nz, axis=-1)

# Edges are grouped by the group membership of their source (first column).
print("Coords of colored neighbors")
color_neigh = coords[np.isin(coords[:, 0], colored)]
print(color_neigh)

empty_neigh = coords[np.isin(coords[:, 0], empty)]

print("Coords of crossed neighbors")
cross_neigh = coords[np.isin(coords[:, 0], cross)]
print(cross_neigh)

print("Number of all neighbors to colored")
print(A[color_neigh[:, 0], color_neigh[:, 1]].sum())

# Of the edges leaving `colored`, keep those whose target is also in `colored`.
print("Number of neighbors that also belong to colored")
colcol = color_neigh[np.isin(color_neigh[:, 1], colored)]
print(colcol)

# Of the edges leaving `colored`, keep those whose target is in `cross`.
print("Number of neighbors that are colored to cross")
colcro = color_neigh[np.isin(color_neigh[:, 1], cross)]
print(colcro)
x, y = colcro[:, 0], colcro[:, 1]

# NOTE(review): despite its name this is the colored -> cross edge total.
total_color_to_color = A[x, y].sum()
print(total_color_to_color)

In [None]:
def getdistance_mnn(clonegrouping_spmtx, coords_mtx, time_vec, dist="kneighbors", radius=1.0, neighbors=5,
                    mode="distance", slope = 1, xshift = 5):
    """Pairwise clone distances derived from a cell neighbor graph.

    A neighbor graph over all cells is built (self edges requested). For a
    clone ``a``, a time step ``t`` and another clone ``b``, consider the
    graph edges that start at cells of ``a`` observed at ``t``:

    * ``own``   - summed weight of those edges that end in ``a``
    * ``cross`` - summed weight of those edges that end in ``b``
    * ``total`` - summed weight of all those edges

    The per-step score of ``a`` against ``b`` is
    ``abs(own / total - cross / total)``. For each time step at which both
    clones have a nonzero ``total``, the scores of ``i`` against ``j`` and
    of ``j`` against ``i`` are collected, and the pair distance is the mean
    of all collected scores.

    Parameters
    ----------
    clonegrouping_spmtx : scipy sparse matrix
        Cell-by-clone membership; nonzero rows of column ``j`` are clone ``j``.
    coords_mtx : numpy.ndarray
        Cell coordinates passed to the scikit-learn graph constructor.
    time_vec : numpy.ndarray
        One time label per cell.
    dist : str
        ``"kneighbors"`` builds ``kneighbors_graph``; any other value builds
        ``radius_neighbors_graph``.
    radius : float
        Radius for ``radius_neighbors_graph``.
    neighbors : int
        Neighbor count for ``kneighbors_graph``.
    mode : str
        Forwarded to the graph constructor; it decides what the edge weights are.
    slope, xshift
        Unused; kept so existing calls keep working.

    Returns
    -------
    numpy.ndarray
        Condensed distance vector (``squareform`` of the symmetric matrix).

    Notes
    -----
    A pair with no usable common time step keeps a distance of 0, as in
    ``getdistance_wasserstein``. Averaging an empty score list would give
    NaN, which makes ``squareform`` reject the matrix as non-symmetric.
    """
    time_steps = np.unique(time_vec)
    num_clones = clonegrouping_spmtx.shape[1]
    distance_matrix = np.zeros((num_clones, num_clones))

    if dist == "kneighbors":
        graph = kneighbors_graph(coords_mtx, neighbors, mode=mode, include_self=True)
    else:
        graph = radius_neighbors_graph(coords_mtx, radius=radius, mode=mode, include_self=True)

    # Edge list (source cell, target cell, weight) of the nonzero entries.
    # Stored zeros (e.g. zero self-distances) are dropped, as `.nonzero()` does.
    graph = graph.tocoo()
    stored = graph.data != 0
    src, dst, weight = graph.row[stored], graph.col[stored], graph.data[stored]

    def weight_into(edge_idx, target_cells):
        # Summed weight of the given edges whose target lies in `target_cells`.
        return weight[edge_idx[np.isin(dst[edge_idx], target_cells)]].sum()

    # Per clone: its cells and, per time step (in the order of `time_steps`),
    # (indices of edges leaving its cells at that step, total, own).
    clone_cells = []
    clone_steps = []
    for clone in range(num_clones):
        cells = clonegrouping_spmtx[:, clone].nonzero()[0]
        cell_times = time_vec[cells]
        steps = []
        for t in time_steps:
            edge_idx = np.flatnonzero(np.isin(src, cells[cell_times == t]))
            steps.append((edge_idx, weight[edge_idx].sum(), weight_into(edge_idx, cells)))
        clone_cells.append(cells)
        clone_steps.append(steps)

    for i in range(num_clones):
        cells_in_i = clone_cells[i]
        for j in range(i):
            cells_in_j = clone_cells[j]

            ts_dists = []
            for (edges_i, total_i, own_i), (edges_j, total_j, own_j) in zip(clone_steps[i], clone_steps[j]):
                # Skip steps where either clone has no neighbor weight at all.
                if total_i == 0 or total_j == 0:
                    continue

                i_to_j = weight_into(edges_i, cells_in_j)
                j_to_i = weight_into(edges_j, cells_in_i)

                ts_dists.append(abs(own_i / total_i - i_to_j / total_i))
                ts_dists.append(abs(own_j / total_j - j_to_i / total_j))

            # Leave non-informative pairs at 0 instead of writing NaN.
            if ts_dists:
                distance_matrix[i][j] = np.mean(ts_dists)

    # Only the lower triangle was filled; mirror it before condensing.
    return squareform(distance_matrix + distance_matrix.transpose())

In [None]:
def getdistance_mnn_debug(clonegrouping_spmtx, coords_mtx, time_vec, dist="kneighbors", radius=1.0, neighbors=5, 
                    mode="distance", slope = 1, xshift = 5):
    """Debug variant of the neighbor-graph clone distance that also returns intermediates.

    Builds a neighbor graph without self edges (``kneighbors_graph`` when
    ``dist == "kneighbors"``, otherwise ``radius_neighbors_graph``), then for
    every clone pair ``(i, j)`` with ``j < i`` and every time step at which
    both clones have cells, accumulates a ``numer`` (within-i plus within-j
    sums) and a ``denom`` (between-i-and-j sum), both starting at 1. The
    per-step score is ``abs(numer / denom - 1)`` and the pair distance is the
    mean of the per-step scores.

    Returns ``(condensed_distances, fracs, trans, numers, denoms)`` where the
    four lists are flat logs over all processed (pair, time step)
    combinations: ``fracs`` holds ``numer / denom - 1``, ``trans`` holds
    ``reflected_sigmoid_mapping`` of that value using ``slope`` / ``xshift``,
    ``numers`` holds two entries per combination (after the within-i and
    after the within-j contribution) and ``denoms`` holds one.

    Depends on ``reflected_sigmoid_mapping``, which is not defined in this
    function and must already exist in the notebook namespace when called.

    NOTE(review): three different index spaces are compared below -- positions
    within a clone (``ts_i`` / ``ts_j``), global cell indices (``coords``) and
    positions inside ``stacked`` -- see the inline notes; confirm intent.
    NOTE(review): for a pair with no shared time step ``ts_dists`` is empty
    and ``np.mean`` yields NaN; presumably ``squareform`` then rejects the
    matrix -- confirm.
    """
    time_steps = np.unique(time_vec)
    num_clones = clonegrouping_spmtx.shape[1]
    num_dim = coords_mtx.shape[1]  # not used below
    distance_matrix = np.zeros((num_clones, num_clones))
    num_noninform = 0  # not used below
    print("Number of clones: %s" % num_clones)

    if dist=="kneighbors":
        rng = kneighbors_graph(coords_mtx, neighbors, mode=mode, include_self=False)
    else:
        rng = radius_neighbors_graph(coords_mtx, radius=radius, mode=mode, include_self=False)
    print("Calculated neighbors graph")
    # (row, col) edge list of the nonzero graph entries, in global cell indices.
    nz = rng.nonzero()
    coords = np.stack((nz[0],nz[1]),axis=-1)
    print("Calculated nonzero coordinates")

    dist_calc_num = 0
    fracs = []
    trans = []
    numers = []
    denoms = []
    for i in range(num_clones):
        
        # get the coords for all cells in i
        cells_in_i = clonegrouping_spmtx[:,i].nonzero()[0]
        coords_for_i = coords_mtx[cells_in_i]  # not used below
        time_for_i = time_vec[cells_in_i]
        
        # Per time step: all ordered pairs of clone-i members observed at that
        # step. `ts_i` holds positions within `cells_in_i`, not global indices.
        ts_i_dict = {}
        for t in time_steps:
            ts_i = np.where(time_for_i == t)[0]
            if ts_i.size == 0:
                continue
            i_weight = ts_i.shape[0] / cells_in_i.shape[0]  # not used below
            poss_only_in_i = np.array(np.meshgrid(ts_i, ts_i)).T.reshape(-1,2)
            ts_i_dict[t] = poss_only_in_i

        for j in range(i):
            
            dist = 0  # rebinds the `dist` parameter; not used below
            
            # get cells in clone j
            cells_in_j = clonegrouping_spmtx[:,j].nonzero()[0]
            coords_for_j = coords_mtx[cells_in_j]  # not used below
            time_for_j = time_vec[cells_in_j]
            
            # Graph edges whose source cell belongs to clone i (loop-invariant in j).
            i_neighbors = coords[(np.isin(coords, cells_in_i)[:,0] == True)]

            # Graph edges whose source cell belongs to clone j.
            j_neighbors = coords[(np.isin(coords, cells_in_j)[:,0] == True)]

            
            # stack neighborhoods and extract nonzero values
            # NOTE(review): the inputs here are dense (n_edges, 2) edge-list
            # arrays, so `stacked` is a sparse copy of the stacked edge lists and
            # its nonzero positions index rows/columns of that list, not cells.
            # Confirm this is intended (the sibling *_new variant stacks graph rows).
            stacked = sps.vstack((i_neighbors,j_neighbors), format='dok')
            nz = stacked.nonzero()
            allneighbors_in_both = np.stack((nz[0],nz[1]), axis=-1)

            ts_dists = []
            for t in time_steps:
                ts_i = np.where(time_for_i == t)[0]
                ts_j = np.where(time_for_j == t)[0]

                # continue if we can't do anything
                if ts_i.size == 0 or ts_j.size == 0:
                    continue
                
                # Both start at 1, so the ratio is defined even when no
                # matching entries are found.
                denom = 1
                numer = 1
                
                poss_only_in_i = ts_i_dict[t]
                j_weight = ts_j.shape[0] / cells_in_j.shape[0]  # not used below
                
                poss_only_in_j = np.array(np.meshgrid(ts_j, ts_j)).T.reshape(-1,2)
                poss_in_both = np.array(np.meshgrid(ts_i, ts_j)).T.reshape(-1,2)

                # Row-wise membership test: keep the rows of the left operand
                # that equal at least one row of the right operand.
                poss_in_all = np.vstack((poss_only_in_i, poss_only_in_j, poss_in_both))
                obs_in_all = allneighbors_in_both[(allneighbors_in_both[:, None] == poss_in_all).all(-1).any(-1)]

                obs_in_i = obs_in_all[(obs_in_all[:, None] == poss_only_in_i).all(-1).any(-1)]
                obs_in_j = obs_in_all[(obs_in_all[:, None] == poss_only_in_j).all(-1).any(-1)]
                obs_in_both = obs_in_all[(obs_in_all[:, None] == poss_in_both).all(-1).any(-1)]

                
                # Within-i contribution.
                x,y = obs_in_i.T
                numer += stacked[x,y].sum()
                numers.append(numer)
                
                # Within-j contribution, added to the same running numerator.
                x,y = obs_in_j.T
                numer += stacked[x,y].sum()
                numers.append(numer)
                
                # Between-i-and-j contribution.
                x,y = obs_in_both.T
                denom += stacked[x,y].sum()
                denoms.append(denom)
                
                fracs.append((numer/denom)-1)
                
                # The distance uses the absolute ratio offset; the sigmoid-mapped
                # value is only logged in `trans`.
                ts_dists.append(abs((numer/denom)-1))
                trans.append(reflected_sigmoid_mapping((numer/denom)-1, slope, xshift))
            
            distance_matrix[i][j] += np.mean(ts_dists)
            dist_calc_num += 1
            if dist_calc_num % 10000 == 0:
                print("Calculated %s distances..." % dist_calc_num)

    # Only the lower triangle was filled; mirror it before condensing.
    return squareform(distance_matrix + distance_matrix.transpose()), fracs, trans, numers, denoms

In [None]:
def getdistance_mnn_debug_new(clonegrouping_spmtx, coords_mtx, time_vec, dist="kneighbors", radius=1.0, neighbors=5,
                    mode="distance", slope=1, xshift=5):
    """Debug clone distance built from stacked neighbor-graph rows, with intermediates.

    Builds a neighbor graph without self edges (``kneighbors_graph`` when
    ``dist == "kneighbors"``, otherwise ``radius_neighbors_graph``). For every
    clone pair ``(i, j)`` with ``j < i`` the graph rows of clone ``i`` are
    stacked on top of those of clone ``j``. For each time step at which both
    clones have cells, a ``numer`` (within-i plus within-j sums) and a
    ``denom`` (between-i-and-j sum) are accumulated, both starting at 1, and
    ``reflected_sigmoid_mapping(numer / denom - 1, slope, xshift)`` is the
    per-step score. The pair distance is the mean of the per-step scores.

    Parameters
    ----------
    clonegrouping_spmtx : scipy sparse matrix
        Cell-by-clone membership; nonzero rows of column ``j`` are clone ``j``.
    coords_mtx : numpy.ndarray
        Cell coordinates passed to the scikit-learn graph constructor.
    time_vec : numpy.ndarray
        One time label per cell.
    dist, radius, neighbors, mode
        Graph construction options (see above).
    slope, xshift
        Forwarded to ``reflected_sigmoid_mapping``. The body always used these
        names but the signature did not declare them, so a call failed with
        ``NameError`` unless globals of the same name existed; the defaults
        match ``getdistance_mnn_debug``.

    Returns
    -------
    tuple
        ``(condensed_distances, fracs, trans, numers, denoms)``. The four
        lists are flat logs over all processed (pair, time step)
        combinations: ``fracs`` holds ``numer / denom - 1``, ``trans`` its
        sigmoid mapping, ``numers`` two entries per combination (after the
        within-i and after the within-j contribution), ``denoms`` one.

    Notes
    -----
    Depends on ``reflected_sigmoid_mapping``, which must already exist in
    the notebook namespace. A pair with no shared time step keeps a distance
    of 0 (as in ``getdistance_wasserstein``); averaging an empty score list
    would produce NaN.

    NOTE(review): ``ts_i`` / ``ts_j`` are positions within each clone's own
    cell list, whereas the nonzero positions of ``stacked`` are (row within
    the i-then-j stack, global cell index). The candidate pairs built from
    ``ts_i`` / ``ts_j`` are matched against those positions as-is; confirm
    that this mix of index spaces is intended.
    """
    time_steps = np.unique(time_vec)
    num_clones = clonegrouping_spmtx.shape[1]
    distance_matrix = np.zeros((num_clones, num_clones))
    print("Number of clones: %s" % num_clones)

    if dist=="kneighbors":
        rng = kneighbors_graph(coords_mtx, neighbors, mode=mode, include_self=False)
    else:
        rng = radius_neighbors_graph(coords_mtx, radius=radius, mode=mode, include_self=False)
    print("Calculated neighbors graph")
    dist_calc_num = 0
    fracs = []
    trans = []
    numers = []
    denoms = []
    for i in range(num_clones):

        cells_in_i = clonegrouping_spmtx[:,i].nonzero()[0]
        time_for_i = time_vec[cells_in_i]

        # get the neighborhood of clone i (one graph row per cell of the clone)
        i_neighbors = rng[cells_in_i]

        # Per time step: all ordered pairs of clone-i members observed at that
        # step, expressed as positions within `cells_in_i`.
        ts_i_dict = {}
        for t in time_steps:
            ts_i = np.where(time_for_i == t)[0]
            if ts_i.size == 0:
                continue
            ts_i_dict[t] = np.array(np.meshgrid(ts_i, ts_i)).T.reshape(-1,2)

        for j in range(i):

            # get cells in clone j
            cells_in_j = clonegrouping_spmtx[:,j].nonzero()[0]
            time_for_j = time_vec[cells_in_j]

            # get neighborhood of j
            j_neighbors = rng[cells_in_j]

            # stack neighborhoods and extract nonzero values
            stacked = sps.vstack((i_neighbors,j_neighbors), format='dok')
            nz = stacked.nonzero()
            allneighbors_in_both = np.stack((nz[0],nz[1]), axis=-1)

            ts_dists = []
            for t in time_steps:
                ts_i = np.where(time_for_i == t)[0]
                ts_j = np.where(time_for_j == t)[0]

                # continue if we can't do anything
                if ts_i.size == 0 or ts_j.size == 0:
                    continue

                # Both start at 1, so the ratio is defined even when no
                # matching entries are found.
                denom = 1
                numer = 1

                poss_only_in_i = ts_i_dict[t]
                poss_only_in_j = np.array(np.meshgrid(ts_j, ts_j)).T.reshape(-1,2)
                poss_in_both = np.array(np.meshgrid(ts_i, ts_j)).T.reshape(-1,2)

                # Row-wise membership test: keep the rows of the left operand
                # that equal at least one row of the right operand.
                poss_in_all = np.vstack((poss_only_in_i, poss_only_in_j, poss_in_both))
                obs_in_all = allneighbors_in_both[(allneighbors_in_both[:, None] == poss_in_all).all(-1).any(-1)]

                obs_in_i = obs_in_all[(obs_in_all[:, None] == poss_only_in_i).all(-1).any(-1)]
                obs_in_j = obs_in_all[(obs_in_all[:, None] == poss_only_in_j).all(-1).any(-1)]
                obs_in_both = obs_in_all[(obs_in_all[:, None] == poss_in_both).all(-1).any(-1)]

                # Within-i contribution.
                x,y = obs_in_i.T
                numer += stacked[x,y].sum()
                numers.append(numer)

                # Within-j contribution, added to the same running numerator.
                x,y = obs_in_j.T
                numer += stacked[x,y].sum()
                numers.append(numer)

                # Between-i-and-j contribution.
                x,y = obs_in_both.T
                denom += stacked[x,y].sum()
                denoms.append(denom)

                fracs.append((numer/denom-1))

                mapped = reflected_sigmoid_mapping((numer/denom)-1, slope, xshift)
                ts_dists.append(mapped)
                trans.append(mapped)

            # Leave non-informative pairs at 0 instead of writing NaN.
            if ts_dists:
                distance_matrix[i][j] += np.mean(ts_dists)
            dist_calc_num += 1
            if dist_calc_num % 10000 == 0:
                print("Calculated %s distances..." % dist_calc_num)

    # Only the lower triangle was filled; mirror it before condensing.
    return squareform(distance_matrix + distance_matrix.transpose()), fracs, trans, numers, denoms

In [None]:
#cellid_to_idx, cloneid_to_idx, clonegrouping_spmtx, coords_mtx, time_vec = readin("/home/luak/rotations/pinello/post/MEGATRON/preprocess/morrislab/output")
#squareform_distance_matrix = getdistance_mnn(clonegrouping_spmtx, coords_mtx, time_vec, 1)

In [None]:
def plot_dendrogram(Z, cloneids, num_clusters):
    """Draw a truncated dendrogram of the linkage ``Z`` and show it.

    Parameters
    ----------
    Z : linkage matrix passed to ``hierarchy.dendrogram``.
    cloneids : leaf labels, passed as ``labels``.
    num_clusters : number of last merged clusters to display (``p``).
    """
    plt.title('Hierarchical Clustering Dendrogram (truncated)')
    plt.xlabel('Clone Id or (cluster size)')
    plt.ylabel('distance')

    dendrogram_options = dict(
        truncate_mode='lastp',   # show only the last p merged clusters
        p=num_clusters,
        leaf_rotation=90.,
        leaf_font_size=10.,
        show_contracted=True,    # to get a distribution impression in truncated branches
        labels=cloneids,
    )
    hierarchy.dendrogram(Z, **dendrogram_options)
    plt.show()

#plot_dendrogram(Z, [*cloneid_to_idx], 5)

def visualize(Z, cloneids, num_clusters=5):
    """Show the truncated dendrogram, then an interactive cluster-count control.

    Parameters
    ----------
    Z : linkage matrix.
    cloneids : leaf labels for the dendrogram; also passed through to the
        interactive plot.
    num_clusters : number of merged clusters shown in the dendrogram.

    NOTE(review): ``plot_meta_clones_with_alltags`` and ``chunky`` are not
    defined in the visible part of this notebook; they must exist in the
    namespace before this function is called.
    """
    plot_dendrogram(Z, cloneids, num_clusters)
    plt.clf()

    # Open a fresh figure after clearing the dendrogram one.
    plt.subplots()

    # `interact_manual` is not among the names imported from ipywidgets in the
    # import cell (only interact, interactive, fixed), so reach it through the
    # `widgets` module alias instead of a bare name.
    widgets.interact_manual(plot_meta_clones_with_alltags,
                            Z=fixed(Z),
                            cloneids=fixed(cloneids),
                            big_clones=fixed(chunky),
                            num_clusters=widgets.IntSlider(min=1,max=len(Z)-1,
                                                           step=1))

In [None]:
#import pickle
#cellid_to_idx, cloneid_to_idx, clonegrouping_spmtx, coords_mtx, time_vec = readin("/home/luak/rotations/pinello/post/MEGATRON/preprocess/kleinlab/output_subset")
#squareform_distance_matrix = getdistance_wasserstein(clonegrouping_spmtx, coords_mtx, time_vec)
#with open('arya_wasserstein.pkl', 'wb') as f: 
#    pickle.dump(squareform_distance_matrix, f)
#Z = linkage(squareform_distance_matrix, method="ward")
#clone_clusters = fcluster(Z, 5, criterion='maxclust')
#print(clone_clusters)
## visualize(Z, cloneids)
#squareform_distance_matrix = getdistance_mnn(clonegrouping_spmtx, coords_mtx, time_vec, 1)
#Z = linkage(squareform_distance_matrix, method="ward")
#clone_clusters = fcluster(Z, 5, criterion='maxclust')
#print(clone_clusters)

In [None]:
def reflected_sigmoid_mapping(x0, slope, xshift=5):
    """Map a signed scalar onto a sigmoid that is mirrored around zero.

    For nonzero ``x0`` the result is ``1 / (1 + exp(xshift - slope * |x0|))``,
    so ``x0`` and ``-x0`` give the same value. Exactly 0 (and anything that
    is neither below nor above zero, such as NaN) returns the integer 0.

    Author's notes: higher values of ``slope`` (>3) lead to very steep score
    cutoffs; ``xshift`` refers to how far the x-intercept is shifted from 0;
    the two parameters interact.
    """
    if not (x0 < 0 or x0 > 0):
        return 0

    # Mirror the negative side onto the positive one.
    exponent = xshift + slope * x0 if x0 < 0 else xshift - slope * x0
    return 1 / (1 + np.exp(exponent))

In [None]:
%matplotlib inline
from ipywidgets import interactive
import matplotlib.pyplot as plt


def test_sigmoid(slope,x0):
    """Plot ``reflected_sigmoid_mapping`` over [-5, 5] for the given settings.

    Parameters
    ----------
    slope : passed as the ``slope`` of ``reflected_sigmoid_mapping``.
    x0 : passed as the ``xshift`` of ``reflected_sigmoid_mapping``.

    The arguments used to be passed positionally as ``(i, x0, slope)``, which
    fed ``x0`` into ``slope`` and ``slope`` into ``xshift``; they are now
    routed to the parameters their names describe.

    Also prints the mapped value at 0.
    """
    xs = np.linspace(-5,5,100)
    ys = [reflected_sigmoid_mapping(x, slope, xshift=x0) for x in xs]
    plt.scatter(xs,ys)
    plt.ylim((0,1))
    plt.show()
    print(reflected_sigmoid_mapping(0, slope, xshift=x0))
# Wrap test_sigmoid in an interactive widget: `slope` and `x0` are passed to it
# by keyword, each driven by a control built from the given (min, max[, step]).
interactive_plot = interactive(test_sigmoid, slope=(-10.0, 100.0), x0=(-10, 10, 0.1))
# Last child of the widget container -- presumably its output area; give it a
# fixed height.
output = interactive_plot.children[-1]
output.layout.height = '350px'
# Bare expression as the final line of the cell so the notebook displays the widget.
interactive_plot
#print(reflected_sigmoid_mapping(0, 2.2, xshift=1.4))