In [16]:
from dash import Dash, dash_table, dcc, html, Input, Output, callback
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import math
import matplotlib as plt

def Y_a_b(genes, a, b):
  return np.mean(genes[a:b])

def C_a_b(genes, a, b):
  mean = Y_a_b(genes, a, b+1)
  return sum( (np.array(genes[a:b+1]) - mean) ** 2 )

def determine_h(P, i, j, genes):
  N = len(genes)

  if (i == 0 and j > 0):
    return Y_a_b(genes, P[i][j], P[i+1][j]) - Y_a_b(genes, 0, P[i][j]);
  elif (i == j and j > 0):
    return Y_a_b(genes, P[i][j], N) - Y_a_b(genes, P[i-1][j], P[i][j]);
  elif (i == 0 and j == 0):
    return Y_a_b(genes, P[i][j], N) - Y_a_b(genes, 0, P[i][j]);
  else:
    return Y_a_b(genes, P[i][j], P[i+1][j]) - Y_a_b(genes, P[i-1][j], P[i][j]);

def BASC_A(gene):
    gene_og = gene
    gene = np.sort(gene)
    N = len(gene)

    cost_matrix = [[0 for _ in range(N - 1)] for _ in range(N)]
    ind_matrix = [[0 for _ in range(N - 2)] for _ in range(N - 1)]
    P = [[0 for _ in range(N - 2)] for _ in range(N - 2)]

    # Step 1: Compute a Series of Step Function

    # initialization C_i_(0) = c_i_N
    # calculate first cost matrix column with no intermidiate break points
    for i in range(N):
      cost_matrix[i][0] = C_a_b(gene, i, N)

    # Algorithm 1: Calculate optimal step functions
    for j in range(N-2):
      for i in range(N-j-1):
        min_value = math.inf
        min_index = math.inf

        for d in range(N-j-1):
          curr_value = C_a_b(gene, i, d) + cost_matrix[d+1][j]

          if(curr_value < min_value):
            min_value = curr_value
            min_index = d

        cost_matrix[i][j+1] = min_value
        ind_matrix[i][j] = min_index + 1

    #  Algorithm 2: Compute the break points of all optimal step functions
    for j in range(N-2):
      z = j
      P[0][j] = ind_matrix[0][z]
      if(j > 0):
        z = z - 1
        for i in range(1, j+1):
          P[i][j] = ind_matrix[P[i-1][j]][z]
          z = z - 1

    # Step 2: Find Strongest Discontinuity in Each Step Function
    v = [0] * (N-2)

    for j in range(N-2):
      max_value = -math.inf
      max_index = j
      for i in range(j+1):
        h = determine_h(P, i, j, gene)
        z = (gene[P[i][j]] + gene[P[i][j]-1]) / 2
        e = sum( (np.array(gene) - z) ** 2 )
        q_score = h / e
        if(q_score > max_value):
          max_value = q_score
          max_index = i

      v[j] = P[max_index][j]

    # Step 3: Estimate Location and Variation of the Strongest Discontinuities
    thr = (gene[round(np.median(v))-1] + gene[round(np.median(v))]) / 2
    gene_bin = []
    for i in range(N):
      if(gene_og[i] <= thr):
        gene_bin.append(0)
      else:
        gene_bin.append(1)

    return thr


In [17]:
from sklearn.cluster import KMeans

def K_Means(genes):
    data = np.array(genes).reshape(-1, 1)
    kmeans = KMeans(n_clusters=2)
    kmeans.fit(data)
    c=kmeans.labels_
    genes = np.array(genes)
    groupOne = genes[c==1]
    groupZero = genes[c==0]
    
    thr1 = np.mean(groupOne)
    thr2 = np.mean(groupZero)
    
    thr = (thr1 + thr2) / 2
    binary = np.zeros(len(genes), dtype=int)
    for i in range(len(genes)):
        if(genes[i] <= thr):
            binary[i] = 0
        else:
            binary[i] = 1
    return thr

In [19]:
df = pd.read_csv('HIVIn(Matlab).csv')

app = Dash(__name__)

app.layout = html.Div([
    html.Div([
    dash_table.DataTable(
        id='datatable-interactivity',
        columns=[
            {"name": i, "id": i} for i in df.columns
        ],
        data=df.to_dict('records'),
        column_selectable="single",
        row_selectable="multi",
        selected_columns=[],
        selected_rows=[],
        page_action="native",
        page_current= 0,
        page_size= 10,
    ),
    dcc.Graph(id='time-series-plot'),
    ]),


     html.Div([
            dcc.Dropdown(
                #options=[
                #    {'label': 'BASC A', 'value': 'BASC A'},
                #    {'label': 'K-Means', 'value': 'K-Means'}
                #],
                ['All','BASC A', 'K-Means'],
                placeholder="Select binarization method",
                id="dropdown-method",
                searchable=False
            )
        ]),
        html.Div(id='dd-output-container')
    ])

@app.callback(
    Output('time-series-plot', 'figure'),
    Input('datatable-interactivity', 'selected_rows')
)

def graph_gene(selected_rows):
    if not selected_rows:
        return go.Figure()
    
    selected_genes = df.iloc[selected_rows]
    
    genes = selected_genes.values
    
    sizeGene = len(genes[0])
    sizeRows = len(genes)
    data = []
    
    for i in range(sizeRows):
        data.append(go.Scatter(x=np.arange(1,sizeGene+1), y=genes[i], name="Gene "+ str(selected_rows[i]+1)))
    
    #data = [go.Scatter(x=np.arange(1,sizeGene+1), y=genes[0])]
    layout = go.Layout(title="Gene Expression Time Series Plot", xaxis={'title': 'Time Series'}, yaxis={'title': 'Gene Expression'})
    return {'data': data, 'layout': layout}

@app.callback(
    Output('dd-output-container', 'children'),
    Input('dropdown-method', 'value'),
    Input('datatable-interactivity', 'selected_rows')
)
def method_bin(algo_method, selected_rows):
    if not selected_rows:
            return "Select rows to binarize"
    
    if(algo_method == "All"):
            selected_genes = df.iloc[selected_rows]
            genes = selected_genes.values
            binary_genes_basc = []
            binary_genes_kmeans = []
            sizeRows = len(genes)
            
            for i in range(sizeRows):
                binary_genes_basc.append(BASC_A(genes[i]))
                binary_genes_kmeans.append(K_Means(genes[i]))
            
            selected_genes = df.iloc[selected_rows]
            genes = selected_genes.values
            sizeGene = len(genes[0])
            
            data = go.Figure(go.Scatter(x=np.arange(1,sizeGene+1), y=genes[0], name="Gene "+ str(selected_rows[0]+1)))
            data.add_hline(y=binary_genes_basc[0], line_width=3, line_dash="dash", line_color="green")
            data.add_annotation(
                xref="paper",  # Set the x-coordinate reference to "paper" for a relative position
                y=binary_genes_basc[0],  # Position the title at the same y-coordinate as the line
                text="BASC A",
                showarrow=False,
            )
            data.add_hline(y=binary_genes_kmeans[0], line_width=3, line_dash="dot", line_color="red")
            data.add_annotation(
                xref="paper",  # Set the x-coordinate reference to "paper" for a relative position
                y=binary_genes_kmeans[0],  # Position the title at the same y-coordinate as the line
                text="KMeans",
                showarrow=False,
            )

            #layout = go.Layout(title="Gene Expression Time Series Plot", xaxis={'title': 'Time Series'}, yaxis={'title': 'Gene Expression'})
            #graph = dcc.Graph(figure={'data': [data], 'layout': layout})
            
            #return graph
            return dcc.Graph(figure=data)
    elif(algo_method == "BASC A"):
            selected_genes = df.iloc[selected_rows]
            genes = selected_genes.values
            binary_genes = []
            sizeRows = len(genes)
            
            for i in range(sizeRows):
                binary_genes.append(BASC_A(genes[i]))
            
            selected_genes = df.iloc[selected_rows]
            genes = selected_genes.values
            sizeGene = len(genes[0])
            
            data = go.Figure(go.Scatter(x=np.arange(1,sizeGene+1), y=genes[0], name="Gene "+ str(selected_rows[0]+1)))
            data.add_hline(y=binary_genes[0], line_width=3, line_dash="dash", line_color="green")
            #layout = go.Layout(title="Gene Expression Time Series Plot", xaxis={'title': 'Time Series'}, yaxis={'title': 'Gene Expression'})
            #graph = dcc.Graph(figure={'data': [data], 'layout': layout})
            
            #return graph
            #return dcc.Graph(figure=data)
            
            #thr = BASC_A(genes[0])
            
            selected_genes = df.iloc[selected_rows]
            genes = selected_genes.values
            sizeGene = len(genes[0])
            
            genes = np.sort(genes[0])
            
            x = np.arange(1,sizeGene+1)
            y = genes
            x_dis = []
            y_dis = []
            for i in range(len(x)):
                x_dis.append(i)
                x_dis.append(i+1)
                x_dis.append(None)
                y_dis.append(y[i])
                y_dis.append(y[i])
                y_dis.append(None)
            
            data2 = go.Figure(go.Scatter(x=x_dis, y=y_dis))
            return dcc.Graph(figure=data), dcc.Graph(figure=data2)
            #return "hello"
            
            
            

    elif(algo_method == "K-Means"):
            selected_genes = df.iloc[selected_rows]
            genes = selected_genes.values
            binary_genes = []
            sizeRows = len(genes)
            
            for i in range(sizeRows):
                binary_genes.append(K_Means(genes[i]))
            
            selected_genes = df.iloc[selected_rows]
            genes = selected_genes.values
            sizeGene = len(genes[0])
            
            data = go.Figure(go.Scatter(x=np.arange(1,sizeGene+1), y=genes[0], name="Gene "+ str(selected_rows[0]+1)))
            data.add_hline(y=binary_genes[0], line_width=3, line_dash="dash", line_color="green")
            #layout = go.Layout(title="Gene Expression Time Series Plot", xaxis={'title': 'Time Series'}, yaxis={'title': 'Gene Expression'})
            #graph = dcc.Graph(figure={'data': [data], 'layout': layout})
            
            #return graph
            return dcc.Graph(figure=data)
            

if __name__ == '__main__':
    app.run(debug=True)


Mean of empty slice.


invalid value encountered in scalar divide




Mean of empty slice.


invalid value encountered in scalar divide




Mean of empty slice.


invalid value encountered in scalar divide




Mean of empty slice.


invalid value encountered in scalar divide



In [11]:
x = [1, 2, 3, 5, 6, 7, 8, 9, 10]
y = [0.22, 0.29, 0.1, 0.13, 0.8, 0.9, 0.22, 0.85, 0.81, 0.5]

y = np.sort(y)

In [13]:
x_dis = []
y_dis = []
for i in range(len(x)):
    x_dis.append(i)
    x_dis.append(i+1)
    x_dis.append(None)
    y_dis.append(y[i])
    y_dis.append(y[i])
    y_dis.append(None)
