In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

import plotly.io as pio
pio.renderers.default = 'colab'

### Read data

In [2]:
# Earlier load path, kept for reference: two separate exports (row A and rows
# B-H, "30min" per the file names) that were concatenated into a single frame.
# A  = pd.read_csv("spotfire_export_20210330012859_30min_rowA.csv")
# BH = pd.read_csv("spotfire_export_20210330013652_30min_rows_B-H.csv")

# #combine data
# data = pd.concat((A,BH))

# Active dataset: one export covering all rows ("48hour" per the file name).
# The path is relative, so the CSV must be in the notebook's working directory.
data = pd.read_csv("spotfire_export_20210403130214_48hour_all_rows.csv")

### Plot Helpers

In [3]:
def get_violin_plot(dictionary, key, text):
    """Draw one violin per well for a single measurement and display the figure.

    Parameters
    ----------
    dictionary : mapping
        Well name -> record. Each record's ``'data'`` entry is indexed with
        ``key`` to obtain the values plotted for that well.
    key : str
        Measurement to plot; also inserted into the title and y-axis label.
    text : str
        Free-text group label inserted into the figure title.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()`` and is not returned.
    """
    # Resolve every well's values up front, before any figure object exists.
    values_by_well = [
        (well_name, record['data'][key])
        for well_name, record in dictionary.items()
    ]

    # Appearance shared by all violins: embedded box plot and mean line,
    # blue outline/fill, and only the outlier points drawn as small markers.
    violin_style = dict(
        box_visible=True,
        meanline_visible=True,
        points='outliers',
        line_color='rgb(8,81,156)',
        fillcolor='rgb(107,174,214)',
        marker=dict(size=4, color='rgb(107,0,50)'),
        opacity=0.6,
        width=0.9,
    )

    fig = go.Figure()
    for well_name, values in values_by_well:
        fig.add_trace(go.Violin(y=values, name=well_name, **violin_style))

    # Titles, fixed canvas size, and no legend (well names label the x-axis).
    layout_options = {
        'title': f"Violin Plot of {key} for {text} wells",
        'yaxis_title': f"{key} Intensity",
        'xaxis_title': 'Wells',
        'showlegend': False,
        'height': 600,
        'width': 1200,
        'violinmode': 'group',
    }
    fig.update_layout(**layout_options)

    # Light horizontal gridlines to make intensities easier to read off.
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='LightGrey')

    fig.show()

### Extract out negative or positive controls

In [4]:
def _collect_well_records(df, wells):
    """Build the per-well record dict shared by both control extractors.

    For every well name in ``wells`` the rows of ``df`` whose ``Well`` column
    equals that name are selected, and stored together with ``describe()``
    summaries of the ``AvgIntenCh2`` and ``ObjectAvgIntenCh1`` columns.
    """
    records = {}
    for well in wells:
        well_df = df[df.Well == well]
        records[well] = {
            'data': well_df,
            'AvgIntenCh2': well_df.AvgIntenCh2.describe(),
            'ObjectAvgIntenCh1': well_df.ObjectAvgIntenCh1.describe(),
        }
    return records


def get_negative_controls_data(df):
    """Collect rows and summary statistics for the negative-control wells.

    Wells used:
        A6, A12, B7, B1, C6, C12, D7, D1, E6, E12, F7, F1, G6, G12, H7, H1

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Well``, ``AvgIntenCh2`` and
        ``ObjectAvgIntenCh1``.

    Returns
    -------
    dict
        Well name -> ``{'data': rows of that well,
        'AvgIntenCh2': describe() summary,
        'ObjectAvgIntenCh1': describe() summary}``, in the order listed above.
    """
    wells = ["A6", "A12", "B7", "B1", "C6", "C12", "D7", "D1", "E6", "E12", "F7", "F1", "G6", "G12", "H7", "H1"]
    return _collect_well_records(df, wells)


def get_positive_controls_data(df):
    """Collect rows and summary statistics for the positive-control wells.

    Wells used:
        A1, A7, B12, B6, C1, C7, D12, D6, E1, E7, F12, F6, G1, G7, H12, H6

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Well``, ``AvgIntenCh2`` and
        ``ObjectAvgIntenCh1``.

    Returns
    -------
    dict
        Well name -> ``{'data': rows of that well,
        'AvgIntenCh2': describe() summary,
        'ObjectAvgIntenCh1': describe() summary}``, in the order listed above.
    """
    wells = ["A1", "A7", "B12", "B6", "C1", "C7", "D12", "D6", "E1", "E7", "F12", "F6", "G1", "G7", "H12", "H6"]
    return _collect_well_records(df, wells)

In [5]:
# Per-well rows and summary statistics for each control group.
negative_controls_data = get_negative_controls_data(df=data)
positive_controls_data = get_positive_controls_data(df=data)

In [6]:
# Negative-control distributions: one figure per intensity measurement.
get_violin_plot(
    dictionary=negative_controls_data,
    key='ObjectAvgIntenCh1',
    text="negative control",
)
get_violin_plot(
    dictionary=negative_controls_data,
    key='AvgIntenCh2',
    text="negative control",
)

In [7]:
# Positive-control distributions: one figure per intensity measurement.
# Both calls plot positive_controls_data, so both titles must say
# "positive control" (the second one was previously labelled "negative control").
get_violin_plot(positive_controls_data, 'ObjectAvgIntenCh1', "positive control")
get_violin_plot(positive_controls_data, 'AvgIntenCh2', "positive control")

In [9]:
import plotly.graph_objects as go

def extract_mean_values(data, key):
    """Return ``data[well][key]['mean']`` for every well, in iteration order.

    ``data`` maps well names to records; ``key`` selects the summary inside
    each record whose ``'mean'`` entry is collected.
    """
    means = []
    for well in data:
        summary = data[well][key]
        means.append(summary['mean'])
    return means

def plot_mean_values_in_channel(channel, negative_data=None, positive_data=None):
    """Plot per-well mean values of one channel for both control groups.

    Shows a grouped bar chart (negative controls in red, positive controls in
    green) and exports it to ``images/48_{channel}.pdf``.

    Parameters
    ----------
    channel : str
        Key of the per-well summary whose ``'mean'`` is plotted; also used in
        the title, the y-axis label and the output file name.
    negative_data, positive_data : mapping, optional
        Well name -> record, as consumed by ``extract_mean_values``. When
        omitted, the notebook-level ``negative_controls_data`` /
        ``positive_controls_data`` are used, which keeps existing
        single-argument calls working.

    Returns
    -------
    None
        The figure is displayed and written to disk, not returned.
    """
    import os  # function-scope import: only needed to prepare the output folder

    if negative_data is None:
        negative_data = negative_controls_data
    if positive_data is None:
        positive_data = positive_controls_data

    # Mean of the requested channel for every well in each group.
    neg_means = extract_mean_values(negative_data, channel)
    pos_means = extract_mean_values(positive_data, channel)

    # Wells for the x-axis
    neg_wells = list(negative_data.keys())
    pos_wells = list(positive_data.keys())

    # Grouped (side-by-side) bar chart, one trace per control group.
    fig = go.Figure(data=[
        go.Bar(
            name='Negative Controls',
            x=neg_wells,
            y=neg_means,
            marker_color='red',
            text=neg_means,
            textposition='outside',
            texttemplate='%{text:.1f}'
        ),
        go.Bar(
            name='Positive Controls',
            x=pos_wells,
            y=pos_means,
            marker_color='green',
            # No font size here: the update_traces call below sets the label
            # size for every trace, so a per-trace size would be overwritten.
            textfont=dict(
                family="Arial, sans-serif",
                color="black"
            ),
            text=pos_means,
            textposition='outside',
            texttemplate='%{text:.1f}',
        )
    ])

    # Customize the layout
    fig.update_layout(
        width=1400,
        height=800,
        barmode='group',
        title=f"{channel} for Negative and Positive Controls",
        xaxis_title='Wells',
        yaxis_title=f"Average Intensity ({channel})",
        legend_title='Control Type',
        xaxis_tickangle=-45,
        legend=dict(
            x=0.2,  # x position (0 to 1)
            y=0.95,  # y position (0 to 1)
            xanchor='center',  # x anchor point
            yanchor='top'  # y anchor point
        ),
        legend_font_size=16  # Increase legend font size
    )

    # Single source of truth for the bar labels: size, orientation, placement,
    # and no clipping at the axis so labels above tall bars stay visible.
    fig.update_traces(textfont_size=26, textangle=0, textposition="outside", cliponaxis=False)

    # Increase font size for axis titles and ticks
    fig.update_xaxes(title_font=dict(size=22), tickfont=dict(size=18))
    fig.update_yaxes(title_font=dict(size=22), tickfont=dict(size=14))

    fig.update_layout(title_font=dict(size=24))

    # Show the plot, then export it. The output folder is created first so the
    # export does not fail when "images/" is missing (e.g. on a fresh checkout).
    fig.show()
    os.makedirs("images", exist_ok=True)
    fig.write_image(f"images/48_{channel}.pdf")

### Channel 2 represents dead nuclei

In [10]:
# Mean AvgIntenCh2 per control well (figure is also exported by the helper).
plot_mean_values_in_channel(channel='AvgIntenCh2')

### Channel 1 represents all DNA nuclei (dead + alive)

In [11]:
# Mean ObjectAvgIntenCh1 per control well (figure is also exported by the helper).
plot_mean_values_in_channel(channel='ObjectAvgIntenCh1')

# Finding LD50

In [17]:
## Say we fix the threshold as 20

In [12]:
# Number of AvgIntenCh2 measurements recorded for well A1.
len(data.loc[data.Well == "A1", 'AvgIntenCh2'])

5821

In [13]:
# Now we have 4 replicates of the same experiment.
# Need to count the dead/all ratio.

In [14]:
def count_ratio(df, row_ids = ["A", "C", "E", "G"], col_ids = [2,3,4,5,8,9,10,11], thresh = 20):
    """Plot, per plate row, the percentage of objects above an intensity threshold.

    For every well ``row + str(col)`` the share of rows in ``df`` whose
    ``AvgIntenCh2`` exceeds ``thresh`` is computed (in percent). Each entry of
    ``row_ids`` becomes one line in a single figure, which is then displayed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``Well`` and ``AvgIntenCh2``.
    row_ids : sequence of str
        Plate row letters; one curve is drawn per entry. Not modified.
    col_ids : sequence of int
        Plate column numbers visited, in order, for every row. Not modified.
    thresh : number
        An object counts as "dead" when its ``AvgIntenCh2`` is strictly
        greater than this value.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()`` and is not returned.
    """
    fig = go.Figure()  # Create a single figure for all curves

    for idx, row in enumerate(row_ids):
        ratio_per_well = []

        for col in col_ids:
            well_df = df[df.Well == row + str(col)]
            all_count = len(well_df)

            if all_count == 0:
                # A well without any rows has no defined ratio. Record NaN
                # explicitly instead of dividing by zero, so the curve keeps
                # one point per column.
                ratio_per_well.append(float("nan"))
                continue

            dead_count = int((well_df.AvgIntenCh2.values > thresh).sum())
            ratio_per_well.append(dead_count / all_count * 100)

        # Every other entry of row_ids (by position in the list, not by row
        # letter) is plotted in reverse column order.
        # NOTE(review): presumably this undoes a mirrored plate layout —
        # confirm that position parity matches the layout of the rows passed in.
        if idx % 2 == 0:
            ratio_per_well.reverse()

        # Add each row's ratios to the figure. The x values are positional
        # indices 0..n-1, not concentrations read from the data.
        fig.add_trace(
            go.Scatter(x=list(range(len(ratio_per_well))), y=ratio_per_well, mode='lines+markers', name=row)
        )

    # Update layout for better visualization
    fig.update_layout(
        title = f"Response curves for each replicate (thresh = {thresh})",
        xaxis_title = 'Concentration',
        yaxis_title = 'Response (%)',
        legend_title = 'Replicates'
    )

    # Increase font size for axis titles and ticks
    fig.update_xaxes(title_font=dict(size=22), tickfont=dict(size=18))
    fig.update_yaxes(title_font=dict(size=22), tickfont=dict(size=14))

    fig.update_layout(title_font=dict(size=24))

    fig.show()  # Display the plot

In [15]:
# Response curves for rows B, D, F and H at threshold 15.
count_ratio(
    df=data,
    row_ids=["B", "D", "F", "H"],
    col_ids=[2, 3, 4, 5, 8, 9, 10, 11],
    thresh=15,
)

In [16]:
# NOTE(review): same rows, columns and threshold as the call in the previous
# cell, so this draws the same figure again. Presumably one of the two cells
# was meant to use other rows (count_ratio defaults to A, C, E, G) — confirm.
count_ratio(
    df=data,
    row_ids=["B", "D", "F", "H"],
    col_ids=[2, 3, 4, 5, 8, 9, 10, 11],
    thresh=15,
)

In [17]:
# Peek at the raw AvgIntenCh2 values of the first positive-control well only
# (the loop breaks after its first iteration).
# NOTE(review): `well` and `df` stay bound at notebook level after this cell runs.
for well, df in positive_controls_data.items():
    print(df['data'].AvgIntenCh2)
    break

0        371.181976
1          0.000000
2         10.891447
3          0.023256
4          0.000000
           ...     
5816       0.000000
5817    7191.169811
5818    7314.082294
5819    7725.034524
5820    7625.802062
Name: AvgIntenCh2, Length: 5821, dtype: float64


In [18]:
def calculate_dead_cell_percentages(data, thresh=20):
    """Return, per well, the percentage of rows with ``AvgIntenCh2`` above ``thresh``.

    Parameters
    ----------
    data : mapping
        Well name -> record whose ``'data'`` entry is a DataFrame with the
        columns ``Well`` and ``AvgIntenCh2``.
    thresh : number
        A row counts as "dead" when its ``AvgIntenCh2`` is strictly greater
        than this value.

    Returns
    -------
    dict
        Well name -> percentage (0-100), in the iteration order of ``data``.
        A well with no matching rows maps to NaN.
    """
    percentages = {}
    for well, record in data.items():
        frame = record['data']
        well_rows = frame[frame.Well == well]

        # Numerator and denominator must come from the same rows. Using the
        # length of the unfiltered frame would understate the percentage
        # whenever the frame also holds rows of other wells.
        total_count = len(well_rows)
        if total_count == 0:
            # No rows for this well: the ratio is undefined, so report NaN
            # explicitly instead of dividing by zero.
            percentages[well] = float("nan")
            continue

        dead_count = int((well_rows.AvgIntenCh2.values > thresh).sum())
        percentages[well] = dead_count / total_count * 100
    return percentages

def plot_dead_cell_percentages(positive_controls_data, negative_controls_data, channel="AvgIntenCh2", thresh=20):
    """Plot the dead-cell percentage of every control well as grouped bars.

    Shows the chart (negative controls in red, positive controls in green) and
    exports it to
    ``images/48_dead_cell_percentages_{channel}_thresh_{thresh}.pdf``.

    Parameters
    ----------
    positive_controls_data, negative_controls_data : mapping
        Well name -> record, as consumed by ``calculate_dead_cell_percentages``.
    channel : str
        Only used to build the output file name; it is not passed to
        ``calculate_dead_cell_percentages``.
    thresh : number
        Threshold forwarded to ``calculate_dead_cell_percentages`` and shown
        in the title and the file name.

    Returns
    -------
    None
        The figure is displayed and written to disk, not returned.
    """
    import os  # function-scope import: only needed to prepare the output folder

    # Calculate dead cell percentages
    neg_percentages = calculate_dead_cell_percentages(negative_controls_data, thresh)
    pos_percentages = calculate_dead_cell_percentages(positive_controls_data, thresh)

    # Wells for the x-axis
    neg_wells = list(negative_controls_data.keys())
    pos_wells = list(positive_controls_data.keys())

    # Bar heights, and labels pre-formatted to one decimal place plus "%".
    neg_values = list(neg_percentages.values())
    pos_values = list(pos_percentages.values())
    neg_labels = [f"{p:.1f}%" for p in neg_values]
    pos_labels = [f"{p:.1f}%" for p in pos_values]

    # Create the grouped bar chart, one trace per control group.
    fig = go.Figure(data=[
        go.Bar(
            name='Negative Controls',
            x=neg_wells,
            y=neg_values,
            marker_color='red',
            text=neg_labels,
            textposition='outside',
            texttemplate='%{text}'
        ),
        go.Bar(
            name='Positive Controls',
            x=pos_wells,
            y=pos_values,
            marker_color='green',
            text=pos_labels,
            textposition='outside',
            texttemplate='%{text}'
        )
    ])

    # Customize the layout
    fig.update_layout(
        width=1400,
        height=800,
        barmode='group',
        title=f"Dead Cell Percentages (Threshold: {thresh})",
        xaxis_title='Wells',
        yaxis_title='Dead Cell Percentage',
        legend_title='Control Type',
        xaxis_tickangle=-45,
        legend=dict(
            x=0.2,
            y=0.95,
            xanchor='center',
            yanchor='top'
        ),
        legend_font_size=16
    )

    # Increase font sizes; labels are not clipped at the axis so that text
    # above tall bars stays visible.
    fig.update_traces(textfont_size=26, textangle=0, textposition="outside", cliponaxis=False)
    fig.update_xaxes(title_font=dict(size=22), tickfont=dict(size=18))
    fig.update_yaxes(title_font=dict(size=22), tickfont=dict(size=14))
    fig.update_layout(title_font=dict(size=24))

    # Show and save the plot. The output folder is created first so the export
    # does not fail when "images/" is missing (e.g. on a fresh checkout).
    fig.show()
    os.makedirs("images", exist_ok=True)
    fig.write_image(f"images/48_dead_cell_percentages_{channel}_thresh_{thresh}.pdf")

In [22]:
# Dead-cell percentages of all control wells at threshold 100
# (the function's default threshold is 20).
plot_dead_cell_percentages(
    positive_controls_data=positive_controls_data,
    negative_controls_data=negative_controls_data,
    channel="AvgIntenCh2",
    thresh=100,
)