In [None]:
# Print the version of the interpreter running this kernel so the environment
# is recorded next to the results.
from platform import python_version
print(python_version())

In [None]:
import xlwings as xw
import numpy as np
import os, os.path
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
from outliers import smirnov_grubbs as grubbs
import seaborn as sns
%matplotlib inline

In [None]:
def get_mean_intensity(his_dir, th):
    """Return the count-weighted mean pixel value (>= ``th``) of every histogram CSV.

    Each ``*.csv`` file found directly in ``his_dir`` is opened through a
    hidden Excel instance (xlwings). Column C of the first sheet is read from
    row ``th + 2`` downwards and paired element-wise with the pixel values
    ``th .. 255``; the per-file result is ``sum(count * pixel) / sum(count)``.

    Parameters
    ----------
    his_dir : str
        Folder containing the histogram CSV files.
    th : int
        Lowest pixel value included in the mean. Bins below it are skipped.

    Returns
    -------
    list
        One value per CSV file, in sorted file-name order. A file whose
        selected counts sum to zero contributes ``0`` (and a message is
        printed).

    Notes
    -----
    The Excel application and each workbook are released in ``finally``
    blocks, so a failure while reading one file does not leave a hidden
    Excel process running.
    """
    # os.listdir returns entries in arbitrary order; sort so the result order
    # is reproducible between runs and machines.
    csv_names = sorted(
        entry for entry in os.listdir(his_dir) if entry.endswith('.csv')
    )

    # The cell at row th + 2 is paired with pixel value th (this assumes one
    # header row followed by the bin for pixel 0 on row 2 -- TODO confirm
    # against the histogram export format).
    pixel_value = np.arange(th, 256)

    all_mean = []
    excel_app = xw.App(visible=False)
    try:
        for csv_name in csv_names:
            temp_bk = excel_app.books.open(os.path.join(his_dir, csv_name))
            try:
                temp_sht = temp_bk.sheets[0]
                temp_data = temp_sht.range(f'C{th+2}').options(np.array, expand="down").value
            finally:
                # Always close the workbook, even if the read above failed.
                temp_bk.close()

            total_count = temp_data.sum()
            if total_count == 0:
                all_mean.append(0)
                print("no pixel over threshold happened!")
            else:
                all_mean.append((temp_data * pixel_value).sum() / total_count)
    finally:
        # Shut the hidden Excel process down on success and on error alike.
        excel_app.quit()
    return all_mean

In [None]:
# Collect the name of every entry directly under the histogram root folder.
root_dir = 'histogram_list'
file_num = [entry for entry in os.listdir(root_dir)]
file_num

## Get the number of conditions

In [None]:
# First pass with threshold 0: every histogram bin contributes to the mean.
# Only the folders named '0X', '1X', '3X' and '6X' are processed.
for condition in file_num:
    condition_dir = f'{root_dir}/{condition}'
    print(condition_dir)
    if condition not in ('0X', '1X', '3X', '6X'):
        continue
    condition_means = get_mean_intensity(condition_dir, 0)
    if condition == '0X':
        _0X_intensity = condition_means
    elif condition == '1X':
        _1X_intensity = condition_means
    elif condition == '3X':
        _3X_intensity = condition_means
    else:
        _6X_intensity = condition_means

In [None]:
# Load the automatic threshold table and confirm what kind of object came back.
threshold_csv = "autoThresholdValue.csv"
autoThresholdValue = pd.read_csv(threshold_csv)
print(type(autoThresholdValue))

In [None]:
# Show how many threshold rows were loaded, followed by the number of
# intensity values collected for each condition.
for sized in (autoThresholdValue, _0X_intensity, _1X_intensity, _3X_intensity, _6X_intensity):
    print(len(sized))

In [None]:
# Pull the 'Value' column out once, then cut it into one consecutive chunk
# per condition.
# NOTE(review): the slice bounds are hard-coded; presumably they match the
# per-condition row counts in the CSV -- confirm before reusing on new data.
threshold_values = autoThresholdValue['Value'].tolist()
print(len(threshold_values))
print('===')
_0X_th_value = threshold_values[:60]
_1X_th_value = threshold_values[60:110]
_3X_th_value = threshold_values[110:174]
_6X_th_value = threshold_values[174:232]
for th_chunk in (_0X_th_value, _1X_th_value, _3X_th_value, _6X_th_value):
    print(len(th_chunk))

In [None]:
# Turn each condition's threshold list into an array and print its mean.
_0X_th, _1X_th, _3X_th, _6X_th = (
    np.array(th_list)
    for th_list in (_0X_th_value, _1X_th_value, _3X_th_value, _6X_th_value)
)
for th_array in (_0X_th, _1X_th, _3X_th, _6X_th):
    print(np.mean(th_array))

## Mean intensity above the per-condition threshold

In [None]:
# Second pass: recompute the mean intensity per image, this time skipping the
# histogram bins below a condition-specific threshold.
root_dir = 'histogram_list'
file_num = [entry for entry in os.listdir(root_dir)]

# NOTE(review): presumably derived from the mean auto-threshold values
# printed earlier in the notebook -- confirm before reusing on new data.
condition_thresholds = {'0X': 27, '1X': 26, '3X': 23, '6X': 23}

for condition in file_num:
    condition_dir = f'{root_dir}/{condition}'
    print(condition_dir)
    if condition not in condition_thresholds:
        continue
    condition_means = get_mean_intensity(condition_dir, condition_thresholds[condition])
    if condition == '0X':
        _0X_intensity = condition_means
    elif condition == '1X':
        _1X_intensity = condition_means
    elif condition == '3X':
        _3X_intensity = condition_means
    else:
        _6X_intensity = condition_means

In [None]:
def formatpv(pv, threshold=1E-4):
    """Format a p-value as a ``'P=...'`` label.

    Values strictly greater than ``threshold`` are shown with four decimal
    places; values at or below it use scientific notation with two decimals.
    """
    number_spec = '.4f' if pv > threshold else '.2E'
    return 'P=' + format(pv, number_spec)

## Box plot

In [None]:
def _draw_pvalue_bracket(ax, pv, left_x, right_x, bar_start, barh):
    """Draw a bracket spanning left_x..right_x at height bar_start, labelled with the p-value."""
    barx = [left_x, left_x, right_x, right_x]
    bary = [bar_start, bar_start + barh, bar_start + barh, bar_start]
    ax.plot(barx, bary, c='black', linewidth=5)
    # The label sits centred on top of the bracket.
    ax.text((left_x + right_x) / 2, bar_start + barh, formatpv(pv),
            ha='center', va='bottom', fontsize=14)


name = "Intensity"
in_data = [_0X_intensity, _1X_intensity, _3X_intensity, _6X_intensity]
# Run each group through the Grubbs test at alpha = 0.05; the returned data is
# what gets plotted and tested below (presumably with detected outliers
# removed -- confirm against the outlier library's documentation).
mydata = [grubbs.test(group, alpha=0.05) for group in in_data]
# Largest plotted value; every bracket height is a multiple of it.
max_value = np.amax([np.amax(group) for group in mydata])
numBoxes = len(mydata)

fig1, ax1 = plt.subplots(figsize=(6, 9), dpi=300)
# Pass the axes explicitly instead of relying on the implicit current axes.
sns.boxplot(data=mydata, linewidth=3, showfliers=False, ax=ax1)
sns.swarmplot(data=mydata, color=".25", size=6, ax=ax1)
ax1.set_title(name, size=34, fontweight='bold')
# ax1.set_aspect(3/max_value)
# Alternative y-axis labels for other metrics:
#plt.ylabel(r"Area ($\mathbf{\mu m^2}$)", size = 20, fontweight='bold', labelpad=10)
#plt.ylabel(r"Length ($\mathbf{\mu m}$)", size = 20, fontweight='bold', labelpad=10)
#plt.ylabel("Count", size = 20, fontweight='bold', labelpad=10)
plt.ylabel(r"$\mathbf{FI_{TMRM}}$ (a.u.)", size = 20, fontweight='bold', labelpad=10)

plt.xticks([0., 1., 2., 3.], ["0X", "1X", "3X", "6X"], size = 26, fontweight='bold', rotation=30)
plt.yticks(size = 25, fontweight='bold')

print(f'Levene for {name} = {stats.levene(mydata[0], mydata[1], mydata[2], mydata[3])}')
print(f'ANOVA for {name} = {stats.f_oneway(mydata[0], mydata[1], mydata[2], mydata[3])}')

# One row per pairwise comparison:
# (first group, second group, bracket left x, bracket right x,
#  bracket height as a multiple of max_value).
# Adjacent pairs share the lowest level; wider spans are stacked above it so
# the brackets never overlap.
comparisons = [
    (0, 1, 0, 0.95, 1.04),
    (1, 2, 1.05, 1.95, 1.04),
    (2, 3, 2.05, 2.95, 1.04),
    (0, 2, 0, 1.95, 1.12),
    (1, 3, 1, 2.95, 1.20),
    (0, 3, 0, 2.95, 1.28),
]
barh = max_value * 0.02

# NOTE(review): six unpaired t-tests with unequal variances (equal_var=False)
# are run with no multiple-comparison correction; each is judged against
# 0.05 on its own.
for group_a, group_b, left_x, right_x, level in comparisons:
    pv = stats.ttest_ind(mydata[group_a], mydata[group_b], equal_var = False).pvalue
    print(f'P-value between group {group_a} and {group_b}: {pv}')
    if pv <= 0.05:
        _draw_pvalue_bracket(ax1, pv, left_x, right_x, max_value * level, barh)

fig1.tight_layout()
# The file name follows the metric name, so reusing this cell for another
# metric does not overwrite this figure.
fig1.savefig(f'{name}_pv.tif', dpi=300, format="tiff", pil_kwargs={"compression": "tiff_lzw"})