In [10]:
# imports
import os
import cv2
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load every image in the imaging subset as grayscale and pair it with the
# acquisition depth recorded for that file in depths.csv. Files without a
# depth entry are reported and skipped.
data_path = r"data"
imaging_path = os.path.join(data_path, r"imaging_subset")
filenames = os.listdir(imaging_path)
depths = pd.read_csv(os.path.join(data_path, "depths.csv"))

depth_column = "Depth from lung surface (in micrometers) where image was acquired"

# Some files are named with SK658 and some with Sk658, so the lookup is
# case-insensitive. Lower-case the CSV side once rather than on every iteration.
depth_filenames_lower = depths["Filenames"].str.lower()

# Collect plain records and build the DataFrame once at the end; calling
# pd.concat inside the loop re-copies the whole frame on every iteration.
records = []
for fname in filenames:
    # NOTE(review): cv2.imread returns None (no exception) when a file cannot
    # be decoded; such rows are kept with image=None, as before.
    img = cv2.imread(os.path.join(imaging_path, fname), cv2.IMREAD_GRAYSCALE)

    matching_depths = depths.loc[depth_filenames_lower == fname.lower(), depth_column]
    if matching_depths.empty:
        print(f"couldn't find depth for file {fname}")
        continue

    # If several rows match, the first one wins (same as the original lookup).
    depth = matching_depths.values[0]
    records.append({"filename": fname, "image": img, "depth": depth})

# Explicit columns keep the schema stable even when no file had a depth match.
images_subset = pd.DataFrame(records, columns=["filename", "image", "depth"])

print(images_subset.shape)

(20, 3)


In [11]:
# Read pct_white_pixels.csv and manual_contour_area.csv, then inner-join them
# so that only rows whose (filename, depth) pair appears in both tables remain.
pct_white_pixels = pd.read_csv(os.path.join(data_path, "pct_white_pixels.csv"))
manual_contour_area = pd.read_csv(os.path.join(data_path, "manual_contour_area.csv"))

merged_data = pct_white_pixels.merge(
    manual_contour_area,
    how="inner",
    on=["filename", "depth"],
)
display(merged_data.head())

Unnamed: 0,filename,depth,white_percent,area_pixels
0,MASK_SK658 Slobe ch010048.jpg,540,1.179957,339495
1,MASK_Sk658 Llobe ch010061.jpg,585,1.335216,175368
2,MASK_SK658 Slobe ch010103.jpg,9600,4.619193,192249
3,MASK_SK658 Slobe ch010063.jpg,7400,2.882719,85161
4,MASK_SK658 Slobe ch010113.jpg,7300,2.859545,49410


In [12]:
# Derive a boolean "slobe" flag from the filename (case-insensitive substring
# match). Rows flagged False are the ones treated as llobe further down.
is_slobe = merged_data["filename"].str.contains("slobe", case=False)

# assign() returns a new frame, so merged_data itself is left unchanged.
df = merged_data.assign(slobe=is_slobe)
display(df.head())

Unnamed: 0,filename,depth,white_percent,area_pixels,slobe
0,MASK_SK658 Slobe ch010048.jpg,540,1.179957,339495,True
1,MASK_Sk658 Llobe ch010061.jpg,585,1.335216,175368,False
2,MASK_SK658 Slobe ch010103.jpg,9600,4.619193,192249,True
3,MASK_SK658 Slobe ch010063.jpg,7400,2.882719,85161,True
4,MASK_SK658 Slobe ch010113.jpg,7300,2.859545,49410,True


In [13]:
# Independent two-sample t-tests (scipy's default ttest_ind settings) comparing
# slobe vs llobe rows on white_percent, depth and area_pixels, followed by a
# Pearson correlation between depth and area_pixels over all rows.
import scipy.stats as stats

y = "slobe"

# Split the rows once by the lobe flag instead of re-filtering for every test.
slobe_rows = df[df[y].eq(True)]
llobe_rows = df[df[y].eq(False)]

t_test_slobe_white_percent = stats.ttest_ind(
    slobe_rows["white_percent"], llobe_rows["white_percent"]
)
t_test_slobe_depth = stats.ttest_ind(slobe_rows["depth"], llobe_rows["depth"])
t_test_slobe_area_pixels = stats.ttest_ind(
    slobe_rows["area_pixels"], llobe_rows["area_pixels"]
)

# depth vs area pixels, across both lobe groups
correlation_depth_area_pixels = stats.pearsonr(df["depth"], df["area_pixels"])

# Print each result under its heading, in the order the tests were computed.
report = [
    ("T test for slobe vs llobe for white percent:", t_test_slobe_white_percent),
    ("T test for slobe vs llobe for depth:", t_test_slobe_depth),
    ("T test for slobe vs llobe for area pixels:", t_test_slobe_area_pixels),
    ("Correlation between depth and area pixels:", correlation_depth_area_pixels),
]
for heading, result in report:
    print(heading)
    print(result)

T test for slobe vs llobe for white percent:
TtestResult(statistic=np.float64(3.469815814239402), pvalue=np.float64(0.002565092387819093), df=np.float64(19.0))
T test for slobe vs llobe for depth:
TtestResult(statistic=np.float64(3.133607768897513), pvalue=np.float64(0.005469240026446949), df=np.float64(19.0))
T test for slobe vs llobe for area pixels:
TtestResult(statistic=np.float64(0.9099205557822232), pvalue=np.float64(0.3742635498003356), df=np.float64(19.0))
Correlation between depth and area pixels:
PearsonRResult(statistic=np.float64(0.15346973591218055), pvalue=np.float64(0.5065787295909403))


In [14]:
# MANOVA: does lobe type (slobe vs llobe) jointly affect depth and area_pixels?
from statsmodels.multivariate.manova import MANOVA

# Work on a copy and add a string "lobe" label derived from the boolean flag,
# unless the column already exists (e.g. because this cell was run before).
df = df.copy()
if "lobe" not in df.columns:
    lobe_labels = {True: "slobe", False: "llobe"}
    df["lobe"] = df["slobe"].map(lobe_labels)

# Both dependent variables go on the left-hand side; C(...) marks lobe as a
# categorical predictor.
manova_formula = "depth + area_pixels ~ C(lobe)"
maov = MANOVA.from_formula(manova_formula, data=df)
manova_results = maov.mv_test()
print(manova_results)

                 Multivariate linear model
                                                            
------------------------------------------------------------
       Intercept        Value  Num DF  Den DF F Value Pr > F
------------------------------------------------------------
          Wilks' lambda 0.6498 2.0000 18.0000  4.8494 0.0207
         Pillai's trace 0.3502 2.0000 18.0000  4.8494 0.0207
 Hotelling-Lawley trace 0.5388 2.0000 18.0000  4.8494 0.0207
    Roy's greatest root 0.5388 2.0000 18.0000  4.8494 0.0207
------------------------------------------------------------
                                                            
------------------------------------------------------------
        C(lobe)         Value  Num DF  Den DF F Value Pr > F
------------------------------------------------------------
          Wilks' lambda 0.6458 2.0000 18.0000  4.9365 0.0195
         Pillai's trace 0.3542 2.0000 18.0000  4.9365 0.0195
 Hotelling-Lawley trace 0.5485 2.0000 18.0

In [15]:
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Univariate follow-up: fit one OLS model per dependent variable with lobe as
# a categorical predictor and print its type-2 ANOVA table.
# Requires the "lobe" column that the MANOVA cell adds to df.
for dv in ("depth", "area_pixels"):
    formula = f"{dv} ~ C(lobe)"
    model = ols(formula, data=df).fit()
    print(f"\nANOVA for {dv}:")
    anova_table = anova_lm(model, typ=2)
    print(anova_table)


ANOVA for depth:
                sum_sq    df         F    PR(>F)
C(lobe)   9.994400e+07   1.0  9.819498  0.005469
Residual  1.933842e+08  19.0       NaN       NaN

ANOVA for area_pixels:
                sum_sq    df         F    PR(>F)
C(lobe)   6.609092e+10   1.0  0.827955  0.374264
Residual  1.516661e+12  19.0       NaN       NaN
