In [1]:
import os
import radiomics
import pandas as pd
import numpy as np
import SimpleITK as sitk

from sklearn.utils import shuffle # 数据混序
from sklearn.preprocessing import StandardScaler

# for t test
from scipy.stats import levene, ttest_ind



## 特征提取

In [2]:
from radiomics import featureextractor
from radiomics import imageoperations

In [None]:
# Sanity check: load one image/mask pair and print its array shape and geometry
# before running any feature extraction.
maskName = 'D:/pancreas-LU/WWCT202102251903/CT1.nii'
imageName = 'D:/pancreas-LU/WWCT202102251903/CT_0000.nii.gz'
paramPath = 'E:/AIDataSet/Radiomics/Dataset/RadiomicsParams.yaml'


def _read_volume(path):
    """Read an image file with SimpleITK.

    Parameters
    ----------
    path : str
        Path of the image file to read.

    Returns
    -------
    tuple
        ``(array, meta)`` where ``array`` is the voxel data as a NumPy array and
        ``meta`` is a dict holding the size, spacing, origin and direction
        reported by SimpleITK.
    """
    volume = sitk.ReadImage(path)
    meta = {
        'size': volume.GetSize(),
        'spacing': volume.GetSpacing(),
        'origin': volume.GetOrigin(),
        'direction': volume.GetDirection(),
    }
    # GetArrayFromImage returns the axes in reverse order relative to GetSize().
    return sitk.GetArrayFromImage(volume), meta


# The previous version called LoadImage(), a name that is never imported in this
# notebook, so the cell failed on a fresh kernel. SimpleITK is already imported.
data_img, meta_img = _read_volume(imageName)
data_msk, meta_msk = _read_volume(maskName)
print(f"image data shape:{data_img.shape}")
print(f"meta data:{meta_img}")

In [None]:
# Print the shape and metadata of the mask loaded for the single-case check.
# The label used to say "image data shape", which mislabelled the mask output.
print(f"mask data shape:{data_msk.shape}")
print(f"meta data:{meta_msk}")

In [None]:
# Single-case run: extract radiomics features for one image / ROI pair.
maskName = 'E:/AIDataSet/Radiomics/Dataset/chen2/roi.nii'
imageName = 'E:/AIDataSet/Radiomics/Dataset/chen2/t2.nii'

# Initialize feature extractor
# paramPath is not assigned in this cell; it must already exist in the kernel.
extractor = featureextractor.RadiomicsFeatureExtractor(paramPath)
# NOTE(review): the disabled call below references `settings`, which is not
# defined in any cell shown here.
# boundingBox, correctedMask = imageoperations.checkMask(imageName,maskName,**settings)
# Run the extraction; the result is a mapping (its keys()/values() are used later).
featureVector = extractor.execute(imageName,maskName)

In [None]:
# Configuration for the batch extraction: parameter file, extractor, data root.
# NOTE(review): this path differs from the paramPath assigned earlier in the
# notebook ('E:/AIDataSet/Radiomics/Dataset/...') — confirm which one is current.
paramPath = 'E:/03_AIDataSet/00_Radiomics/Dataset/RadiomicsParams.yaml'
# Rebuilds the extractor from the parameter file above, replacing any earlier one.
extractor = featureextractor.RadiomicsFeatureExtractor(paramPath)
# Root folder that is walked for per-subject sub-folders.
dataDir = 'D:/pancreas-LU/'

In [None]:
# Batch radiomics extraction: one output row per folder that contains a mask and
# at least one target image, written to <dataDir>/results.xlsx.
#
# Changes relative to the previous version of this cell:
#   * Subjects are the immediate children of dataDir only. The old nested
#     os.walk-inside-os.walk re-visited deeper folders once per level.
#   * A row is appended for every folder that produced features. The old append
#     sat outside the inner walk, so only the last visited sub-folder was kept.
#   * Folders without a mask are skipped with a message instead of crashing.
#   * Rows are collected in a list and concatenated once.
#   * The loop variable no longer shadows the builtin `dir`.
MASK_FILE = "CT1.nii"
TARGET_IMAGES = ("adc_Reg.nii", "max_b_dwi_Reg.nii", "CT_0000.nii.gz")

# First os.walk step yields (root, dirs, files); the default covers a missing dataDir.
subject_dirs = next(os.walk(dataDir), (dataDir, [], []))[1]
print(subject_dirs)

subject_rows = []
for subject in subject_dirs:
    for subroot, subdirs, subfiles in os.walk(os.path.join(dataDir, subject)):
        image_files = [name for name in subfiles if name in TARGET_IMAGES]
        if not image_files:
            continue

        maskName = os.path.join(subroot, MASK_FILE)
        if not os.path.isfile(maskName):
            print(f"skip {subroot}: {MASK_FILE} not found")
            continue

        per_image = []
        for file in image_files:
            imageName = os.path.join(subroot, file)
            print(imageName)
            featureVector = extractor.execute(imageName, maskName)
            # One-row frame: feature names as columns, feature values as the row.
            df_add = pd.DataFrame.from_dict(featureVector.values()).T
            df_add.columns = featureVector.keys()
            per_image.append(df_add)

        # Features of all images in this folder sit side by side in one row.
        df_temp = pd.concat(per_image, axis=1)
        # Kept from the original cell so results.xlsx has the same columns: the
        # first column (the first key returned by the extractor) is removed.
        # Positional slicing avoids dropping same-named columns of other images.
        df_temp = df_temp.iloc[:, 1:]
        df_temp.insert(0, 'Sub', subject)
        subject_rows.append(df_temp)

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(subject_rows, axis=0) if subject_rows else pd.DataFrame()
df.to_excel(os.path.join(dataDir, 'results.xlsx'))

## basic operation

In [None]:
# Input spreadsheets (presumably one feature table per class — confirm)
A_filePath = 'E:/AIDataSet/Radiomics/numeric_feature.xlsx'
B_filePath = 'E:/AIDataSet/Radiomics/B.xlsx'
# Read both spreadsheets into DataFrames
data_A = pd.read_excel(A_filePath)
data_B = pd.read_excel(B_filePath)

In [None]:
# Inspect the table size (number of rows and columns of data_A)
rows,cols = data_A.shape
print(rows,cols)
# Preview the first few rows
data_A.head()

In [None]:
# Show the column names of data_A
print(data_A.columns)

In [None]:
# Add a 'label' column at position 0: 1 for every row of data_A, 0 for every
# row of data_B.
# Each label vector is sized from its own frame. The previous version sized both
# from data_A's row count, which fails when the two tables differ in length.
# The membership check lets the cell be re-run: DataFrame.insert raises if the
# column already exists.
if 'label' not in data_A.columns:
    data_A.insert(0, 'label', np.ones(len(data_A)))
if 'label' not in data_B.columns:
    data_B.insert(0, 'label', np.zeros(len(data_B)))

In [None]:
# Shuffle the rows of data_A.
# A fixed random_state makes the row order (and everything derived from it)
# reproducible after Restart & Run All; change the seed to get another permutation.
data_A = shuffle(data_A, random_state=42)

In [None]:
# NaN handling demo, step 1: set column 'FeatureA' to NaN for every row
# (creates the column if it does not exist, overwrites it otherwise).
data_A['FeatureA'] = np.nan
data_A.head()

In [None]:
# NaN handling demo, step 2: replace every NaN in data_A with 0 and preview.
data_A = data_A.fillna(0)
data_A.head()

In [None]:
# Indexing examples. Only the last expression of a cell is displayed, so the
# first two selections below are evaluated but not shown.
# Select several columns by name
# NOTE(review): 'FeatureB' is not created in any cell shown here — confirm it exists in the spreadsheet.
data_A[['FeatureA','FeatureB']]
# Select several rows by position (rows 0 and 1)
data_A[0:2]
# Select rows 1-2 and column 2 by position
data_A.iloc[1:3,2:3]

## 特征选择：Variance

In [None]:
from sklearn.feature_selection import VarianceThreshold

# Variance-based feature filter on data_A.
# Features with a variance lower than this threshold are removed.
VARIANCE_THRESHOLD = 1e8

# exclude the label and patient No.
X = data_A[data_A.columns[2:]]
selector = VarianceThreshold(VARIANCE_THRESHOLD)
# Only the fitted state is needed here; the transformed matrix was never used.
selector.fit(X)

# get_support() returns a boolean mask; get_support(True) returns integer indices.
keep_mask = selector.get_support()
keep_idx = selector.get_support(True)
#print('每个特征的方差为：'+str(selector.variances_))
print('筛选出特征序号为：'+str(keep_idx))
print('筛选出特征名称为：'+str(X.columns[keep_mask]))
# The removed features must be taken from the negated boolean mask. Applying `~`
# to the integer indices (as before) yields negative positions counted from the
# end, i.e. the wrong columns.
print('筛选掉特征名称为：'+str(X.columns[~keep_mask]))

## LASSO 特征筛选

In [None]:

# Build the LASSO inputs from data_A: every column from the third onward becomes
# a feature, the 'label' column is the target.
# NOTE(review): in the cells shown here data_A's label is constant — confirm the
# table holds both classes before fitting.
colNames = data_A.columns[2:]
y = data_A['label']
# Cast to float64, z-score each feature, then wrap the array back into a
# DataFrame that carries the original feature names.
X = pd.DataFrame(
    StandardScaler().fit_transform(data_A[colNames].astype(np.float64)),
    columns=colNames,
)

In [None]:
from sklearn.linear_model import LassoCV
# Candidate regularisation strengths: 50 values log-spaced between 1e-3 and 1e1.
alphas = np.logspace(-3,1,50) # the regularisation factors
# Fit LASSO on (X, y) with 10-fold cross-validation over the alphas above.
model_lassoCV = LassoCV(alphas=alphas,cv = 10, max_iter=100000).fit(X,y)

In [None]:
# Report the alpha chosen by cross-validation and how many coefficients survived.
print(model_lassoCV.alpha_)
# One coefficient per feature, labelled with the feature name.
coef = pd.Series(model_lassoCV.coef_, index=X.columns)
# print(coef)
nonzero = coef != 0
print("Lasso picked "+str(nonzero.sum()) + " variables and eliminated the other "+ str((~nonzero).sum()))

In [None]:
# Names of the features whose LASSO coefficient is non-zero
index = coef[coef != 0].index
# Keep only those features (X is overwritten with the reduced table)
X = X[index]
# Not the last expression of the cell, so this preview is not displayed
X.head()
# Show the surviving coefficients
print(coef[coef != 0])

## Demo for T-test

In [None]:
# Two small samples of unequal length for the t-test demo.
# Note: `y` replaces the LASSO target that an earlier cell stored under the same name.
x = [1,2,4,7,2,4]
y = [3,2,5,7,4]

In [None]:
# Levene's test on the two samples; element [1] of the result is the p-value
# that the next cell compares against 0.05.
levene(x,y)

In [None]:
# Pick the t-test variant from Levene's p-value: above 0.05 the variances are
# treated as equal (ttest_ind's default), otherwise the unequal-variance form
# is requested.
variances_equal = bool(levene(x,y)[1] > 0.05)
res = ttest_ind(x, y, equal_var=variances_equal)
print(res)
print("the p value ="+ str(res[1]))

In [7]:
import nibabel as nib


In [22]:
# Folders used by the NIfTI check below: source images, masks, converted images.
# Written with forward slashes only. The previous literals mixed '//' with a bare
# '\g', which is an invalid escape sequence in a normal string literal.
_CT_ROOT = 'E:/02_MriDataSet/100_CT/gongbingxin/20240413_ltx'
dataDir = _CT_ROOT + '/images'
maskDir = _CT_ROOT + '/masks'
TargetDir = _CT_ROOT + '/newimages'


In [24]:
# For every file name found under dataDir, load the same-named file from
# TargetDir and from maskDir and print the image array shape.
df = pd.DataFrame()
for root, dirs, files in os.walk(dataDir):
    for file in files:
        # print(os.path.join(root, file))
        imageName = os.path.join(TargetDir, file)
        maskName = os.path.join(maskDir, file)
        print(imageName)
        print(maskName)

        imageNib = nib.load(imageName)
        # Only the disabled conversion code below uses maskNib; it is still
        # loaded here, as in the original cell.
        maskNib = nib.load(maskName)

        imageRawArr = imageNib.get_fdata()
        # Print the shape of the array that was just loaded. The previous version
        # printed `imageArr`, which is only assigned in the disabled code below
        # and therefore worked only with stale kernel state.
        print(imageRawArr.shape)

        # Disabled conversion step, kept for reference:
        # maskArr = maskNib.get_fdata()
        # imageArr = imageRawArr[:,:,0,0,0]
        # imageArr = imageArr.reshape(imageArr.shape[0],imageArr.shape[1],1)
        # # print(imageArr.shape)
        # imageAffine = maskNib.affine.copy()
        # imageHead = maskNib.header.copy()
        # print(imageHead)
        # imageNii = nib.Nifti1Image(imageArr,imageAffine,imageHead)

        # TarImageName=os.path.join(TargetDir, file)

        # nib.save(imageNii,TarImageName)


E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/newimages\anfumian.nii.gz
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/masks\anfumian.nii.gz
(1260, 910, 1)
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/newimages\anshuyuan.nii.gz
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/masks\anshuyuan.nii.gz
(1260, 910, 1)
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/newimages\baijianjun.nii.gz
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/masks\baijianjun.nii.gz
(1260, 910, 1)
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/newimages\bixiuqin.nii.gz
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/masks\bixiuqin.nii.gz
(1260, 910, 1)
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/newimages\caiying.nii.gz
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/masks\caiying.nii.gz
(1260, 910, 1)
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/newimages\changzhanfeng.nii.gz
E://02_MriDataSet//100_CT\gongbingxin//20240413_ltx/masks\changzhanfeng.nii.gz