In [1]:
from keras.preprocessing import image
from glob import glob
import pandas as pd
import os
import matplotlib.pyplot as plt

def img2array(img_path, grayscale = True):
    """Load an image file and return its pixels as a float array scaled by 1/255.

    Parameters
    ----------
    img_path : str
        Path of the image file to read.
    grayscale : bool, default True
        When true the image is loaded with ``color_mode='grayscale'``,
        otherwise with ``color_mode='rgb'``.

    Returns
    -------
    The array produced by ``img_to_array`` divided by 255.
    """
    # `load_img(grayscale=...)` is deprecated (it warns on every call and only
    # sets color_mode internally), so select the colour mode explicitly.
    color_mode = 'grayscale' if grayscale else 'rgb'
    img = image.image_utils.load_img(img_path, color_mode=color_mode)
    return image.image_utils.img_to_array(img) / 255.

2022-10-04 15:53:04.545537: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# Output directory for the pickled datasets written by the conversion loop.
save_dir = './Met2Img_AggMapNet_data'
# exist_ok=True makes this cell safe to re-run without a separate existence
# check, and raises if the path exists but is not a directory.
os.makedirs(save_dir, exist_ok=True)

In [3]:
# One record per (dataset, colormap) image set. Each record holds:
#   'path'    - glob pattern matching the rendered fill-up PNG files,
#   'dataset' - dataset label used in the output file name,
#   'y' / 'x' - paths of the label and feature CSV files,
#   'Met2Img' - name of the image variant (gray or jet colormap).
# The records are generated in dataset order, gray before jet within each dataset.
data_info_list = [
    {
        'path': './images/metagenomics/{0}phy_fill_pix_r0p1.0spbm0.0a1.0{1}bi10_0.0_1.0/fill_*.png'.format(prefix, cmap),
        'dataset': dataset,
        'y': './metagenomics/{0}phy_y.csv'.format(prefix),
        'x': './metagenomics/{0}phy_x.csv'.format(prefix),
        'Met2Img': 'fillup-spb-{0}'.format(cmap),
    }
    for prefix, dataset in (
        ('cir', 'Cirrhosis'),
        ('ibd', 'IBD'),
        ('obe', 'Obesity'),
        ('t2d', 'T2D'),
        ('col', 'CRC'),
    )
    for cmap in ('gray', 'jet')
]

In [4]:
# Tabulate the image-set records: one row per record, one column per dict key.
df_img = pd.DataFrame(data=data_info_list)
# Preview the first three rows as the cell output.
df_img.iloc[:3]

Unnamed: 0,path,dataset,y,x,Met2Img
0,./images/metagenomics/cirphy_fill_pix_r0p1.0sp...,Cirrhosis,./metagenomics/cirphy_y.csv,./metagenomics/cirphy_x.csv,fillup-spb-gray
1,./images/metagenomics/cirphy_fill_pix_r0p1.0sp...,Cirrhosis,./metagenomics/cirphy_y.csv,./metagenomics/cirphy_x.csv,fillup-spb-jet
2,./images/metagenomics/ibdphy_fill_pix_r0p1.0sp...,IBD,./metagenomics/ibdphy_y.csv,./metagenomics/ibdphy_x.csv,fillup-spb-gray


In [5]:

# For every image set listed in df_img: load all matching images, attach the
# metadata and labels, and pickle the resulting frame into save_dir.
for _, ts in df_img.iterrows():
    # Image sets whose variant name contains 'gray' are loaded as grayscale.
    grayscale = 'gray' in ts.Met2Img

    img_list = glob(ts.path)
    if not img_list:
        # Fail fast: otherwise an empty frame would be pickled without any hint
        # that the glob pattern matched nothing.
        raise FileNotFoundError('no images matched pattern %r' % ts.path)

    df = pd.DataFrame(img_list, columns = ['img_path'])
    # The number between the last '_' and the first '.' of the file name
    # ('fill_<n>.png' -> '<n>') identifies the sample.
    df['sid'] = df.img_path.apply(lambda x:os.path.basename(x).split('_')[-1].split('.')[0])
    # Shift by one before joining the labels; presumably the file numbering is
    # 0-based while the label CSV index is 1-based -- TODO confirm.
    df['sid'] = df['sid'].astype(int) + 1
    df = df.sort_values('sid').set_index('sid')

    df['X'] = df.img_path.apply(lambda x:img2array(x, grayscale = grayscale))
    df['dataset'] = ts.dataset
    df['Met2Img'] = ts.Met2Img
    df['x'] = ts.x
    df['y'] = ts.y

    # Labels come from column 'x' of the label CSV, indexed by its first
    # column; the join aligns them with the images on the 'sid' index.
    lb = pd.read_csv(ts.y, index_col=0).x.to_frame(name= 'label')
    df = df.join(lb)

    # os.path.join keeps the path correct whether save_dir is relative or
    # absolute (the old './%s/...' format prepended a second './').
    df.to_pickle(os.path.join(save_dir, 'Met2Img_%s_%s.pkl' % (ts.dataset, ts.Met2Img)))



In [6]:
# List the files in the output directory with human-readable sizes.
ls -lh Met2Img_AggMapNet_data/

total 9.1M
-rw-rw-r-- 1 shenwanxiang shenwanxiang 561K 10月  4 15:53 Met2Img_Cirrhosis_fillup-spb-gray.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 1.6M 10月  4 15:53 Met2Img_Cirrhosis_fillup-spb-jet.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 271K 10月  4 15:53 Met2Img_CRC_fillup-spb-gray.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 771K 10月  4 15:53 Met2Img_CRC_fillup-spb-jet.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 227K 10月  4 15:53 Met2Img_IBD_fillup-spb-gray.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 643K 10月  4 15:53 Met2Img_IBD_fillup-spb-jet.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 520K 10月  4 15:53 Met2Img_Obesity_fillup-spb-gray.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 1.5M 10月  4 15:53 Met2Img_Obesity_fillup-spb-jet.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 831K 10月  4 15:53 Met2Img_T2D_fillup-spb-gray.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang 2.4M 10月  4 15:53 Met2Img_T2D_fillup-spb-jet.pkl
-rw-rw-r-- 1 shenwanxiang shenwanxiang   72 10月  4 15:51 Untitled.ipynb
