## Imports

In [5]:
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## Reading Files

In [6]:
# Reading files in Directory
# os.listdir() returns entries in arbitrary, platform-dependent order; sorting
# the listing keeps the file list, and the row order of every table built from
# it, reproducible from one run or machine to the next.
files=sorted(os.listdir())
files

['.ipynb_checkpoints',
 '1_new_nod.jpg',
 '1_new_non_nod.jpg',
 '2_new_nod.jpg',
 '2_new_non_nod.jpg',
 '3_new_nod.jpg',
 '3_new_non_nod.jpg',
 '4_new_non_nod.jpg',
 '5_new_nod.jpg',
 '5_new_non_nod.jpg',
 '6_new_nod.jpg',
 'Feature Extraction and basic CNN model.ipynb']

In [12]:
# Keep only the entries of the directory listing whose name ends in '.jpg'
images=[name for name in files if name.endswith('.jpg')]
images

['1_new_nod.jpg',
 '1_new_non_nod.jpg',
 '2_new_nod.jpg',
 '2_new_non_nod.jpg',
 '3_new_nod.jpg',
 '3_new_non_nod.jpg',
 '4_new_non_nod.jpg',
 '5_new_nod.jpg',
 '5_new_non_nod.jpg',
 '6_new_nod.jpg']

## Automating Feature Extraction

In [13]:
# Empty feature table: one column for the file name plus one per measurement
feature_names=[
    'image',
    'area',
    'max_h',
    'max_w',
    'ecc',
    'x_cent',
    'y_cent',
    'texture_mean',
    'texture_std',
]
tumor=pd.DataFrame(columns=feature_names)
tumor

Unnamed: 0,image,area,max_h,max_w,ecc,x_cent,y_cent,texture_mean,texture_std


In [16]:
# Automating Reading Features of a set of files
#
# For every image: threshold it into a binary mask, then measure the mask's
# area, maximum column/row extents, their ratio, the centroid and the
# grey-level statistics of the masked pixels.
# One dict per image is collected and the table is built once after the loop:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# is quadratic. Building the table from scratch also makes this cell safe to
# re-run without duplicating rows.

# Grey levels at or above this value are treated as foreground.
THRESHOLD=100

rows=[]
for x in images:
    img=cv2.imread(x,0)
    # cv2.imread reports a missing or unreadable file by returning None
    # instead of raising; fail here with a clear message rather than with an
    # AttributeError a few lines further down.
    if img is None:
        raise FileNotFoundError(f'cv2.imread could not read image {x!r}')

    # Segmentation
    # A single comparison maps every pixel to exactly 0 or 255. The previous
    # pair of "<100" / ">100" assignments left pixels equal to 100 unchanged,
    # which produced a fractional mask value (100/255) that skewed both the
    # centroid and the texture statistics.
    image=np.where(img>=THRESHOLD,255,0).astype(np.uint8)

    mask=image/255.
    foreground=mask!=0

    # Area (number of foreground pixels)
    area=foreground.sum()

    # Max Height (largest foreground count in any single column)
    max_h=foreground.sum(axis=0).max()

    # Max Width (largest foreground count in any single row)
    max_w=foreground.sum(axis=1).max()

    # Texture Features are taken from the original grey levels under the mask
    mask1=mask*img

    if area==0:
        # Nothing passed the threshold: the ratio, centroid and texture
        # statistics are undefined, so record NaN instead of dividing by zero.
        ecc=x_cent=y_cent=texture_mean=texture_std=np.nan
    else:
        # Eccentricity (computed here as the max height / max width ratio)
        ecc=max_h/max_w

        # Centroid (first-order moments divided by the zeroth-order moment)
        m=cv2.moments(mask)
        x_cent=np.round(m['m10']/m['m00'])
        y_cent=np.round(m['m01']/m['m00'])

        texture_mean=np.mean(mask1[foreground])
        texture_std=np.std(mask1[foreground])

    rows.append({'image':x,
                 'area':area,
                 'max_h':max_h,
                 'max_w':max_w,
                 'ecc':ecc,
                 'x_cent':x_cent,
                 'y_cent':y_cent,
                 'texture_mean':texture_mean,
                 'texture_std':texture_std})

# Explicit columns keep the table's shape stable even when no image was found.
tumor=pd.DataFrame(rows,columns=['image','area','max_h','max_w','ecc',
                                 'x_cent','y_cent','texture_mean','texture_std'])

tumor

    

Unnamed: 0,image,area,max_h,max_w,ecc,x_cent,y_cent,texture_mean,texture_std
0,1_new_nod.jpg,178,15,15,1.0,6.0,8.0,188.162921,33.128476
1,1_new_non_nod.jpg,32,8,5,1.6,5.0,5.0,149.78125,23.897874
2,2_new_nod.jpg,63,8,9,0.888889,6.0,5.0,172.301587,30.66596
3,2_new_non_nod.jpg,48,10,7,1.428571,5.0,5.0,198.416667,28.714713
4,3_new_nod.jpg,55,7,10,0.7,7.0,5.0,153.981818,17.316824
5,3_new_non_nod.jpg,35,7,7,1.0,5.0,5.0,217.8,16.712356
6,4_new_non_nod.jpg,30,7,5,1.4,4.0,4.0,174.966667,30.612071
7,5_new_nod.jpg,49,8,8,1.0,4.0,3.0,163.367347,27.311277
8,5_new_non_nod.jpg,54,10,7,1.428571,5.0,6.0,208.888889,33.350366
9,6_new_nod.jpg,98,10,14,0.714286,8.0,6.0,150.867347,28.539496


In [17]:
# Target column Y: 0 for images whose file name ends in 'non_nod.jpg'
# (non-nodal), 1 for every other image (nodal)

a=[0 if name.endswith('non_nod.jpg') else 1 for name in tumor['image']]
tumor['Y']=a

tumor

Unnamed: 0,image,area,max_h,max_w,ecc,x_cent,y_cent,texture_mean,texture_std,Y
0,1_new_nod.jpg,178,15,15,1.0,6.0,8.0,188.162921,33.128476,1
1,1_new_non_nod.jpg,32,8,5,1.6,5.0,5.0,149.78125,23.897874,0
2,2_new_nod.jpg,63,8,9,0.888889,6.0,5.0,172.301587,30.66596,1
3,2_new_non_nod.jpg,48,10,7,1.428571,5.0,5.0,198.416667,28.714713,0
4,3_new_nod.jpg,55,7,10,0.7,7.0,5.0,153.981818,17.316824,1
5,3_new_non_nod.jpg,35,7,7,1.0,5.0,5.0,217.8,16.712356,0
6,4_new_non_nod.jpg,30,7,5,1.4,4.0,4.0,174.966667,30.612071,0
7,5_new_nod.jpg,49,8,8,1.0,4.0,3.0,163.367347,27.311277,1
8,5_new_non_nod.jpg,54,10,7,1.428571,5.0,6.0,208.888889,33.350366,0
9,6_new_nod.jpg,98,10,14,0.714286,8.0,6.0,150.867347,28.539496,1
