In [2]:
import pandas as pd
import numpy as np
from glob import glob
import cv2
import pandas as pd
import autogluon.core as ag
from autogluon.vision import ImagePredictor
from sklearn.metrics import classification_report,f1_score
from sklearn.model_selection import train_test_split



In [3]:
def flatten(path,label=None):
  '''
  Flatten nested (per-folder) lists into single flat lists.

  path: list of lists of image paths (one inner list per folder)
  label: optional list of lists of labels matching ``path``; may be left
         as None when only the paths have to be flattened

  Returns ``(flat_path, flat_label)``. ``flat_label`` is None when no
  labels were passed in.
  '''
  flat_path = [item for sublist in path for item in sublist]
  # The signature advertises label=None, so honour that default instead of
  # raising a TypeError while trying to iterate over None.
  if label is None:
    return flat_path, None
  flat_label = [item for sublist in label for item in sublist]
  return flat_path, flat_label
  

def prepare(path,split=False,flat=True,test_size=0.2,random_state=None):
  '''
  Collect image paths and labels from a two-class folder tree.

  Expected layout: ``path/<class folder>/<sub-folder>/*.jpg``. The class
  folder name becomes the label of every image below it. Class folders,
  sub-folders and images are visited in sorted order so the result does not
  depend on the file system's listing order; only the first two class
  folders (in sorted order) are read.

  path: main folder in which the two class folders are present
  split: if True (and flat is True) the sub-folders are split into a train
         and a validation set *before* flattening, so all images of one
         sub-folder end up in the same set
  flat: if True return flat lists of images/labels, otherwise one inner
        list per sub-folder (split is ignored in that case)
  test_size: share of sub-folders put into the validation set when splitting
  random_state: seed forwarded to train_test_split for a reproducible split

  Returns ``(path, label)``, or
  ``(train_path, val_path, train_label, val_label)`` when both ``flat`` and
  ``split`` are True.
  Raises ValueError when fewer than two class folders are found.
  '''
  import os  # local import so the function does not rely on another cell

  # A pattern that ends with a separator makes glob return directories only.
  class_dirs=sorted(glob(os.path.join(path,'*','')))
  if len(class_dirs)<2:
    raise ValueError(
      'expected two class folders inside %r, found %d'%(path,len(class_dirs)))

  paths,labels=[],[]
  for class_dir in class_dirs[:2]:
    # normpath drops the trailing separator, whatever its OS-specific form,
    # so basename is the class folder name.
    class_name=os.path.basename(os.path.normpath(class_dir))
    for sub_dir in sorted(glob(os.path.join(class_dir,'*',''))):
      images=sorted(glob(os.path.join(sub_dir,'*.jpg')))
      paths.append(images)
      labels.append([class_name]*len(images))

  if not flat:
    return paths,labels
  if split:
    train_path,val_path,train_label,val_label=train_test_split(
      paths,labels,test_size=test_size,random_state=random_state)
    train_path,train_label=flatten(train_path,train_label)
    val_path,val_label=flatten(val_path,val_label)
    return train_path,val_path,train_label,val_label
  return flatten(paths,labels)

In [4]:
# Load flat image-path / label lists from the separate train and val folders.
# split=False: each folder is used as-is, prepare() does no train/val split.
train_path,train_label=prepare('input/covidctscan3d/train',split=False)
val_path,val_label=prepare('input/covidctscan3d/val',split=False)
# Sanity check: the number of images and labels must match within each set.
print('Train Images and labels',len(train_path),len(train_label))
print('Val Images and labels',len(val_path),len(val_label))


Train Images and labels 335672 335672
Val Images and labels 75532 75532


In [5]:
# Class folder names mapped to the integer ids stored in the label column.
LABEL_TO_ID={'non-covid':0,'covid':1}
# Fixed seed so the row shuffles are identical on every Restart & Run All.
SEED=42

def make_frame(paths,labels):
  '''
  Build a shuffled DataFrame with an 'image' column (file path) and an
  integer 'label' column (see LABEL_TO_ID).

  Raises ValueError if a label is not a key of LABEL_TO_ID.
  '''
  frame=pd.DataFrame(zip(paths,labels),columns=['image','label'])
  frame=frame.sample(frac=1,random_state=SEED).reset_index(drop=True)
  frame['label']=frame['label'].map(LABEL_TO_ID)
  # map() silently turns unknown names into NaN; fail here, not during training.
  if frame['label'].isna().any():
    raise ValueError('labels must be one of %s'%sorted(LABEL_TO_ID))
  frame['label']=frame['label'].astype('int64')
  return frame

train_df=make_frame(train_path,train_label)
val_df=make_frame(val_path,val_label)

In [7]:
%%time
model='squeezenet1.1'
predictor = ImagePredictor(verbosity=1)
predictor.fit(train_df,tuning_data=val_df,hyperparameters={'model':model},time_limit=4*60*60)

The number of requested GPUs is greater than the number of available GPUs.Reduce the number to 1


Downloading /root/.mxnet/models/squeezenet1.1-33ba0f93.zip from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/models/squeezenet1.1-33ba0f93.zip...


100%|██████████| 4495/4495 [00:01<00:00, 2456.00KB/s]


CPU times: user 23.1 s, sys: 2.53 s, total: 25.6 s
Wall time: 28.2 s


<autogluon.vision.predictor.predictor.ImagePredictor at 0x7fdb3860be10>

In [None]:
%%time
fit_result = predictor.fit_summary()
print('Top-1 train acc: %.3f, val acc: %.3f' %(fit_result['train_acc'], fit_result['valid_acc']))
fit_result

In [None]:
# Per-image evaluation. val_df was also passed as tuning_data when fitting,
# so the figure printed as "test acc" is not measured on held-out data.
res = predictor.evaluate(val_df)
print('Top-1 test acc: %.3f' % res[0])
# Per-image predictions compared against the integer labels of val_df.
val_pred = predictor.predict(val_df)
print(classification_report(val_df['label'], val_pred))

In [None]:
# Reload the validation set without flattening: one inner list of image
# paths / labels per sub-folder, as needed for the per-folder voting below.
# NOTE: this rebinds val_path / val_label, which held flat lists until now.
val_path,val_label=prepare('input/covidctscan3d/val',split=False,flat=False)
# Number of sub-folders (not images) on both sides.
len(val_path),len(val_label)

In [None]:
def evaluate(path,label,estimator=None):
  '''
  Folder-level evaluation: one prediction per sub-folder, obtained by a
  majority vote over the per-image predictions of that folder.

  path: list of lists of image paths, one inner list per sub-folder
  label: list of lists of class names ('non-covid' / 'covid') matching path
  estimator: object with a ``predict(DataFrame)`` method; defaults to the
             notebook-level ``predictor``

  Returns ``(test_pred, test_true)``: the voted prediction and the true
  class id (0 = non-covid, 1 = covid) for every non-empty sub-folder.
  Empty sub-folders are skipped because there is nothing to vote on.
  Raises ValueError for unknown class names or for a sub-folder whose
  images do not all carry the same label.
  '''
  from collections import Counter  # local import keeps the cell self-contained

  if estimator is None:
    estimator=predictor
  class_ids={'non-covid':0,'covid':1}
  test_pred,test_true=[],[]
  for images,names in zip(path,label):
    test_df=pd.DataFrame(zip(images,names),columns=['image','label'])
    if test_df.empty:
      continue
    test_df['label']=test_df['label'].map(class_ids)
    # map() yields NaN for names missing from class_ids.
    if test_df['label'].isna().any():
      raise ValueError('labels must be one of %s'%sorted(class_ids))
    true_ids=test_df['label'].unique()
    if len(true_ids)!=1:
      raise ValueError('all images of one sub-folder must share one label')
    pred=list(estimator.predict(test_df))
    # Most frequent prediction; on a tie the value seen first wins, which is
    # the same choice max(pred,key=pred.count) makes, without rescanning the
    # list once per element.
    test_pred.append(Counter(pred).most_common(1)[0][0])
    test_true.append(int(true_ids[0]))
  return test_pred,test_true

In [None]:
# Majority-vote prediction and true label per validation sub-folder.
# NOTE: rebinds val_pred, which an earlier cell used for per-image predictions.
val_pred,val_true=evaluate(val_path,val_label)

In [None]:
# Folder-level report. target_names are listed in class-id order
# (0 = non-covid, 1 = covid), matching the mapping applied to the labels.
print(classification_report(val_true,val_pred,target_names=['non-covid','covid']))

In [None]:
# Macro-averaged F1 of the folder-level predictions; kept in f1 so it can be
# embedded in the saved predictor's file name.
f1=f1_score(val_true,val_pred,average='macro')

In [None]:
# Persist the trained predictor; the file name embeds the architecture name
# and the (unrounded) macro F1 computed above.
predictor.save('predictor_{}_{}.ag'.format(model,f1))