<a href="https://colab.research.google.com/github/talhaanwarch/Corona_Virus/blob/master/CT_scan/cross_validation_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
%%capture
!pip install efficientnet

In [0]:
%%capture
!pip install focal-loss

### CLone github repo, to dowlonad data

In [2]:
!git clone https://github.com/UCSD-AI4H/COVID-CT.git

Cloning into 'COVID-CT'...
remote: Enumerating objects: 130, done.[K
remote: Counting objects: 100% (130/130), done.[K
remote: Compressing objects: 100% (128/128), done.[K
remote: Total 5443 (delta 60), reused 9 (delta 2), pack-reused 5313[K
Receiving objects: 100% (5443/5443), 1.09 GiB | 12.77 MiB/s, done.
Resolving deltas: 100% (351/351), done.
Checking out files: 100% (1048/1048), done.


### Extract data in one format

In [0]:
%%capture
!unzip /content/COVID-CT/Images-processed/CT_COVID.zip
!unzip /content/COVID-CT/Images-processed/CT_NonCOVID.zip

In [0]:
!mkdir images
!cp -r /content/CT_COVID/* images
!cp -r /content/CT_NonCOVID/* images


###read meta files

In [0]:
import pandas as pd
import os

In [6]:
covid=os.listdir('/content/CT_COVID')
cov=list(zip(covid,['covid']*(len(covid))))
covid_df = pd.DataFrame(cov,columns=['file','label']) 
covid_df.head()

Unnamed: 0,file,label
0,PIIS0140673620303603%3.png,covid
1,2020.03.30.20047985-p9-56%1.png,covid
2,PIIS0140673620302117_2.png,covid
3,2020.03.22.20040782-p25-1542.png,covid
4,bmj.m606.full-p4-22%1.png,covid


In [7]:
noncovid=os.listdir('/content/CT_NonCOVID')
noncov=list(zip(noncovid,['noncovid']*(len(noncovid))))
noncovid_df = pd.DataFrame(noncov,columns=['file','label']) 
noncovid_df.head()

Unnamed: 0,file,label
0,81%2.jpg,noncovid
1,583.png,noncovid
2,5%7.jpg,noncovid
3,1212.png,noncovid
4,46.jpg,noncovid


In [8]:
dataframe=pd.concat([covid_df,noncovid_df])
dataframe.head()

Unnamed: 0,file,label
0,PIIS0140673620303603%3.png,covid
1,2020.03.30.20047985-p9-56%1.png,covid
2,PIIS0140673620302117_2.png,covid
3,2020.03.22.20040782-p25-1542.png,covid
4,bmj.m606.full-p4-22%1.png,covid


In [9]:
df = pd.get_dummies(dataframe['label'])
df = pd.concat([dataframe, df], axis=1)
df=df.sample(frac=1)
df.head()


Unnamed: 0,file,label,covid,noncovid
60,2020.02.10.20021584-p6-52%6.png,covid,1,0
227,2020.02.17.20024018-p17-61%2.png,covid,1,0
327,2020.03.07.20031393-p7-50%2.png,covid,1,0
78,1920.png,noncovid,0,1
210,1123.png,noncovid,0,1


#### TEST DATA

In [10]:
covid_test=pd.read_csv('/content/COVID-CT/Data-split/COVID/testCT_COVID.txt',header=None,squeeze=True).tolist()
noncovid_test=pd.read_csv('/content/COVID-CT/Data-split/NonCOVID/testCT_NonCOVID.txt',header=None,squeeze=True).tolist()
test=covid_test+noncovid_test
len(test)

203

In [11]:
len(test),len(covid_test),len(noncovid_test)

(203, 98, 105)

In [0]:
test_df=df[df['file'].isin(test)]
train_df=df[~df['file'].isin(test)]

In [13]:
len(df),len(train_df),len(test_df)

(746, 543, 203)

### Data generator

In [0]:
from keras_preprocessing.image import ImageDataGenerator
data_gen= ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    #rotation_range=360,
    #width_shift_range=0.2,
    #height_shift_range=0.2,
    #zoom_range=.2,
    rescale=1/255,
    fill_mode='nearest',
    #shear_range=0.2,
    #brightness_range=[0.2, 2.0],
    )

In [0]:
from tensorflow.keras.models import Sequential,Model
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Conv2D, MaxPool2D, BatchNormalization,Input,MaxPooling2D,GlobalMaxPooling2D,concatenate
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.backend as K
import tensorflow as tf

#Train model

In [0]:
img_shape=300
batch_size=20

In [45]:
from sklearn.model_selection import KFold
import gc
from focal_loss import BinaryFocalLoss

from sklearn.metrics import classification_report,accuracy_score,f1_score,roc_auc_score
classification_reports=[]
kf = KFold(n_splits=5)
history=[]
accuracy=[]
f1_scores=[]
auc=[]
y_pred=[]
fold=0
for train_index, test_index in kf.split(train_df):
  train=train_df.iloc[train_index,:]
  val=train_df.iloc[test_index,:]
  print('-----------fold {}--------------'.format(fold))
  train_generator=data_gen.flow_from_dataframe(train,directory='images',
                                                      target_size=(img_shape,img_shape),
                                                      x_col="file",
                                                      y_col=['covid','noncovid'],
                                                      class_mode='raw',
                                                      shuffle=True,
                                                      batch_size=batch_size)

  val_generator=data_gen.flow_from_dataframe(val,directory='images',
                                                        target_size=(img_shape,img_shape),
                                                        x_col="file",
                                                        y_col=['covid','noncovid'],
                                                        class_mode='raw',
                                                        shuffle=False,
                                                        batch_size=batch_size)
  
  test_generator=data_gen.flow_from_dataframe(test_df,directory='images',
                                                      target_size=(img_shape,img_shape),
                                                      x_col="file",
                                                      y_col=['covid','noncovid'],
                                                      class_mode='raw',
                                                      shuffle=False,
                                                      batch_size=batch_size)
  #train_generator.next()[0].shape,train_generator.next()[1].shape
  import efficientnet.tfkeras as efn
  model =efn.EfficientNetB4(weights ='noisy-student', include_top=False, input_shape = (img_shape,img_shape,3))
    
  x = model.output
  x = GlobalAveragePooling2D()(x)
  #x = Dropout(0.3)(x)
  #x = Dense(128, activation="relu")(x)
  #x = Dropout(0.3)(x)
  #x = Dense(64, activation="relu")(x)
  predictions = Dense(2, activation="softmax")(x)
  model = Model(inputs=model.input, outputs=predictions)


  model.compile(optimizer=Adam(0.001), loss=BinaryFocalLoss(gamma=2), metrics=['accuracy'])

  results = model.fit(train_generator,epochs=25,
                              steps_per_epoch=train_generator.n/batch_size,
                              validation_data=val_generator,
                             validation_steps=val_generator.n/batch_size,
                              callbacks=[ReduceLROnPlateau(monitor='val_loss', factor=0.5,patience=3, min_lr=0.000001),
                                         #ModelCheckpoint('model_{}.hdf5'.format(fold), save_best_only=True, monitor='val_loss', mode='min')
                                         ])
  #model.load_weights('model_{}.hdf5'.format(fold))
  test_generator.reset()
  y_pred.append(model.predict(test_generator,steps=test_generator.n/batch_size,verbose=1))


  
  fold+=1
  del model
  tf.keras.backend.clear_session()
  gc.collect()

-----------fold 0--------------
Found 434 validated image filenames.
Found 109 validated image filenames.
Found 203 validated image filenames.
Downloading data from https://github.com/qubvel/efficientnet/releases/download/v0.0.1/efficientnet-b4_noisy-student_notop.h5
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
-----------fold 1--------------
Found 434 validated image filenames.
Found 109 validated image filenames.
Found 203 validated image filenames.
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25
-------

In [0]:
y_mean=np.mean(y_pred,axis=0)

In [47]:
y_mean=y_mean.round().astype(int)
y_true=test_df.iloc[:,2::]
print(classification_report(y_true, y_mean,target_names=['covid','noncovid']))#
accuracy.append(accuracy_score(y_true,y_mean))
#auc.append(roc_auc_score(y_true,y_pred))
f1=f1_score(y_true,y_mean,average='macro')
print('f1 score is ', f1)
f1_scores.append(f1)

              precision    recall  f1-score   support

       covid       0.93      0.69      0.80        98
    noncovid       0.77      0.95      0.85       105

   micro avg       0.83      0.83      0.83       203
   macro avg       0.85      0.82      0.82       203
weighted avg       0.85      0.83      0.82       203
 samples avg       0.83      0.83      0.83       203

f1 score is  0.8231927336070672
