# Create vectors as the input of Task 2
### Task 2:<br>
**input:** a vector of numbers 0/1/2<br>
**output:** 6-category severity<br>

In this notebook, the input of Task 2 is generated by running the trained model `newmodel3.h5` on the CT slices of each patient and keeping the predicted class (0/1/2) of every slice.

In [None]:
import keras
from keras import backend as K
from keras.models import Sequential,Model
from keras.layers import Dense, Dropout,Input
from keras.layers import Conv2D, MaxPooling2D, Flatten,GlobalAveragePooling2D, BatchNormalization
from keras_preprocessing.image import ImageDataGenerator
from keras.callbacks import EarlyStopping
from keras.applications.vgg16 import VGG16

import os
import cv2
import time
import json
import random
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline

### Load newmodel3 weights

In [None]:
# Rebuild the network architecture in code and then load the trained
# parameters stored in newmodel3.h5 into it.
#
# Layout: one 32-filter convolution on the 256x256x3 input, followed by four
# convolutional stages (each a run of 3x3 ReLU convolutions closed by a 2x2
# max-pool), then a dense head ending in a 3-way softmax.
conv_stages = ((64, 2), (128, 2), (256, 2), (512, 3))  # (filters, number of conv layers)

stack = [Conv2D(32, kernel_size=(3, 3),
                activation='relu',
                input_shape=(256,256,3))]

for filters, repeats in conv_stages:
    stack.extend(Conv2D(filters, (3, 3), activation='relu') for _ in range(repeats))
    stack.append(MaxPooling2D(pool_size=(2, 2)))

# Classification head.
stack.extend([
    Flatten(),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dense(3, activation='softmax'),
])

model = Sequential(stack)
model.load_weights('../input/models/newmodel3.h5')

In [None]:
# Read the morbidity table and pull the two columns used below out as
# NumPy arrays: `patient` (the 'Patient' column) and `morbidity`
# (the 'Morbidity' column).
csv_file = pd.read_csv('../input/task-2/morbidity.csv')
patient, morbidity = (
    np.array(csv_file[column]) for column in ('Patient', 'Morbidity')
)

Because all the patients cannot be processed in a single run, the whole training set was created by running this notebook 4 times.<br>
999 training samples, 342 test samples

In [None]:
# Spot-check: show the 'Patient' value stored at position 590.
# NOTE(review): presumably a boundary between the partial runs mentioned
# above -- confirm.
print(patient[590])

In [None]:
# Spot-check: show the 'Patient' value stored at position 890.
# NOTE(review): presumably a boundary between the partial runs mentioned
# above -- confirm.
print(patient[890])

In [None]:
# Positional split: the first 999 rows are the training set, everything
# after that is the test set. Patients and labels are cut at the same index
# so they stay aligned.
train_x, test_x = patient[:999], patient[999:]
train_y, test_y = morbidity[:999], morbidity[999:]

# Sanity check: one size per line, in the order train_x, train_y, test_x, test_y.
print(len(train_x), len(train_y), len(test_x), len(test_y), sep='\n')

## Generate training data
Running this cell takes around half an hour on Kaggle.

In [None]:
# For each selected training patient, classify every CT slice with `model`
# and collect the per-slice class labels (one 1-D vector per patient) in
# `train_data`.
train_data = []
start_time = time.time()

# NOTE(review): the slice [9:10] selects a single patient only; presumably it
# is edited by hand for each partial run -- confirm before a full run.
for Id in train_x[9:10]:
    patient_dir = '../input/task-2/Covid-19 CT/Covid-19 CT/train/Patient ' + str(Id)
    # Patient 991 keeps its slices in a differently named sub-folder.
    folder_path = patient_dir + ('/2020_1_22' if Id==991 else '/CT')

    # File names are visited in lexicographic order.
    slices = []
    for file in sorted(os.listdir(folder_path)):
        resized = Image.open(folder_path + '/' + file).resize((256,256))
        slices.append(np.array(resized)/255)  # scale pixel values by 1/255
    processed = np.stack(slices)

    Y_pred = model.predict(processed,batch_size=64)
    labels = np.argmax(Y_pred,axis=1)
    # Model 1 uses {'nCi':0, 'niCT':1} while Task 2 wants the opposite, so
    # classes 0 and 1 are swapped; class 2 is left unchanged.
    Volume_rep = np.where(labels == 2, labels, 1 - labels)

    print('{:0}: ({:0},)'.format(Id,Volume_rep.shape[0]))
    train_data.append(Volume_rep)

# Report the elapsed wall-clock time as minutes and seconds.
length = time.time() - start_time
print('{:.0f}m {:.0f}s'.format(*divmod(length, 60)))

In [None]:
# Report how many patient vectors were produced, then persist them.
print(len(train_data))

# Each entry of `train_data` has one label per CT slice, so entries may have
# different lengths. Building a plain array from such a ragged list raises a
# ValueError on recent NumPy versions, so an object array is requested
# explicitly in that case. When all entries have the same length (or there
# is at most one entry) the regular rectangular array is kept as before.
# An object array must be read back with np.load(..., allow_pickle=True).
is_ragged = len({len(rep) for rep in train_data}) > 1
train_array = np.array(train_data, dtype=object) if is_ragged else np.array(train_data)
np.save('train2.npy', train_array)

## Generate test data
Running this cell takes around half an hour on Kaggle.

In [None]:
# For each test patient, classify every CT slice with `model` and collect
# the per-slice class labels (one 1-D vector per patient) in `test_data`.
test_data = []
start_time = time.time()

for Id in test_x:
    folder_path = '../input/task-2/Covid-19 CT/Covid-19 CT/test/Patient ' + str(Id) + '/CT'

    # File names are visited in lexicographic order.
    slices = []
    for file in sorted(os.listdir(folder_path)):
        resized = Image.open(folder_path + '/' + file).resize((256,256))
        slices.append(np.array(resized)/255)  # scale pixel values by 1/255
    processed = np.stack(slices)

    Y_pred = model.predict(processed,batch_size=64)
    labels = np.argmax(Y_pred,axis=1)
    # Model 1 uses {'nCi':0, 'niCT':1} while Task 2 wants the opposite, so
    # classes 0 and 1 are swapped; class 2 is left unchanged.
    Volume_rep = np.where(labels == 2, labels, 1 - labels)

    print('{:0}: ({:0},)'.format(Id,Volume_rep.shape[0]))
    test_data.append(Volume_rep)

# Report the elapsed wall-clock time as minutes and seconds.
length = time.time() - start_time
print('{:.0f}m {:.0f}s'.format(*divmod(length, 60)))

In [None]:
# Report how many patient vectors were produced, then persist them.
print(len(test_data))

# Each entry of `test_data` has one label per CT slice, so entries may have
# different lengths. Building a plain array from such a ragged list raises a
# ValueError on recent NumPy versions, so an object array is requested
# explicitly in that case. When all entries have the same length (or there
# is at most one entry) the regular rectangular array is kept as before.
# An object array must be read back with np.load(..., allow_pickle=True).
is_ragged = len({len(rep) for rep in test_data}) > 1
test_array = np.array(test_data, dtype=object) if is_ragged else np.array(test_data)
np.save('test.npy', test_array)