In [None]:
# ! conda install -y gdown

# import gdown
# !gdown --id <file_ID>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Image and Deep Learning Libraries
import os
import cv2
import random
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D

# Plotting libraries
pd.plotting.register_matplotlib_converters()
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# Locations of the two metadata CSV files inside the Kaggle input directory.
TRAINING_METADATA_CSV = '/kaggle/input/lungdata/assignment5_training_data_metadata.csv'
TESTING_METADATA_CSV = '/kaggle/input/lungdata/assignment5_test_data_metadata.csv'

# One metadata table per split. Later cells read `image_name` from both
# tables and `type` / `cause` from the training table.
training_metadata = pd.read_csv(TRAINING_METADATA_CSV)
testing_metadata = pd.read_csv(TESTING_METADATA_CSV)

In [None]:
# Replace missing label values with explicit placeholder strings so that
# every training row carries a value in both label columns.
MISSING_LABEL_DEFAULTS = {'type': 'Normal', 'cause': 'Unknown'}

for label_column, placeholder in MISSING_LABEL_DEFAULTS.items():
    training_metadata[label_column] = training_metadata[label_column].fillna(placeholder)

In [None]:
# Turn the string labels in `type` into integer codes whose numbering follows
# the explicit order below (0 = first entry, 3 = last entry).
# NOTE: reorder_categories() raises unless this list contains exactly the
# labels present in the column, so the spelling (including the lower-case
# 'bacteria') has to match the CSV values.
# NOTE: this cell overwrites `type` with the codes, so it is not re-runnable
# without reloading the metadata first.
TYPE_CATEGORY_ORDER = ['Normal', 'Virus', 'bacteria', 'Stress-Smoking']

type_categorical = training_metadata['type'].astype('category')
type_categorical = type_categorical.cat.reorder_categories(TYPE_CATEGORY_ORDER, ordered=True)
training_metadata['type'] = type_categorical.cat.codes
print(training_metadata['type'])

In [None]:
# One-hot encode the integer class code stored in `type` into four indicator
# columns. The position of a name in this list is the class code it stands
# for (0 -> 'Normal', 1 -> 'Virus', 2 -> 'Bacteria', 3 -> 'Stress-Smoking').
ONE_HOT_COLUMNS = ['Normal', 'Virus', 'Bacteria', 'Stress-Smoking']

# Each indicator column is built with a vectorised comparison and addressed
# by name, so the cell does not depend on the number of rows in the frame or
# on the positions of its columns. float64 matches the dtype that an
# np.zeros()-initialised column would have. Rows whose code matches none of
# the classes get 0.0 in every indicator column.
for class_code, column_name in enumerate(ONE_HOT_COLUMNS):
    training_metadata[column_name] = (training_metadata['type'] == class_code).astype('float64')

In [None]:
# Display the first 7 rows of the training metadata for a visual check.
training_metadata.head(7)

In [None]:
# Display the last 100 rows of the training metadata for a visual check.
training_metadata.tail(100)

In [None]:
# Fraction of the training metadata rows held out for validation. Both the
# training and the validation generator must use the same value so that the
# 'training' and 'validation' subsets partition the frame consistently.
VALIDATION_SPLIT = 0.3

# Training images: pixel values scaled by 1/255 plus random zoom and
# horizontal flip augmentation.
train_generator = ImageDataGenerator(rescale = 1./255, validation_split = VALIDATION_SPLIT, zoom_range = 0.2, horizontal_flip = True)
# Validation images: same scaling and same split, but NO augmentation, so the
# validation metrics are measured on unmodified images.
validation_generator = ImageDataGenerator(rescale = 1./255, validation_split = VALIDATION_SPLIT)
# Test images: scaling only.
test_generator = ImageDataGenerator(rescale = 1./255)

TARGET_SIZE = (200, 200)
BATCH_SIZE = 45
# "raw" makes the generator yield the values of the y_col columns as-is,
# i.e. the one-hot indicator columns listed in `columns`.
CLASS_MODE = "raw" 
columns = ["Normal", "Virus", "Bacteria", "Stress-Smoking"]

# NOTE(review): the image directory below lives under 'lung-data' while the
# metadata CSVs and the test images live under 'lungdata' -- confirm that
# both Kaggle datasets are attached and that this path is intended.
generated_train = train_generator.flow_from_dataframe(dataframe=training_metadata,
                                                 directory='/kaggle/input/lung-data/train/train',
                                                 x_col='image_name',
                                                 y_col=columns,
                                                 target_size=TARGET_SIZE,
                                                 subset='training',
                                                 batch_size=BATCH_SIZE, 
                                                 class_mode = CLASS_MODE)

# Drawn from `validation_generator` (not `train_generator`) so that zoom/flip
# augmentation is not applied to the held-out images.
generated_validation = validation_generator.flow_from_dataframe(dataframe=training_metadata,
                                                 directory='/kaggle/input/lung-data/train/train',
                                                 x_col='image_name',
                                                 y_col=columns,
                                                 target_size=TARGET_SIZE,
                                                 subset='validation',
                                                 batch_size=BATCH_SIZE, 
                                                 class_mode = CLASS_MODE)

# shuffle=False keeps predictions in the same order as the rows of
# `testing_metadata`; class_mode=None yields images only (no labels).
generator_test = test_generator.flow_from_dataframe(dataframe=testing_metadata,
                                              directory='/kaggle/input/lungdata/images/images/test',
                                              x_col='image_name',
                                              target_size=TARGET_SIZE,
                                              batch_size=BATCH_SIZE,
                                              shuffle = False,
                                              class_mode = None)

In [None]:
from tensorflow.keras import regularizers

# Four-stage CNN: each stage is Conv2D(3x3) -> ReLU -> 2x2 max-pool -> dropout.
# Only the first convolution carries an L2 kernel penalty and declares the
# input shape (taken from the training generator). The classifier head is
# Dense(64) -> ReLU -> dropout -> Dense(4) -> softmax.
model = Sequential([
    # Stage 1
    Conv2D(64, (3, 3), kernel_regularizer = regularizers.l2(0.05), input_shape = generated_train.image_shape),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 3
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 4
    Conv2D(128, (3, 3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head
    Flatten(),
    Dense(64),
    Activation('relu'),
    Dropout(0.5),

    Dense(4),
    Activation('softmax'),
])

model.summary()

In [None]:
# Import from tensorflow.keras, like the rest of the notebook, so that every
# layer and model object comes from the same Keras implementation. Mixing the
# standalone `keras` package with `tensorflow.keras` objects can fail on
# installations where the two are different packages.
from tensorflow.keras import Input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Concatenate

# --- Parallel-convolution feature extractor ---------------------------------
# Three 'same'-padded convolutions with kernel sizes 3, 5 and 7 are applied to
# the same input and concatenated. The filter count starts at 248 and drops
# by 2 for each successive branch.
# NOTE(review): `conv_model` is not referenced by any later cell visible in
# this notebook -- confirm whether it is still needed.
size = 248
convs = []
parallel_kernels = [3, 5, 7]
inp = Input(shape = generated_train.image_shape)

for kernel_size in parallel_kernels:
    # The input shape is already fixed by `inp`, so no `input_shape` is
    # passed to the layer itself.
    convolution = Conv2D(size, kernel_size, activation = 'relu', padding = 'same', strides = 1)(inp)
    convs.append(convolution)
    size -= 2

out = Concatenate()(convs)
conv_model = Model(inputs = inp, outputs = out)

# --- Sequential classifier ----------------------------------------------------
# This rebinds `model`, replacing any model built in an earlier cell; the
# network below is therefore the one that the following cells compile, train
# and use for prediction.
model = Sequential()
model.add(Conv2D(64, (3, 3), input_shape = generated_train.image_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.50))

# Four outputs, one per class, normalised with softmax.
model.add(Dense(4))
model.add(Activation('softmax'))

model.summary()

In [None]:
from tensorflow.keras import optimizers

# Time-based learning-rate decay: lr_t = 0.01 / (1 + 1e-6 * step).
# This is the schedule that SGD's legacy `decay=1e-6` argument applied; it is
# expressed as an explicit schedule because the `lr=` alias is deprecated and
# newer Keras optimizers no longer accept `decay=`.
lr_schedule = optimizers.schedules.InverseTimeDecay(
    initial_learning_rate = 0.01,
    decay_steps = 1,
    decay_rate = 1e-6,
)

opt = optimizers.SGD(learning_rate = lr_schedule, momentum = 0.9, nesterov = True)

# categorical_crossentropy matches the one-hot label columns produced by the
# generators and the 4-way softmax output of the model.
model.compile(loss = 'categorical_crossentropy', 
              optimizer = opt, 
              metrics = ['accuracy'])

# `performance` holds the History object returned by fit().
performance = model.fit(generated_train, epochs = 5, validation_data = generated_validation)

In [None]:
# Predict class probabilities for every test image and keep the index of the
# most probable class. np.argmax over the softmax output replaces
# Sequential.predict_classes(), which is no longer available in current
# TensorFlow releases.
class_probabilities = model.predict(generator_test)

# Shift the 0-based class indices to 1-based labels in one vectorised step,
# independent of how many test images there are.
# NOTE: despite its name, `y_pred_proba` holds class labels, not
# probabilities; the name is kept because later cells read it.
y_pred_proba = np.argmax(class_probabilities, axis = -1) + 1

In [None]:
# Print every predicted label on its own line, in prediction order.
for predicted_label in y_pred_proba:
    print(predicted_label)

In [None]:
# Offset added to the default 0-based row index of the output frame.
# NOTE(review): presumably the id of the first test sample -- confirm against
# the test metadata / expected submission format.
INDEX_OFFSET = 5309

# Wrap the predicted labels in a single-column frame and shift its index.
output = pd.DataFrame(y_pred_proba)
output.index = output.index + INDEX_OFFSET
print(output)

# output.to_csv("output.csv", header = ["type"], index_label = "id")