In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

###### Uncomment the code below if required #########
#import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
  #      print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Environment Set up

In [None]:
# usual imports #
import os
import numpy as np
import pandas as pd

# visualization imports #
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.image import imread
%matplotlib inline

# consistent plots: one figure size and one font size for ticks and axis labels #
from pylab import rcParams
rcParams.update({
    'figure.figsize': (12, 5),
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'axes.labelsize': 12,
})

# ignore unwanted warnings #
import warnings
warnings.filterwarnings(action='ignore',message='^internal gelsd')

In [None]:
# root directory of the competition input dataset (Kaggle input directories are read-only) #
ROOT_DIR = '/kaggle/input/day-3-kaggle-competition'


In [None]:
# locations of the train / test image folders inside the dataset #
DATA_PATH = os.path.join(ROOT_DIR, 'data_comp/data_comp')
TRAIN_PATH = os.path.join(DATA_PATH, 'train')
# pass the components to os.path.join instead of concatenating them with a hard-coded '/' #
TEST_PATH = os.path.join(DATA_PATH, 'test')

In [None]:
# check the files or directories in the training path #
os.listdir(TRAIN_PATH)

## Data Exploration 
Select any random directory and view one of the images from the training folder

In [None]:
# list the class folders once: os.listdir() returns entries in arbitrary order, #
# so the random index must be applied to the very same listing it was drawn for #
furniture_classes = os.listdir(TRAIN_PATH)
rand = np.random.randint(len(furniture_classes))
furniture_title = furniture_classes[rand]

In [None]:
# folder of the randomly selected furniture class and the names of the files it contains #
furniture_path = os.path.join(TRAIN_PATH,furniture_title)
furniture_images  = os.listdir(furniture_path)

In [None]:
# show up to n_rows * n_cols images of the selected furniture class in a grid #
n_rows = 2
n_cols = 4

# squeeze=False keeps `axes` two-dimensional even for a 1x1 grid #
fig, axes = plt.subplots(n_rows, n_cols, squeeze=False)
for index, ax in enumerate(axes.ravel()):
    ax.axis('off')
    # the folder may hold fewer images than the grid has cells: leave those cells blank #
    if index >= len(furniture_images):
        continue
    # sample_image_path keeps pointing at the last image shown; later cells reuse it #
    sample_image_path = os.path.join(furniture_path, furniture_images[index])
    furniture = imread(sample_image_path)
    ax.imshow(furniture, cmap='binary', interpolation='nearest')
    ax.set_title(furniture_title, fontsize=10)

Clearly:
- These are colored 3D images of furniture
- The same piece of furniture can be presented in any orientation and can vary in size
- The brightness varies, etc.

Hence the model should be good enough to identify the furniture type
when it is shown at different angles.

***Check the number of images in the training folder***

In [None]:
# total image count = sum of the number of files in every class folder #
num_images = sum(
    len(os.listdir(os.path.join(TRAIN_PATH, class_dir)))
    for class_dir in os.listdir(TRAIN_PATH)
)
print('Total number of images in the train dir = {}'.format(num_images))

***Check the dimension of these images***
Calculate the mean dimension to set as the input_shape


In [None]:
# collect the first two dimensions of every training image, then report their means #
dim1, dim2 = [], []

for folder in os.listdir(TRAIN_PATH):
    folder_path = os.path.join(TRAIN_PATH, folder)
    for image_filename in os.listdir(folder_path):
        image_shape = imread(os.path.join(folder_path, image_filename)).shape
        dim1.append(image_shape[0])
        dim2.append(image_shape[1])

print(np.mean(dim1), np.mean(dim2))


In [None]:
# mean of the first and second image dimensions (truncated to int) plus a fixed 3 as the last entry #
IMAGE_SHAPE = (int(np.mean(dim1)),int(np.mean(dim2)),3)


In [None]:
# image shape with the color channel to be later fed into the model #
IMAGE_SHAPE

## Prepare the data for Deep Learning Model 

More than 6000 images is too much data to read all at once in memory. The better strategy would be to use some built in functions in Keras to automatically process the data, generate a flow of batches from a directory, and also manipulate the images.

### Image Manipulation

It's usually a good idea to manipulate the images with rotation, resizing, and scaling so the model becomes more robust to images that our data set doesn't contain. We can use the **ImageDataGenerator** to do this automatically for us.

In [None]:
# import the image data generator 
from tensorflow.keras.preprocessing.image import ImageDataGenerator

***Use the ImageDataGenerator to generate images by using the following parameters***
- rotation_range --> randomly rotate images by up to 90 degrees (the value is in degrees, not a percentage)
- width_shift_range --> shift the image horizontally by up to 10% of its width
- height_shift_range --> shift the image vertically by up to 10% of its height
- rescale --> rescale the pixel values to be between 0 and 1
- shear_range --> apply a random shear transformation with intensity 0.1
- zoom_range --> zoom the image in or out by up to 10%
- horizontal_flip --> randomly flip the image horizontally
- fill_mode --> fill the empty pixels based on the values of the nearest pixel in the original image
- vertical_flip --> flip the image vertically (upside down); disabled here (`False`)
- validation_split --> use 30% of the data in the training folder for validation

In [None]:
# augmentation settings for the generator --> check help(ImageDataGenerator) #
augmentation_params = {
    'rotation_range': 90,
    'width_shift_range': 0.10,
    'height_shift_range': 0.10,
    'rescale': 1./255,
    'shear_range': 0.1,
    'zoom_range': 0.1,
    'horizontal_flip': True,
    'fill_mode': 'nearest',
    'vertical_flip': False,
    'validation_split': 0.3,
}
image_gen = ImageDataGenerator(**augmentation_params)

In [None]:
# visualize one of the original furniture images #
# (sample_image_path is the last image loaded by the grid-plotting loop) #
furniture_orig = imread(sample_image_path)
plt.imshow(furniture_orig)
plt.axis('off')
plt.title('Original Image');

In [None]:
# visualize one randomly transformed version of the same furniture image, produced by the image generator #
plt.imshow(image_gen.random_transform(furniture_orig))
plt.axis('off')
plt.title('Image Generated using Data Generator');

### Generating many manipulated images from directory

In [None]:
# quick check that the class folders are picked up (default arguments); the iterator is not stored #
image_gen.flow_from_directory(TRAIN_PATH)

## Model Creation

***Use a combination of Convolutional and Pooling layers, followed by Dense layers***

In [None]:
# import the libraries #
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D,Dense,Flatten,MaxPooling2D,Dropout
from tensorflow import keras

In [None]:
# clear the session #
keras.backend.clear_session()

# seed NumPy *and* TensorFlow: np.random.seed() alone does not seed #
# TensorFlow's own random generator (weight initialisation, dropout) #
import tensorflow as tf
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# create a sequential model #
model = Sequential()

# convolutional and max pool layers #
# only the first layer declares the input shape; the following layers infer #
# theirs from the output of the layer before them #
model.add(Conv2D(filters=32,kernel_size=(3,3),padding='same',strides=(1,1),
                activation='relu',input_shape=IMAGE_SHAPE))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(filters=64,kernel_size=(3,3),padding='same',strides=(1,1),
                activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Conv2D(filters=128,kernel_size=(3,3),padding='same',strides=(1,1),
                activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

# flatten the layer before feeding into the dense layer #
model.add(Flatten())

# dense layers; dropout after the 64- and 32-unit layers to reduce overfitting #
model.add(Dense(units=128,activation='relu',kernel_initializer='he_normal'))
model.add(Dense(units=64,activation='relu',kernel_initializer='he_normal'))
model.add(Dropout(0.5))
model.add(Dense(units=32,activation='relu',kernel_initializer='he_normal'))
model.add(Dropout(0.5))

# there are 5 classes, hence 5 neurons in the final layer #
model.add(Dense(units=5,activation='softmax'))

# compile the model #
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

In [None]:
# check the model summary # 
model.summary()

In [None]:
#model.layers

## Early Stopping

In [None]:
# import early stopping and model checkpoint #
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping

In [None]:
# stop once val_loss has not improved for 5 consecutive epochs and restore the best weights seen #
early_stop = EarlyStopping(monitor='val_loss',patience=5,restore_best_weights=True)

## Data for training the Model

In [None]:
BATCH_SIZE = 16
IMAGE_SHAPE[:2]

***Train Image***

In [None]:
# batches of the 'training' subset, drawn through the augmenting image_gen #
train_image_gen = image_gen.flow_from_directory(
    TRAIN_PATH,
    target_size=IMAGE_SHAPE[:2],
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    seed=1,
    subset='training',
)

***Validation Image***

In [None]:
# Validation images must not be augmented, otherwise val_loss (which drives early #
# stopping) is measured on randomly distorted images. Draw them from a rescale-only #
# generator instead. Its validation_split has to equal the one given to image_gen so #
# that both generators partition the files identically and the subsets stay disjoint. #
validation_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.3)
validation_image_gen = validation_datagen.flow_from_directory(
    TRAIN_PATH,
    target_size=IMAGE_SHAPE[:2],
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False,
    subset='validation',
    seed=1,
)

In [None]:
# check the class indices #
train_image_gen.class_indices

## Train the Model

In [None]:
# fit the model and train with early stop enabled #
# `epoch` is the upper bound on the number of epochs; early_stop may end training sooner #
epoch = 30
history=model.fit(train_image_gen,
                  validation_data = validation_image_gen,
                  epochs = epoch,callbacks=[early_stop])

In [None]:
# create dataframe of the loss and accuracy of the train and validation data #
# (read from the History object returned by fit() instead of the model.history attribute) #
df_loss = pd.DataFrame(history.history)
df_loss.head()

In [None]:
# loss and accuracy curves share one axis, so label it for both #
ax = df_loss[['loss','accuracy','val_loss','val_accuracy']].plot()
ax.set_xlabel('epochs')
ax.set_ylabel('loss / accuracy')
ax.set_title('Training history');

In [None]:
model.evaluate(validation_image_gen)

In [None]:
test_image_gen = ImageDataGenerator(rescale=1./255)   


In [None]:
os.makedirs('/kaggle/test',exist_ok=True)

In [None]:
try:
    from distutils.dir_util import copy_tree
except ImportError:
    # distutils is deprecated and no longer ships with Python 3.12+;
    # provide an equivalent built on shutil so the cells below keep working.
    import shutil

    def copy_tree(src, dst):
        """Recursively copy the directory ``src`` into ``dst``.

        ``dst`` may already exist, so re-running the copy does not fail.

        Returns
        -------
        list of str
            Destination paths of the files that were copied.
        """
        copied = []

        def _copy_and_record(source, target):
            # shutil.copy2 returns the path of the file it created
            copied.append(shutil.copy2(source, target))

        shutil.copytree(src, dst, copy_function=_copy_and_record, dirs_exist_ok=True)
        return copied

In [None]:
TEST_PATH

### Arrange the test folder in the directory layout required by the image generator: test/test/files

In [None]:
src = TEST_PATH
dest = '/kaggle/test/test'

In [None]:
# copy the test images from src (TEST_PATH) into dest ('/kaggle/test/test') #
copy_tree(src,dest)

In [None]:
test_path = '/kaggle/test'

In [None]:
# unlabeled test batches (class_mode=None); shuffle=False keeps the batch order fixed #
# so the predictions can be paired with test_generator.filenames #
test_generator = test_image_gen.flow_from_directory(
    directory=test_path,
    target_size=IMAGE_SHAPE[:2],
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False,
)

In [None]:
# NOTE(review): the name `pred` is re-bound to a label-mapping function in a later cell, #
# so re-run this cell before the argmax cell if the notebook is executed out of order #
pred = model.predict(test_generator,steps=len(test_generator),verbose=1)

In [None]:
predicted_class_indices=np.argmax(pred,axis=1)

In [None]:
# invert class_indices (name -> index) into an index -> name lookup, then decode the predictions #
labels = {index: name for name, index in train_image_gen.class_indices.items()}
predictions = [labels[class_index] for class_index in predicted_class_indices]

In [None]:
# get filenames 
filenames=test_generator.filenames
results=pd.DataFrame({"Filename":filenames,
                      "Predictions":predictions})

In [None]:
results

In [None]:
results.head()

In [None]:
def file_name(st):
    """Return the image id encoded in a generator file name.

    The id is the last path component with its final extension removed,
    e.g. ``'test/img_001.jpg'`` -> ``'img_001'``.

    Parameters
    ----------
    st : str
        Relative file path, such as an entry of ``test_generator.filenames``.

    Returns
    -------
    str
        The bare file name without directory and extension.
    """
    # basename/splitext cope with any directory depth (including none) and
    # with dots inside the name; positional str.split() indexing raised
    # IndexError or returned a truncated / wrong component in those cases.
    return os.path.splitext(os.path.basename(st))[0]

In [None]:
results['image'] = results['Filename'].apply(file_name)

In [None]:
results.head()

In [None]:
results.drop('Filename',axis=1,inplace=True)

In [None]:
results.head(5)

In [None]:
def pred(st):
    """Translate a predicted class label into its numeric target code.

    'bed' -> 0, 'chair' -> 1, 'swivelchair' -> 3, 'table' -> 4;
    every other value falls through to 2.
    """
    known_targets = {'bed': 0, 'chair': 1, 'swivelchair': 3, 'table': 4}
    # 2 is the catch-all for any label not listed above
    return known_targets.get(st, 2)
        
    
    

In [None]:
results['target'] = results['Predictions'].apply(pred)

In [None]:
results.head(100)

In [None]:
results.drop('Predictions',axis=1,inplace=True)

In [None]:
results.head(10)

In [None]:
results.to_csv('furn30_submission_2.csv',index=False)

In [None]:
os.listdir('/kaggle/working')

In [None]:
results.head()

In [None]:
type(results['image'][0])