In [1]:
# Mount Google Drive inside the Colab VM; every data path used in this
# notebook lives under the '/content/gdrive' mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
%matplotlib inline

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import matplotlib.pyplot as plt
import cv2
from PIL import Image
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten

In [3]:
# Directory that holds the input images; files are looked up here as "<id>.jpg".
DATA_INPUT_PATH = '/content/gdrive/MyDrive/Question Generation Paper Implementation/QG_Classifier/Dataset/'

In [4]:
# creating image list and corresponding labels for input
def create_image_and_labels(
    csv_path='/content/gdrive/MyDrive/Question Generation Paper Implementation/QG_Classifier/input.csv',
    image_dir=None,
    image_size=(64, 64),
):
    """Load the labelled images listed in a CSV file.

    The CSV must provide an ``id`` column (the image is read from
    ``<image_dir>/<id>.jpg``) and a ``class`` column (the label).

    Args:
        csv_path: Path of the CSV index file. Defaults to the notebook's
            original hard-coded location.
        image_dir: Directory containing the ``.jpg`` files. ``None`` (the
            default) means the module-level ``DATA_INPUT_PATH``.
        image_size: Target size handed to ``cv2.resize`` as ``dsize``
            (width, height).

    Returns:
        ``(images, labels)`` where ``images`` is a list of grayscale arrays
        resized to ``image_size`` and ``labels`` is the list of matching
        ``class`` values, both in CSV row order.
    """
    if image_dir is None:
        image_dir = DATA_INPUT_PATH

    df = pd.read_csv(csv_path)

    images = []
    labels = []
    # Iterate over the two columns directly: iterrows() builds one Series per
    # row and may upcast an integer id to float when dtypes are mixed, which
    # would turn "1.jpg" into "1.0.jpg".
    for image_id, label in zip(df['id'], df['class']):
        image_path = os.path.join(image_dir, f"{image_id}.jpg")
        # The context manager closes the underlying file once the pixels have
        # been converted to an in-memory grayscale array.
        with Image.open(image_path) as image:
            gray = np.asarray(image.convert("L"))
        images.append(cv2.resize(gray, image_size))
        labels.append(label)

    return images, labels

# Load every image listed in the CSV together with its class label.
images, labels = create_image_and_labels()

In [12]:
# Fold class 2 into class 0 so that only the labels 0 and 1 remain; the models
# in this notebook end in a single sigmoid unit trained with
# binary_crossentropy. Every label is remapped (the earlier hard-coded bound
# of 46 raised IndexError on smaller datasets and skipped rows on larger
# ones), and a new list is built so re-running the cell is harmless.
labels = [0 if label == 2 else label for label in labels]

In [15]:
# shuffling input data
def shuffle_data(images, labels, seed=None):
    """Shuffle images and labels with one shared random permutation.

    Args:
        images: Sequence of images (indexable, e.g. a list of arrays).
        labels: Sequence of labels with the same length as ``images``.
        seed: Optional seed for the random generator. Pass an integer to get
            a reproducible order; ``None`` (the default) draws fresh entropy
            on every call.

    Returns:
        ``(shuffled_images, shuffled_labels)`` as two new lists; element ``k``
        of both lists comes from the same original position. The inputs are
        not modified.

    Raises:
        ValueError: If ``images`` and ``labels`` differ in length.
    """
    if len(images) != len(labels):
        raise ValueError(
            f"images and labels must have the same length, "
            f"got {len(images)} and {len(labels)}"
        )
    # One permutation applied to both sequences keeps each pair aligned.
    order = np.random.default_rng(seed).permutation(len(images))
    shuffled_images = [images[i] for i in order]
    shuffled_labels = [labels[i] for i in order]
    return shuffled_images, shuffled_labels

# Shuffle once up front: the train/test split is a plain positional slice,
# so it would otherwise follow the CSV row order.
images_shuffled, labels_shuffled = shuffle_data(images, labels)

In [16]:
def create_train_test_data(data, labels, test_data=0.2):
    """Split samples positionally into train and test arrays.

    The first ``1 - test_data`` fraction of ``data``/``labels`` becomes the
    training split and the remainder the test split. Image batches are
    reshaped to ``(n, 64, 64, 1)``; the shape of the first training image is
    printed.

    Returns:
        ``X_train, y_train, X_test, y_test`` as numpy arrays.
    """
    split_at = int((1 - test_data) * len(data))

    def _to_model_input(samples):
        # reshaping images to 64*64*1 for model input
        batch = np.array(samples)
        return batch.reshape(len(batch), 64, 64, 1)

    X_train = _to_model_input(data[:split_at])
    X_test = _to_model_input(data[split_at:])
    y_train = np.array(labels[:split_at])
    y_test = np.array(labels[split_at:])

    print(f'Shape of input image: {X_train[0].shape}')
    return X_train, y_train, X_test, y_test

# Uses the default test_data=0.2: the leading 80% of the shuffled samples are
# used for training and the rest for testing.
X_train, y_train, X_test, y_test = create_train_test_data(images_shuffled, labels_shuffled)

Shape of input image: (64, 64, 1)


In [None]:
def create_model():
    """Build the baseline CNN and print its summary.

    Architecture: two 3x3 ReLU convolutions (64 then 32 filters) on a
    64x64x1 input, flattened into a single sigmoid output unit.

    Returns:
        The (not yet compiled) Sequential model.
    """
    network = Sequential([
        Conv2D(64, kernel_size=3, activation='relu', input_shape=(64,64,1)),
        Conv2D(32, kernel_size=3, activation='relu'),
        Flatten(),
        Dense(1, activation='sigmoid'),
    ])

    print('-------------MODEL INFORMATION-----------------')
    network.summary()
    return network

# Build the baseline CNN (the call also prints its layer summary).
model = create_model()

In [None]:
def compile_model(model):
    """Configure `model` in place for binary classification.

    Calls ``model.compile`` with the Adam optimizer, binary cross-entropy
    loss and the accuracy metric. Returns nothing.
    """
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

# Configure the baseline CNN (Adam, binary cross-entropy, accuracy).
compile_model(model)

In [None]:
def train_model(model, X_train, y_train, X_test, y_test, epochs=10):
    """Fit `model` on the training split and report when done.

    The test split is handed to ``model.fit`` as ``validation_data``.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train, y_train: Training inputs and labels.
        X_test, y_test: Inputs and labels passed as validation data.
        epochs: Number of epochs forwarded to ``fit`` (default 10).

    Returns:
        Whatever ``model.fit`` returns.
    """
    validation_pair = (X_test, y_test)
    fit_result = model.fit(
        X_train,
        y_train,
        epochs=epochs,
        validation_data=validation_pair,
    )
    print(f"Trained for {epochs} epochs!")
    return fit_result

# Train the baseline CNN for 20 epochs (overrides the function default of 10).
history = train_model(model, X_train, y_train, X_test, y_test, epochs=20)

In [None]:
# NOTE(review): this prints the object returned by model.fit itself; the
# per-epoch metrics read by the plotting cell live under history.history.
print(history)

In [22]:
def predict(model, list_of_images):
    """Run `model.predict` on a batch of images and announce completion.

    Args:
        model: Object exposing a ``predict`` method.
        list_of_images: Batch passed through to ``model.predict`` unchanged.

    Returns:
        The value returned by ``model.predict``.
    """
    outputs = model.predict(list_of_images)
    print("Predicted!")
    return outputs

# pred = predict(model, X_test[:4])
# print(pred)

In [None]:
# Ground-truth labels of the first four test samples.
print(y_test[:4])

[0 1 1 1]


In [None]:
def prediction_scaling(predictions):
  """Threshold sigmoid scores into hard 0/1 class labels.

  A row is assigned class 1 when its first value is >= 0.5 and class 0
  otherwise; every entry of the row receives that class, so the result has
  the same shape as the input array.

  Unlike the earlier loop-based version, the input is left untouched, so the
  caller keeps the original probabilities.

  Args:
    predictions: 2-D array of scores, one row per sample.

  Returns:
    A new ``int32`` numpy array of 0s and 1s with the shape of
    ``predictions``.
  """
  scores = np.asarray(predictions)
  if scores.size == 0:
    # Nothing to threshold; still honour the int32 return dtype.
    return scores.astype('int32')
  # Keep the first column as shape (n, 1) so it broadcasts across each row.
  is_positive = scores[:, :1] >= 0.5
  return np.broadcast_to(is_positive, scores.shape).astype('int32')

# `pred` was never assigned anywhere (its only assignment in the notebook is
# commented out), so compute it here: score the first four test samples with
# the baseline model, then threshold the scores into class labels.
pred = predict(model, X_test[:4])
preds = prediction_scaling(pred)
print(preds)

In [None]:
# summarize history: one figure for accuracy, then one for loss, each showing
# the training curve and the validation curve
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(f'model {metric}')
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Score one standalone image with the baseline model.
ip = '/content/gdrive/MyDrive/Question Generation Paper Implementation/test_img.jpg'
img_test = Image.open(ip).convert("L")
print(img_test)

# Resize to 64x64 and wrap in a batch of one.
resized = cv2.resize(np.asarray(img_test), (64,64))
test = np.array([resized])
print(test.shape)

# Add the trailing channel axis expected by the model.
test = test.reshape(len(test),64,64,1)
pred_test = predict(model, test)
print(pred_test)

In [None]:
# Sanity check: shape of a batch made of the first two training samples.
print(X_train[:2].shape)

In [None]:
def predict_output(images_dir_path):
  """returns list of preprocessed images ready to be fed to the model

  Every entry of ``images_dir_path`` is opened as an image, converted to
  grayscale and resized to 64x64. Entries are processed in sorted name order
  so the rows of the result can be matched back to file names.

  Args:
    images_dir_path: Directory whose entries are all image files.

  Returns:
    A numpy array of shape ``(n_files, 64, 64, 1)``.
  """
  test = []
  for file_name in sorted(os.listdir(images_dir_path)):
    # os.listdir yields bare entry names, so join them with the directory;
    # opening the bare name would resolve against the current working dir.
    image_path = os.path.join(images_dir_path, file_name)
    with Image.open(image_path) as img:
      gray = np.asarray(img.convert("L"))
    test.append(cv2.resize(gray, (64,64)))
  test = np.array(test)
  test = test.reshape(len(test),64,64,1)
  return test

In [17]:
def create_lenet_model():
  """Build a LeNet-style CNN for 64x64x1 inputs.

  Two conv (3x3, ReLU) + average-pooling stages with 6 and 16 filters,
  followed by dense layers of 120 and 84 ReLU units and a single sigmoid
  output unit.

  Returns:
    The (not yet compiled) Sequential model.
  """
  import keras
  from keras import layers

  feature_extractor = [
      layers.Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(64, 64, 1)),
      layers.AveragePooling2D(),
      layers.Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
      layers.AveragePooling2D(),
  ]
  classifier_head = [
      layers.Flatten(),
      layers.Dense(units=120, activation='relu'),
      layers.Dense(units=84, activation='relu'),
      layers.Dense(units=1, activation = 'sigmoid'),
  ]

  network = keras.Sequential()
  for layer in feature_extractor + classifier_head:
    network.add(layer)
  return network

# Instantiate the LeNet-style model.
lenet = create_lenet_model()

In [18]:
# Print the layer-by-layer summary of the LeNet-style model.
lenet.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 62, 62, 6)         60        
_________________________________________________________________
average_pooling2d_2 (Average (None, 31, 31, 6)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 29, 29, 16)        880       
_________________________________________________________________
average_pooling2d_3 (Average (None, 14, 14, 16)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 3136)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 120)               376440    
_________________________________________________________________
dense_4 (Dense)              (None, 84)               

In [19]:
def compile_model(model):
    """Configure `model` in place with Adam, binary cross-entropy and accuracy.

    NOTE(review): an identical compile_model is defined in an earlier cell of
    this notebook; this definition shadows it.
    """
    compile_settings = {
        'optimizer': 'adam',
        'loss': 'binary_crossentropy',
        'metrics': ['accuracy'],
    }
    model.compile(**compile_settings)
    
# Configure the LeNet-style model (Adam, binary cross-entropy, accuracy).
compile_model(lenet)

In [20]:
def train_model(model, X_train, y_train, X_test, y_test, epochs=3):
    """Fit `model` on the training split, passing the test split as validation data.

    NOTE(review): an earlier cell defines train_model with the same body but a
    default of 10 epochs; this definition replaces it with a default of 3.

    Returns:
        Whatever ``model.fit`` returns.
    """
    held_out = (X_test, y_test)
    training_log = model.fit(X_train, y_train, epochs=epochs, validation_data=held_out)
    print(f"Trained for {epochs} epochs!")
    return training_log

# Train the LeNet-style model for 20 epochs (overrides the function default of 3).
history = train_model(lenet, X_train, y_train, X_test, y_test, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Trained for 20 epochs!


In [None]:
# summarize history: one figure for accuracy, then one for loss, each with
# the training curve and the validation curve
for metric in ('accuracy', 'loss'):
    train_curve = history.history[metric]
    val_curve = history.history[f'val_{metric}']
    plt.plot(train_curve)
    plt.plot(val_curve)
    plt.title(f'model {metric}')
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# summarize history for accuracy, then for loss.
# NOTE(review): this cell draws the same four curves as the preceding
# plotting cell; one of the two can probably be deleted.
curves_by_figure = [
    ('accuracy', 'val_accuracy', 'model accuracy', 'accuracy'),
    ('loss', 'val_loss', 'model loss', 'loss'),
]
for train_key, val_key, figure_title, y_label in curves_by_figure:
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()

In [None]:
# Validation loss on its own, one point per epoch.
val_loss_per_epoch = history.history['val_loss']
plt.plot(val_loss_per_epoch)
plt.xlabel('epochs')
plt.ylabel('validation loss')
plt.show()

In [None]:
# Preprocess every file in the test folder, then score the whole batch with
# the LeNet-style model.
test = predict_output('/content/gdrive/MyDrive/Question Generation Paper Implementation/test')
preds = predict(lenet, test)

# Raw model outputs, one row per image.
print(preds)

In [35]:
# Score one standalone image with the LeNet-style model.
ip = '/content/gdrive/MyDrive/Question Generation Paper Implementation/test/test10.jpg'
# ip = DATA_INPUT_PATH + '44.jpg'
img_test = Image.open(ip).convert("L")
print(img_test)

# Resize to 64x64 and wrap in a batch of one.
resized = cv2.resize(np.asarray(img_test), (64,64))
test = np.array([resized])
print(test.shape)

# Add the trailing channel axis expected by the model.
test = test.reshape(len(test),64,64,1)
pred_test = predict(lenet, test)
print(pred_test)

<PIL.Image.Image image mode=L size=714x364 at 0x7F5D04469CD0>
(1, 64, 64)
Predicted!
[[1.]]


In [34]:
# NOTE(review): scratch arithmetic; 0.00134894 is presumably a model output
# copied in by hand — confirm its origin or remove this cell.
1 - 0.00134894

0.99865106

In [None]:
plt.figure()
img = cv2.imread('/content/gdrive/MyDrive/Question Generation Paper Implementation/test/test3.jpg')
# cv2.imread signals an unreadable or missing file by returning None rather
# than raising, so fail here with a clear message.
if img is None:
    raise FileNotFoundError('cv2.imread could not read the test image')
# OpenCV stores colour channels as BGR while matplotlib interprets them as
# RGB; convert for display so red and blue are not swapped.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [None]:
# Score the whole test split with the LeNet-style model.
pred_test = predict(lenet, X_test)

Predicted!


In [None]:
# Accuracy of the thresholded predictions against the test labels.
# A score >= 0.5 counts as class 1, the same rule prediction_scaling uses
# (this cell previously sent exactly 0.5 to class 0). pred_test itself is
# left untouched so the raw scores stay available and the cell can be re-run.
pred_classes = (np.asarray(pred_test).reshape(-1) >= 0.5).astype('int32')
true_classes = np.asarray(y_test).reshape(-1)
count = int(np.sum(pred_classes == true_classes))

print("Accuracy: ", count/ len(pred_test))

In [None]:
# Number of rows in pred_test.
print(len(pred_test))

In [None]:
# Fit the same `lenet` object again, this time for 100 epochs. train_model is
# already defined in an earlier cell with this exact signature and body, so
# the redundant re-definition that used to sit here has been dropped.
# Note that this overwrites `history` from the previous training run.
history = train_model(lenet, X_train, y_train, X_test, y_test, epochs=100)