In [1]:
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Define constants
IMG_SIZE = 128
NUM_CLASSES = 5
BATCH_SIZE = 32
NUM_EPOCHS = 20

# Define class mapping
class_map = {
    'line': 0,
    'dot_line': 1,
    'hbar_categorical': 2,
    'vbar_categorical': 3,
    'pie': 4
}

# Load data
data_dir = '/content/drive/My Drive/Colab Notebooks/Machine Learning/Assignment 2/Problem 3/charts/'
labels = pd.read_csv(os.path.join(data_dir, 'train_val.csv'))
labels['type'] = labels['type'].map(class_map)
X = []
y = []
for i, row in labels.iterrows():
    img = cv2.imread(os.path.join(data_dir, 'train_val', str(row['image_index'] )+".png"))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    X.append(img)
    y.append(row['type'])
X = np.array(X)
y = to_categorical(y, NUM_CLASSES)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Define CNN architecture
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(NUM_CLASSES, activation='softmax'))

# Compile model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train model
model.fit(X_train, y_train, epochs=NUM_EPOCHS, batch_size=BATCH_SIZE, validation_data=(X_val, y_val))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f6f4c26fd30>

In [3]:
# Evaluate model on test data
test_labels = pd.read_csv(os.path.join(data_dir, 'test.csv'))
test_labels['type'] = test_labels['type'].map(class_map)
X_test = []
y_test = []
for i, row in test_labels.iterrows():
    img = cv2.imread(os.path.join(data_dir, 'test', str(row['image_index'] )+".png"))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    X_test.append(img)
    y_test.append(row['type'])
X_test = np.array(X_test)
y_test = to_categorical(y_test, NUM_CLASSES)
loss, acc = model.evaluate(X_test, y_test)
print(f'Test loss: {loss:.4f}')
print(f'Test accuracy: {acc:.4f}')

Test loss: 0.6459
Test accuracy: 0.9130
