In [None]:
!pip install scikit-image

In [None]:


import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from skimage import filters
from skimage.filters import unsharp_mask, sobel_h, sobel_v, prewitt_h, prewitt_v
import numpy as np
import time
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from tensorflow.keras.preprocessing.image import img_to_array

# Root folder of the image dataset; handed to flow_from_directory in
# train_and_evaluate. The path is relative to the notebook's working directory.
data_dir = '../PokemonData'

def blur_image(image):
    """Blur an image with a Gaussian kernel (sigma=2).

    The last axis is treated as the channel axis. The skimage call is wrapped
    in ``tf.numpy_function`` and its output is declared as ``tf.float32``.
    """
    def _gaussian(pixels):
        return filters.gaussian(pixels, sigma=2, channel_axis=-1)

    return tf.numpy_function(_gaussian, [image], tf.float32)

def sharpen_image(image):
    """Sharpen an image with an unsharp mask (radius=1, amount=1).

    The filter is applied per channel (last axis) and keeps the input's value
    range. ``ImageDataGenerator`` applies ``rescale`` only after the
    preprocessing function, so pixels arrive here in 0-255; with skimage's
    default ``preserve_range=False`` the result would be clipped to [0, 1]
    and the image effectively destroyed. No clipping is done here, so the
    sharpened values may slightly overshoot the input range.

    Args:
        image: a single image with channels on the last axis.

    Returns:
        A ``tf.float32`` tensor produced by ``tf.numpy_function``.
    """
    def _unsharp(pixels):
        sharpened = unsharp_mask(
            pixels,
            radius=1,
            amount=1,
            preserve_range=True,
            channel_axis=-1,
        )
        # tf.numpy_function requires the dtype to match the declared Tout.
        return sharpened.astype(np.float32, copy=False)

    return tf.numpy_function(_unsharp, [image], tf.float32)

def high_pass_filter(image):
    """Return what remains of an image after removing its Gaussian blur.

    The blur uses sigma=2 with the last axis treated as channels; the result
    is ``image - blurred`` and can therefore contain negative values.
    """
    low_frequencies = filters.gaussian(image, sigma=2, channel_axis=-1)
    return image - low_frequencies

def sobel_horizontal(image):
    """Run skimage's ``sobel_h`` on channel 0 and replicate it to 3 channels.

    Only the first channel of the input is filtered; the 2-D response is then
    stacked three times along a new last axis.
    """
    def _first_channel_response(pixels):
        return sobel_h(pixels[:, :, 0])

    response = tf.numpy_function(_first_channel_response, [image], tf.float32)
    return tf.stack([response] * 3, axis=-1)

def sobel_vertical(image):
    """Run skimage's ``sobel_v`` on channel 0 and replicate it to 3 channels.

    Only the first channel of the input is filtered; the 2-D response is then
    stacked three times along a new last axis.
    """
    def _first_channel_response(pixels):
        return sobel_v(pixels[:, :, 0])

    response = tf.numpy_function(_first_channel_response, [image], tf.float32)
    return tf.stack([response] * 3, axis=-1)

def prewitt_horizontal(image):
    """Run skimage's ``prewitt_h`` on channel 0 and replicate it to 3 channels.

    Only the first channel of the input is filtered; the 2-D response is then
    stacked three times along a new last axis.
    """
    def _first_channel_response(pixels):
        return prewitt_h(pixels[:, :, 0])

    response = tf.numpy_function(_first_channel_response, [image], tf.float32)
    return tf.stack([response] * 3, axis=-1)

def prewitt_vertical(image):
    """Run skimage's ``prewitt_v`` on channel 0 and replicate it to 3 channels.

    Only the first channel of the input is filtered; the 2-D response is then
    stacked three times along a new last axis.
    """
    def _first_channel_response(pixels):
        return prewitt_v(pixels[:, :, 0])

    response = tf.numpy_function(_first_channel_response, [image], tf.float32)
    return tf.stack([response] * 3, axis=-1)

def create_datagen(preprocessing_function=None):
    """Build the ImageDataGenerator shared by the training and validation flows.

    Pixels are multiplied by 1/255, 20% of the data is reserved as the
    validation subset, and ``preprocessing_function`` (if any) is forwarded
    unchanged to the generator.
    """
    generator_options = {
        "rescale": 1./255,
        "validation_split": 0.2,
        "preprocessing_function": preprocessing_function,
    }
    return ImageDataGenerator(**generator_options)

def create_model(input_shape=(128, 128, 3), num_classes=150):
    """Build and compile the small convolutional classifier.

    Architecture: two Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with 32 and
    64 filters, then Flatten, Dense(128, relu), Dropout(0.5) and a softmax
    output layer.

    Args:
        input_shape: shape of one input image.
        num_classes: width of the softmax output layer. Defaults to 150, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        A Sequential model compiled with Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D(2, 2),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

def create_dnn_model(input_shape=(128, 128, 3), num_classes=150):
    """Build and compile the fully connected classifier.

    Architecture: Flatten, then two Dense(512, relu) layers each followed by
    Dropout(0.2), then a softmax output layer.

    Args:
        input_shape: shape of one input image.
        num_classes: width of the softmax output layer. Defaults to 150, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        A Sequential model compiled with Adam (learning rate 0.001),
        categorical cross-entropy loss and the accuracy metric.
    """
    model = Sequential([
        Flatten(input_shape=input_shape),
        Dense(512, activation='relu'),
        Dropout(0.2),
        Dense(512, activation='relu'),
        Dropout(0.2),
        Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

def _build_model(model_type):
    """Instantiate the (untrained) model registered under ``model_type``."""
    builders = {
        "CNN": create_model,
        "DNN": create_dnn_model,
        "Random Forest": lambda: RandomForestClassifier(n_estimators=100, random_state=42),
        "Gradient Boosting": lambda: GradientBoostingClassifier(
            n_estimators=100, learning_rate=1.0, max_depth=1, random_state=42
        ),
        "SVM": lambda: SVC(kernel='linear', random_state=42),
        "Logistic Regression": lambda: LogisticRegression(max_iter=1000, random_state=42),
    }
    if model_type not in builders:
        # Previously an unknown name silently built a CNN and then failed
        # later inside the scikit-learn branch with an unrelated error.
        raise ValueError(
            f"Unknown model_type {model_type!r}; expected one of {sorted(builders)}"
        )
    return builders[model_type]()


def _collect_epoch(generator):
    """Read every batch of ``generator`` once as (flat features, integer labels)."""
    feature_batches = []
    label_batches = []
    # len(generator) is the number of batches in one pass; indexing avoids
    # the endless looping behaviour of next() on Keras iterators.
    for batch_index in range(len(generator)):
        features, labels = generator[batch_index]
        feature_batches.append(features.reshape(len(features), -1))
        label_batches.append(np.argmax(labels, axis=1))
    if not feature_batches:
        raise ValueError("The image generator produced no batches; check data_dir.")
    return np.concatenate(feature_batches), np.concatenate(label_batches)


def train_and_evaluate(preprocessing_function=None, preprocessing_name="", model_type="CNN"):
    """Train one model on one preprocessing variant and report its accuracy.

    Images are read from ``data_dir`` at 128x128 in batches of 32, using the
    training/validation split configured by ``create_datagen``.

    The Keras models ("CNN", "DNN") are fit for 10 epochs on the generators.
    The scikit-learn models are fit on one full pass over the training subset
    and scored on one full pass over the validation subset. They used to see
    a single 32-image batch of each, which made their scores incomparable
    with the networks. Collecting a full pass holds the whole dataset in
    memory and can be slow for the larger estimators.

    Args:
        preprocessing_function: per-image callable forwarded to the data
            generator, or None for no extra preprocessing.
        preprocessing_name: label used in log output and in the description.
        model_type: one of "CNN", "DNN", "Random Forest", "Gradient Boosting",
            "SVM", "Logistic Regression".

    Returns:
        Tuple ``(accuracy, elapsed_seconds, "<model_type> + <preprocessing_name>")``.
        The elapsed time covers generator setup, training and evaluation.

    Raises:
        ValueError: if ``model_type`` is not recognised, or no images are found
            when training a scikit-learn model.
    """
    start_time = time.time()
    print(f"Training with {preprocessing_name} preprocessing and {model_type} model...")

    # Fail fast on a bad model name before scanning the dataset directory.
    model = _build_model(model_type)

    datagen = create_datagen(preprocessing_function)
    flow_options = dict(target_size=(128, 128), batch_size=32, class_mode='categorical')
    train_generator = datagen.flow_from_directory(data_dir, subset='training', **flow_options)
    validation_generator = datagen.flow_from_directory(data_dir, subset='validation', **flow_options)

    if model_type in ("CNN", "DNN"):
        model.fit(train_generator, epochs=10, validation_data=validation_generator, verbose=1)
        _, accuracy = model.evaluate(validation_generator, verbose=0)
    else:
        train_features, train_labels = _collect_epoch(train_generator)
        validation_features, validation_labels = _collect_epoch(validation_generator)
        model.fit(train_features, train_labels)
        accuracy = accuracy_score(validation_labels, model.predict(validation_features))

    training_time = time.time() - start_time
    print(f"Accuracy with {preprocessing_name} and {model_type}: {accuracy:.2f}")
    return accuracy, training_time, model_type + " + " + preprocessing_name

# Experiment grid: every model type is trained once per preprocessing variant.
# Each preprocessing entry is (callable or None, display name).
preprocessing_techniques = [
    (None, "No Preprocessing"),
    (blur_image, "Blur"),
    (sharpen_image, "Sharpen"),
    (high_pass_filter, "High-Pass Filter"),
    (sobel_horizontal, "Sobel Horizontal"),
    (sobel_vertical, "Sobel Vertical"),
    (prewitt_horizontal, "Prewitt Horizontal"),
    (prewitt_vertical, "Prewitt Vertical"),
]

model_types = ["CNN", "DNN", "Random Forest", "Gradient Boosting", "SVM", "Logistic Regression"]

# Flatten the grid up front; models vary slowest, so runs are grouped by model.
run_plan = [
    (model_type, preprocess_func, preprocess_name)
    for model_type in model_types
    for preprocess_func, preprocess_name in preprocessing_techniques
]

metrics = []

for model_type, preprocess_func, preprocess_name in run_plan:
    accuracy, training_time, description = train_and_evaluate(
        preprocess_func, preprocess_name, model_type
    )
    metrics.append({
        "Model + Preprocessing": description,
        "Accuracy": accuracy,
        "Training Time (s)": training_time,
    })

# One row per (model, preprocessing) run.
df_metrics = pd.DataFrame(metrics)
print(df_metrics)

In [None]:
import pandas as pd
from tabulate import tabulate
import matplotlib.pyplot as plt

# Recorded results of the experiment grid, stored model-major: eight
# preprocessing variants for each of the six models, in the order listed below.
model_names = ["CNN", "DNN", "Random Forest", "Gradient Boosting", "SVM", "Logistic Regression"]
filter_names = [
    "No Preprocessing", "Blur", "Sharpen", "High-Pass Filter",
    "Sobel Horizontal", "Sobel Vertical", "Prewitt Horizontal", "Prewitt Vertical",
]

data = {
    # Each model name repeated once per preprocessing variant.
    "Model": [name for name in model_names for _ in filter_names],
    # The full list of variants repeated once per model.
    "Preprocessing": filter_names * len(model_names),
    "Accuracy": [
        0.923077, 0.615385, 0.769231, 0.769231, 0.769231, 0.769231, 0.846154, 0.923077,
        0.692308, 0.461538, 0.769231, 0.615385, 0.692308, 0.384615, 0.615385, 0.538462,
        0.846154, 0.692308, 0.538462, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231,
        0.538462, 0.384615, 0.461538, 0.615385, 0.461538, 0.615385, 0.692308, 0.384615,
        0.846154, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231,
        0.769231, 0.538462, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231,
    ],
    "Training Time (s)": [
        48.058440, 56.633320, 48.027893, 48.898570, 63.238348, 51.686615, 56.045794, 48.915993,
        45.766466, 44.351937, 51.481051, 54.415695, 52.088268, 48.996642, 48.366774, 47.622144,
        1.163560, 2.405584, 3.167753, 1.905416, 1.142788, 1.001018, 1.211761, 1.163381,
        16.167434, 12.488234, 6.066616, 23.346955, 13.848549, 12.097733, 16.955513, 17.397891,
        1.285106, 1.356832, 1.021704, 1.614405, 1.079370, 1.373316, 1.764342, 1.508606,
        110.745154, 126.193660, 4.222481, 10.666535, 8.093194, 11.186874, 17.907882, 8.518475,
    ],
}

df = pd.DataFrame(data)
df['Accuracy per Second'] = df['Accuracy'].div(df['Training Time (s)'])


def _mean_accuracy_by(column):
    """Return a two-column frame: ``column`` and the mean accuracy per group."""
    grouped_mean = df.groupby(column)['Accuracy'].mean()
    return grouped_mean.reset_index()


avg_accuracy_per_model = _mean_accuracy_by('Model')
avg_accuracy_per_filter = _mean_accuracy_by('Preprocessing')

# One bar chart per aggregate table.
# Each entry: (table, category column, bar colour, title, x-axis label).
bar_charts = [
    (avg_accuracy_per_model, 'Model', 'skyblue',
     'Average Accuracy per Model', 'Model'),
    (avg_accuracy_per_filter, 'Preprocessing', 'lightgreen',
     'Average Accuracy per Filter', 'Preprocessing Filter'),
]

for table, category, bar_colour, chart_title, x_label in bar_charts:
    plt.figure(figsize=(10, 6))
    plt.bar(table[category], table['Accuracy'], color=bar_colour)
    plt.title(chart_title)
    plt.xlabel(x_label)
    plt.ylabel('Average Accuracy')
    # Category names are long; tilt them so they do not overlap.
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Recorded results of the experiment grid, stored model-major: eight
# preprocessing variants for each of the six models, in the order listed below.
model_names = ["CNN", "DNN", "Random Forest", "Gradient Boosting", "SVM", "Logistic Regression"]
technique_names = [
    "No Preprocessing", "Blur", "Sharpen", "High-Pass Filter",
    "Sobel Horizontal", "Sobel Vertical", "Prewitt Horizontal", "Prewitt Vertical",
]

data = {
    # Each model name repeated once per preprocessing technique.
    "Model": [name for name in model_names for _ in technique_names],
    # The full list of techniques repeated once per model.
    "Preprocessing Technique": technique_names * len(model_names),
    "Accuracy": [
        0.923077, 0.615385, 0.769231, 0.769231, 0.769231, 0.769231, 0.846154, 0.923077,
        0.692308, 0.461538, 0.769231, 0.615385, 0.692308, 0.384615, 0.615385, 0.538462,
        0.846154, 0.692308, 0.538462, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231,
        0.538462, 0.384615, 0.461538, 0.615385, 0.461538, 0.615385, 0.692308, 0.384615,
        0.846154, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231,
        0.769231, 0.538462, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231, 0.769231,
    ],
    "Training Time (s)": [
        48.058440, 56.633320, 48.027893, 48.898570, 63.238348, 51.686615, 56.045794, 48.915993,
        45.766466, 44.351937, 51.481051, 54.415695, 52.088268, 48.996642, 48.366774, 47.622144,
        1.163560, 2.405584, 3.167753, 1.905416, 1.142788, 1.001018, 1.211761, 1.163381,
        16.167434, 12.488234, 6.066616, 23.346955, 13.848549, 12.097733, 16.955513, 17.397891,
        1.285106, 1.356832, 1.021704, 1.614405, 1.079370, 1.373316, 1.764342, 1.508606,
        110.745154, 126.193660, 4.222481, 10.666535, 8.093194, 11.186874, 17.907882, 8.518475,
    ],
}

df = pd.DataFrame(data)

# Accuracy obtained per second of training time, per run.
df['Accuracy to Second Ratio'] = df['Accuracy'].div(df['Training Time (s)'])

# One line chart per metric, with one line per model across the techniques.
# Each entry: (metric column, which is also the y-axis label, chart title).
line_charts = [
    ('Accuracy',
     'Comparison of Accuracy for Different Preprocessing Techniques'),
    ('Accuracy to Second Ratio',
     'Comparison of Accuracy to Second Ratio for Different Preprocessing Techniques'),
]

for metric_column, chart_title in line_charts:
    plt.figure(figsize=(12, 6))
    # sort=False keeps the models in order of first appearance in the table.
    for model_name, model_rows in df.groupby('Model', sort=False):
        plt.plot(
            model_rows['Preprocessing Technique'],
            model_rows[metric_column],
            label=model_name,
        )

    plt.xlabel('Preprocessing Technique', fontsize=14)
    plt.ylabel(metric_column, fontsize=14)
    plt.title(chart_title, fontsize=16)
    plt.xticks(rotation=45)
    # Anchor the legend outside the axes so it does not cover the lines.
    plt.legend(title='Model', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.grid(True)
    plt.tight_layout()
    plt.show()
