# Description

The goal of the Board Game Classifier project is to develop a model capable of accurately classifying board game names by analyzing images of game elements. By leveraging advanced image recognition techniques and a comprehensive database of board game images, the aim is to enable the model to identify key features such as game boards, cards, tokens, and dice, and determine the most probable associated board game. This project aims to provide a valuable tool to assist board game enthusiasts, collectors, and researchers in rapidly identifying unknown board games based on visual cues.

# Imports

In [None]:
# Mount Google Drive and make the project folder the working directory, so the
# relative 'images/...' paths used throughout this notebook resolve inside Drive.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Colab Notebooks/BoardGames

Mounted at /content/drive
/content/drive/MyDrive/Colab Notebooks/BoardGames


In [None]:
import requests
from bs4 import BeautifulSoup
import time
import  random
import os
from io import BytesIO
from PIL import Image
import cv2
import html
import numpy as np
import time
import shutil
import filecmp
import joblib
import json
import time
import datetime

from tensorflow.keras.applications import EfficientNetB0
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from keras.callbacks import EarlyStopping

## Install selenium

In [None]:
!pip install selenium

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting selenium
  Downloading selenium-4.10.0-py3-none-any.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m41.5 MB/s[0m eta [36m0:00:00[0m
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.22.0-py3-none-any.whl (384 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m384.9/384.9 kB[0m [31m37.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.10.3-py3-none-any.whl (17 kB)
Collecting async-generator>=1.9 (from trio~=0.17->selenium)
  Downloading async_generator-1.10-py3-none-any.whl (18 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.2.0-py2.py3-none-any.whl (9.7 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl (24 kB)
Collecting h11<1,>=0.9.0 (from wsproto>=0

In [None]:
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [None]:
%%shell

# Add debian buster
# Registers the buster, buster-updates and buster security repositories as extra
# apt sources; each entry is tied to its own keyring file through signed-by.
cat > /etc/apt/sources.list.d/debian.list <<'EOF'
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
EOF

# Add keys
# Fetch the three Debian signing keys from the Ubuntu keyserver.
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A

# Export each key into the keyring file named by the matching signed-by entry above.
apt-key export 77E11517 | gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
apt-key export 22F3D138 | gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
apt-key export E562B32A | gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg

# Prefer debian repo for chromium* packages only
# Note the double-blank lines between entries
# Resulting priorities: chromium* from deb.debian.org = 700, any other package
# from deb.debian.org = 300, packages from release a=eoan = 500.
cat > /etc/apt/preferences.d/chromium.pref << 'EOF'
Package: *
Pin: release a=eoan
Pin-Priority: 500


Package: *
Pin: origin "deb.debian.org"
Pin-Priority: 300


Package: chromium*
Pin: origin "deb.debian.org"
Pin-Priority: 700
EOF



Executing: /tmp/apt-key-gpghome.tS00AyZWla/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
gpg: key DCC9EFBF77E11517: public key "Debian Stable Release Key (10/buster) <debian-release@lists.debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.VOXARhffaB/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
gpg: key DC30D7C23CBBABEE: public key "Debian Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1
Executing: /tmp/apt-key-gpghome.8LoUgYQsTU/gpg.1.sh --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
gpg: key 4DFAB270CAA96DFA: public key "Debian Security Archive Automatic Signing Key (10/buster) <ftpmaster@debian.org>" imported
gpg: Total number processed: 1
gpg:               imported: 1




In [None]:
# Refresh the package index (now including the Debian sources registered above),
# then install Chromium and its WebDriver for Selenium.
!apt-get update
!apt-get install chromium chromium-driver

0% [Working]            Get:1 http://deb.debian.org/debian buster InRelease [122 kB]
0% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (91.18                                                                               Get:2 https://cloud.r-project.org/bin/linux/ubuntu focal-cran40/ InRelease [3,622 B]
0% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (91.180% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (91.180% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (91.18                                                                               Get:3 http://deb.debian.org/debian buster-updates InRelease [56.6 kB]
0% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (91.180% [Connecting to archive.ubuntu.com] [Connecting to security.ubuntu.com (91.18                                                                               Get:4 http://deb.debian.org/debian

# Scraping

In [None]:
# Probe a single BoardGameGeek image page to see where the direct image URL lives.
url = 'https://boardgamegeek.com/image/2702639/pandemic-legacy-season-1'

# A timeout keeps the cell from hanging forever; raise_for_status() fails loudly
# instead of silently parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
content = response.content
soup = BeautifulSoup(content, 'html.parser')

print(soup.prettify())
# The <link as="image"> tag carries the URL of the image file itself.
soup.find('link', {'as': 'image'})['href']

In [None]:
import os
import shutil
import random
import filecmp
import time
import requests
import cv2
import numpy as np
from PIL import Image
from io import BytesIO
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

class BoardGamesScraper:
    """Scrape board game images from BoardGameGeek and arrange them on disk.

    The class covers three concerns:
      * collecting game and image-page URLs (requests + Selenium),
      * downloading the images into ``self.path``,
      * reshaping the downloaded folders into "game vs. rest" and
        train/test directory layouts.
    """

    # Seconds to wait for any single HTTP response before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Set the browse-page URL prefix, an empty URL list and the raw image folder."""
        self.base_url = 'https://boardgamegeek.com/browse/boardgame/page/'
        self.urls = []
        self.path = 'images/raw'

    def web_driver(self):
        """Create and return a headless Chrome WebDriver instance."""
        options = webdriver.ChromeOptions()
        options.add_argument("--verbose")
        options.add_argument('--no-sandbox')
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # Canonical "width,height" form, without a space after the comma.
        options.add_argument("--window-size=1920,1200")
        options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(options=options)
        return driver

    def get_urls(self, page):
        """
        Retrieve the URLs of board games from a browse page.

        Args:
            page (str): The URL of the page to scrape.

        Returns:
            list: Absolute URLs of the board games linked on the page.
        """
        content = requests.get(page, timeout=self.REQUEST_TIMEOUT).content
        time.sleep(1)
        soup = BeautifulSoup(content, 'html.parser')
        return ['https://boardgamegeek.com' + link['href'] for link in soup.find_all('a', {'class': 'primary'})]

    def get_img_urls(self, url):
        """
        Retrieve the image page URLs of a board game.

        Args:
            url (str): The URL of the board game.

        Returns:
            dict: A dictionary mapping the board game title (last URL segment)
            to a list of image page URLs.
        """
        images_urls = []
        title = url.split('/')[-1]
        base_url = url + '/images' + '?pageid='
        suffix = '&gallery=game&tag='
        driver = self.web_driver()
        try:
            driver.get(base_url + '1' + suffix)
            time.sleep(0.25)
            try:
                last_page = int(driver.find_elements(By.XPATH, '//a[@ng-click="selectPage(totalPages)"]')[0]
                                .get_attribute('innerHTML'))
                print(last_page)
            except Exception as e:
                # No usable pager element: fall back to scraping the page that is
                # already loaded instead of failing on an unbound page count.
                print(f'{e}')
                last_page = 1
            for page in range(1, last_page + 1):
                if page > 1:
                    # Page 1 is already loaded; only later pages need a new request.
                    driver.get(base_url + str(page) + suffix)
                    time.sleep(0.25)
                hrefs = [element.get_attribute('href') for element in
                         driver.find_elements(By.CLASS_NAME, "summary-image-thumbnail")]
                images_urls += [href for href in hrefs if href]
        finally:
            # Always release the browser, even when a page load raises.
            driver.quit()
        print(title, len(images_urls), len(set(images_urls)))
        return {title: images_urls}

    def get_url_img_to_download(self, url):
        """
        Get the URL of the image file from an image page.

        Args:
            url (str): The URL of the image page.

        Returns:
            str or None: The URL of the image to download, or None when the page
            could not be fetched or has no ``<link as="image">`` tag.
        """
        try:
            content = requests.get(url, timeout=self.REQUEST_TIMEOUT).content
        except requests.RequestException as e:
            print(f'Error: {e}')
            return None
        time.sleep(0.1)
        soup = BeautifulSoup(content, 'html.parser')
        link = soup.find('link', {'as': 'image'})
        if link is None or not link.get('href'):
            print(f'No downloadable image found on {url}')
            return None
        return link['href']

    def get_image(self, url):
        """
        Download the image at a given URL.

        Args:
            url (str): The URL of the image to download.

        Returns:
            np.array or None: The image as an RGB NumPy array, or None when the
            download or decoding fails.
        """
        try:
            content = requests.get(url, timeout=self.REQUEST_TIMEOUT).content
            time.sleep(0.1)
            # Normalize palette / grayscale / RGBA images to 3-channel RGB so the
            # RGB->BGR conversion in save_image always gets the layout it expects.
            img = Image.open(BytesIO(content)).convert('RGB')
            return np.array(img)
        except Exception as e:
            print(f'Error: {e}')
            return None

    def save_image(self, img, url, title, path):
        """
        Save the image as a JPEG under ``{path}/{title}``.

        The file name is built from the last two segments of ``url``.

        Args:
            img (np.array): The image as an RGB NumPy array.
            url (str): The URL of the image page.
            title (str): The title of the board game.
            path (str): The root path to save the image under.
        """
        if img is None:
            print(f'Skipping {url}: no image data')
            return
        try:
            parts = url.split('/')
            file_name = parts[-1].split('.')[0] + parts[-2].split('.')[0] + '.jpg'
            os.makedirs(f'{path}/{title}', exist_ok=True)
            written = cv2.imwrite(f'{path}/{title}/{file_name}', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
            if not written:
                # cv2.imwrite reports failure through its return value, not an exception.
                print(f'Could not write {path}/{title}/{file_name}')
        except Exception as e:
            print(f'{e}')

    def get_data(self, start_page, end_page, limit_games=False, games_start=None, games_stop=None):
        """
        Scrape board game images from BoardGameGeek into ``self.path``.

        Args:
            start_page (int): The starting browse page number to scrape.
            end_page (int): The ending browse page number to scrape (inclusive).
            limit_games (bool): Whether to restrict scraping to a slice of the games.
            games_start (int): Start index of the slice (used only with limit_games).
            games_stop (int): Stop index of the slice (used only with limit_games).
        """
        self.urls = []
        for page in range(start_page, end_page + 1):
            self.urls += self.get_urls(self.base_url + f'{page}?sort=numvoters&sortdir=desc')

        range_start = games_start if limit_games and games_start is not None else 0
        range_stop = games_stop if limit_games and games_stop is not None else len(self.urls)

        for game_url in self.urls[range_start:range_stop]:
            for title, page_urls in self.get_img_urls(game_url).items():
                for page_url in page_urls:
                    img_url = self.get_url_img_to_download(page_url)
                    if img_url is None:
                        continue
                    img = self.get_image(img_url)
                    if img is None:
                        continue
                    self.save_image(img, page_url, title, self.path)

    def organize_folders(self, source_folder, destination_folder, num_random_files):
        """
        Organize the game images into "<game>/<game>" and "<game>/rest" folders.

        Only games that exist in ``source_folder`` and are not yet present in
        ``destination_folder`` are processed.

        Args:
            source_folder (str): The path of the source folder containing one folder per game.
            destination_folder (str): The path of the destination folder (created if missing).
            num_random_files (int): Budget of random files for each 'rest' folder; it is
                split evenly across the other games.
        """
        os.makedirs(destination_folder, exist_ok=True)
        source_games = self.get_folder_info(source_folder)
        organized_games = self.get_folder_info(destination_folder)
        # Names found only in the destination have no source images to copy.
        pending_games = [name for name in source_games if name not in organized_games]
        image_suffixes = ('.jpg', '.png')

        for game_name in pending_games:
            print(game_name)
            game_folder_path = os.path.join(source_folder, game_name)
            same_folder_path = os.path.join(destination_folder, game_name, game_name)
            rest_folder_path = os.path.join(destination_folder, game_name, 'rest')
            os.makedirs(same_folder_path, exist_ok=True)
            os.makedirs(rest_folder_path, exist_ok=True)

            for image in os.listdir(game_folder_path):
                if image.endswith(image_suffixes):
                    shutil.copy(os.path.join(game_folder_path, image), same_folder_path)

            other_game_folders = [folder for folder in source_games if folder != game_name]
            if not other_game_folders:
                # A single game has nothing to sample the 'rest' class from.
                continue
            random_num = int(num_random_files / len(other_game_folders))
            for other_game_folder in other_game_folders:
                other_game_folder_path = os.path.join(source_folder, other_game_folder)
                other_game_images = [item for item in os.listdir(other_game_folder_path)
                                     if item.endswith(image_suffixes)]
                # random.sample raises when asked for more items than exist.
                sample_size = min(random_num, len(other_game_images))
                for image in random.sample(other_game_images, sample_size):
                    shutil.copy(os.path.join(other_game_folder_path, image), rest_folder_path)

    def get_folder_info(self, directory):
        """
        Get information about the folders directly inside a directory.

        Args:
            directory (str): The path of the directory.

        Returns:
            dict: A dictionary containing the folder names as keys and their paths as values.
        """
        return {
            item: os.path.join(directory, item)
            for item in os.listdir(directory)
            if os.path.isdir(os.path.join(directory, item))
        }

    def generate_unique_list(self, list1, list2):
        """
        Return the items that appear in exactly one of the two collections.

        Items found only in ``list1`` come first, followed by items found only
        in ``list2``, each group in its original iteration order.

        Args:
            list1 (iterable): The first collection.
            list2 (iterable): The second collection.

        Returns:
            list: Items present in one collection but not in the other.
        """
        only_in_first = [item for item in list1 if item not in list2]
        only_in_second = [item for item in list2 if item not in list1]
        return only_in_first + only_in_second

    def create_train_test_folders(self, source_directory, destination_directory, train_ratio=0.2):
        """
        Create train and test folders with shuffled data.

        Args:
            source_directory (str): The path of the source directory (one subfolder per class).
            destination_directory (str): The path of the destination directory.
            train_ratio (float): Despite its name, this is the fraction of each
                class that goes to the *test* folder; the remainder goes to
                train. The default 0.2 gives an 80/20 train/test split. The
                name is kept for backward compatibility.
        """
        train_directory = os.path.join(destination_directory, "train")
        test_directory = os.path.join(destination_directory, "test")

        os.makedirs(train_directory, exist_ok=True)
        os.makedirs(test_directory, exist_ok=True)

        for subdir in os.listdir(source_directory):
            class_directory = os.path.join(source_directory, subdir)
            if not os.path.isdir(class_directory):
                # Stray files next to the class folders are not classes.
                continue

            train_subdir = os.path.join(train_directory, subdir)
            test_subdir = os.path.join(test_directory, subdir)

            os.makedirs(train_subdir, exist_ok=True)
            os.makedirs(test_subdir, exist_ok=True)

            files = os.listdir(class_directory)
            random.shuffle(files)
            split_index = int(len(files) * train_ratio)

            train_files = files[split_index:]
            test_files = files[:split_index]

            for file_name in train_files:
                shutil.copy2(os.path.join(class_directory, file_name), os.path.join(train_subdir, file_name))

            for file_name in test_files:
                shutil.copy2(os.path.join(class_directory, file_name), os.path.join(test_subdir, file_name))

    def compare_folders(self, folder1, folder2):
        """
        Compare two folders to check if their contents are the same.

        Only the files directly inside each folder are compared (by name and
        by content); subdirectories are not inspected.

        Args:
            folder1 (str): The path of the first folder.
            folder2 (str): The path of the second folder.

        Returns:
            bool: True if the folders have the same contents, False otherwise.
        """
        files1 = {name for name in os.listdir(folder1) if os.path.isfile(os.path.join(folder1, name))}
        files2 = {name for name in os.listdir(folder2) if os.path.isfile(os.path.join(folder2, name))}
        if files1 != files2:
            return False
        # cmpfiles needs the list of names to compare as its third argument.
        _, mismatch, errors = filecmp.cmpfiles(folder1, folder2, sorted(files1), shallow=False)
        return not mismatch and not errors

    def check_identical_files(self, folder1, folder2):
        """
        List files that exist under the same name with identical content in both folders.

        Intended as a leakage check between train and test folders. Only files
        directly inside each folder are considered.

        Args:
            folder1 (str): The path of the first folder.
            folder2 (str): The path of the second folder.

        Returns:
            list: Names of the files that are identical in both folders.
        """
        files1 = {name for name in os.listdir(folder1) if os.path.isfile(os.path.join(folder1, name))}
        files2 = {name for name in os.listdir(folder2) if os.path.isfile(os.path.join(folder2, name))}
        identical, _, _ = filecmp.cmpfiles(folder1, folder2, sorted(files1 & files2), shallow=False)
        if identical:
            print(f'Found {len(identical)} identical file(s) in both folders')
        else:
            print('Folders are disjoint')
        return identical

    def delete_folder(self, folder):
        """
        Delete a folder and its contents; errors are printed, not raised.

        Args:
            folder (str): The path of the folder to delete.
        """
        try:
            shutil.rmtree(folder)
        except Exception as e:
            print(f'Error: {e}')


In [None]:
# Single scraper instance reused by the data-preparation cells below.
games_scraper = BoardGamesScraper()

In [None]:
# Arrange images/games into per-game "<game>" and "rest" folders under
# images/testing_area/two_classes; 200 is the budget of "rest" images per game,
# split across the other games.
games_scraper.organize_folders('images/games', 'images/testing_area/two_classes',  200)

In [None]:
from flask import Flask, jsonify, request
import pickle
import pandas as pd

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """Return model predictions for the JSON records posted to /predict.

    The request body is loaded into a DataFrame and only its 'LSTAT' column is
    passed to the globally loaded ``clf``.
    """
    json_ = request.json
    df = pd.DataFrame(json_)
    X = df[['LSTAT']]
    prediction = clf.predict(X)
    # NumPy scalars (e.g. int64, float32) are not JSON serializable; tolist()
    # converts the predictions to native Python values first.
    return jsonify({'prediction': np.asarray(prediction).tolist()})

if __name__ == '__main__':
    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # load a model.pickle that you produced yourself.
    with open('model.pickle', 'rb') as f:
        clf = pickle.load(f)

    # NOTE(review): app.run() blocks until the server is stopped, and host
    # 0.0.0.0 listens on every network interface.
    app.run(host='0.0.0.0', port=8080)

In [None]:
# Map each game name to its folder under images/testing_area/two_classes.
games_list = games_scraper.get_folder_info('images/testing_area/two_classes')

In [None]:
# Split every game's two-class folder into train/test copies under train_test/<game>.
# The loop variable is named game_dir so it does not shadow the builtin dir().
for name, game_dir in games_list.items():
  games_scraper.create_train_test_folders(game_dir, f'images/testing_area/train_test/{name}')

Category: arkham-horror
Train items: 200
Test items: 50
---------------------------
Category: rest
Train items: 160
Test items: 40
---------------------------
Category: carcassonne
Train items: 601
Test items: 150
---------------------------
Category: rest
Train items: 160
Test items: 40
---------------------------
Category: gloomhaven
Train items: 201
Test items: 50
---------------------------
Category: rest
Train items: 160
Test items: 40
---------------------------


# Gloomhaven

## Train

In [None]:
# Leakage check: the train and test folders should not share identical files.
# NOTE(review): this cell checks the carcassonne split although it sits under
# the Gloomhaven heading — confirm which game was intended.
games_scraper.check_identical_files('images/testing_area/train_test/carcassonne/train', 'images/testing_area/train_test/carcassonne/test')


Folders are disjoint


[]

In [None]:
# Gloomhaven training folder, plus the batch size and input resolution used by
# the dataset and backbone cells below.
path = 'images/testing_area/train_test/gloomhaven/train'

batch_size = 8
img_height = 256
img_width = 256

In [None]:
# Training and validation subsets are both drawn from `path` with the same
# validation_split and seed; labels are one-hot encoded ('categorical').
train_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=0.2,
    subset='training',
    image_size = (img_height, img_width),
    batch_size = batch_size,
    label_mode = 'categorical',
    seed = 0
)

val_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=0.2,
    subset='validation',
    image_size = (img_height, img_width),
    batch_size = batch_size,
    label_mode = 'categorical',
    seed = 0
)

Found 361 files belonging to 2 classes.
Using 289 files for training.
Found 361 files belonging to 2 classes.
Using 72 files for validation.


In [None]:
# ImageNet-pretrained EfficientNetB0 without its classification top; a fresh
# backbone instance for the Gloomhaven model.
efficient_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

In [None]:
# "Gloomhaven vs. rest" classifier: random flip/zoom augmentation, the
# EfficientNetB0 backbone, then a small regularized dense head.
model_gloomhaven = Sequential()
model_gloomhaven.add(keras.layers.RandomFlip(mode="horizontal_and_vertical"))
model_gloomhaven.add(keras.layers.RandomZoom(height_factor=(-0.3, -0.2), interpolation="bilinear"))
model_gloomhaven.add(efficient_model)
model_gloomhaven.add(layers.Flatten())
model_gloomhaven.add(layers.Dense(128, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4)))
model_gloomhaven.add(layers.Dropout(0.5))
# The labels are one-hot and the loss is categorical cross-entropy, which
# expects the two outputs to form a probability distribution: use softmax
# rather than two independent sigmoids.
model_gloomhaven.add(layers.Dense(2, activation='softmax'))

In [None]:
# Adam with a learning rate of 1e-5; the loss matches the one-hot labels.
opt = keras.optimizers.Adam(learning_rate=0.00001)
model_gloomhaven.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train_ds and val_ds are already batched by image_dataset_from_directory, so
# batch_size is not passed to fit(): Keras documents that it must not be
# specified for dataset inputs.
history = model_gloomhaven.fit(train_ds,
                    validation_data = val_ds,
                    epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Test

In [None]:
# Gloomhaven held-out test folder; batch size and resolution match training.
path = 'images/testing_area/train_test/gloomhaven/test'

batch_size = 8
img_height = 256
img_width = 256

In [None]:
# Load the whole test folder (no validation split) with one-hot labels.
test_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=None,
    subset=None,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    label_mode='categorical',
    interpolation='bilinear',
    seed=0,
)

Found 90 files belonging to 2 classes.


In [None]:
# Loss and accuracy of the Gloomhaven model on the held-out test set.
test_loss, test_acc = model_gloomhaven.evaluate(test_ds)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

Test loss: 0.7570341229438782
Test accuracy: 0.8666666746139526


# Carcassonne

## Train

In [None]:
# Carcassonne training folder, plus the batch size and input resolution used by
# the dataset and backbone cells below.
path = 'images/testing_area/train_test/carcassonne/train'

batch_size = 8
img_height = 256
img_width = 256

In [None]:
# Training and validation subsets are both drawn from `path` with the same
# validation_split and seed; labels are one-hot encoded ('categorical').
train_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=0.2,
    subset='training',
    image_size = (img_height, img_width),
    batch_size = batch_size,
    label_mode = 'categorical',
    seed = 0
)

val_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=0.2,
    subset='validation',
    image_size = (img_height, img_width),
    batch_size = batch_size,
    label_mode = 'categorical',
    seed = 0
)

Found 761 files belonging to 2 classes.
Using 609 files for training.
Found 761 files belonging to 2 classes.
Using 152 files for validation.


In [None]:
# ImageNet-pretrained EfficientNetB0 without its classification top; a fresh
# backbone instance for the Carcassonne model.
efficient_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

In [None]:
# "Carcassonne vs. rest" classifier: random flip/zoom augmentation, the
# EfficientNetB0 backbone, then a small regularized dense head.
model_carcassonne = Sequential()
model_carcassonne.add(keras.layers.RandomFlip(mode="horizontal_and_vertical"))
model_carcassonne.add(keras.layers.RandomZoom(height_factor=(-0.3, -0.2), interpolation="bilinear"))
model_carcassonne.add(efficient_model)
model_carcassonne.add(layers.Flatten())
model_carcassonne.add(layers.Dense(128, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4)))
model_carcassonne.add(layers.Dropout(0.5))
# The labels are one-hot and the loss is categorical cross-entropy, which
# expects the two outputs to form a probability distribution: use softmax
# rather than two independent sigmoids.
model_carcassonne.add(layers.Dense(2, activation='softmax'))

In [None]:
# Adam with a learning rate of 1e-5; the loss matches the one-hot labels.
opt = keras.optimizers.Adam(learning_rate=0.00001)
model_carcassonne.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train_ds and val_ds are already batched by image_dataset_from_directory, so
# batch_size is not passed to fit(): Keras documents that it must not be
# specified for dataset inputs.
history = model_carcassonne.fit(train_ds,
                    validation_data = val_ds,
                    epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Test

In [None]:
# Carcassonne held-out test folder; batch size and resolution match training.
path = 'images/testing_area/train_test/carcassonne/test'

batch_size = 8
img_height = 256
img_width = 256

In [None]:
# Load the whole test folder (no validation split) with one-hot labels.
test_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=None,
    subset=None,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    label_mode='categorical',
    interpolation='bilinear',
    seed=0,
)

Found 190 files belonging to 2 classes.


In [None]:
# Loss and accuracy of the Carcassonne model on the held-out test set.
test_loss, test_acc = model_carcassonne.evaluate(test_ds)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

Test loss: 0.6327834725379944
Test accuracy: 0.9473684430122375


# Arkham Horror

## Train

In [None]:
# Arkham Horror training folder, plus the batch size and input resolution used
# by the dataset and backbone cells below.
path = 'images/testing_area/train_test/arkham-horror/train'

batch_size = 8
img_height = 256
img_width = 256

In [None]:
# Training and validation subsets are both drawn from `path` with the same
# validation_split and seed; labels are one-hot encoded ('categorical').
train_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=0.2,
    subset='training',
    image_size = (img_height, img_width),
    batch_size = batch_size,
    label_mode = 'categorical',
    seed = 0
)

val_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=0.2,
    subset='validation',
    image_size = (img_height, img_width),
    batch_size = batch_size,
    label_mode = 'categorical',
    seed = 0
)

Found 360 files belonging to 2 classes.
Using 288 files for training.
Found 360 files belonging to 2 classes.
Using 72 files for validation.


In [None]:
# ImageNet-pretrained EfficientNetB0 without its classification top; a fresh
# backbone instance for the Arkham Horror model.
efficient_model = EfficientNetB0(
    include_top=False,
    weights='imagenet',
    input_shape=(img_height, img_width, 3),
)

In [None]:
# "Arkham Horror vs. rest" classifier: random flip/zoom augmentation, the
# EfficientNetB0 backbone, then a small regularized dense head.
model_arkham_horror = Sequential()
model_arkham_horror.add(keras.layers.RandomFlip(mode="horizontal_and_vertical"))
model_arkham_horror.add(keras.layers.RandomZoom(height_factor=(-0.3, -0.2), interpolation="bilinear"))
model_arkham_horror.add(efficient_model)
model_arkham_horror.add(layers.Flatten())
model_arkham_horror.add(layers.Dense(128, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4)))
model_arkham_horror.add(layers.Dropout(0.5))
# The labels are one-hot and the loss is categorical cross-entropy, which
# expects the two outputs to form a probability distribution: use softmax
# rather than two independent sigmoids.
model_arkham_horror.add(layers.Dense(2, activation='softmax'))

In [None]:
# Adam with a learning rate of 1e-5; the loss matches the one-hot labels.
opt = keras.optimizers.Adam(learning_rate=0.00001)
model_arkham_horror.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# train_ds and val_ds are already batched by image_dataset_from_directory, so
# batch_size is not passed to fit(): Keras documents that it must not be
# specified for dataset inputs.
history = model_arkham_horror.fit(train_ds,
                    validation_data = val_ds,
                    epochs = 30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Test

In [None]:
# Arkham Horror held-out test folder; batch size and resolution match training.
path = 'images/testing_area/train_test/arkham-horror/test'

batch_size = 8
img_height = 256
img_width = 256

In [None]:
# Load the whole test folder (no validation split) with one-hot labels.
test_ds = keras.utils.image_dataset_from_directory(
    path,
    validation_split=None,
    subset=None,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    label_mode='categorical',
    interpolation='bilinear',
    seed=0,
)

Found 90 files belonging to 2 classes.


In [None]:
# Loss and accuracy of the Arkham Horror model on the held-out test set.
test_loss, test_acc = model_arkham_horror.evaluate(test_ds)
print('Test loss:', test_loss)
print('Test accuracy:', test_acc)

Test loss: 0.7598509788513184
Test accuracy: 0.8999999761581421


# Data preparation for all models

In [None]:
# Arrange the full raw image set into per-game "<game>" and "rest" folders under
# images/all_data_sep; 800 is the budget of "rest" images per game.
games_scraper.organize_folders('images/raw', 'images/all_data_sep', 800)

In [None]:
# Map each game name to its folder under images/all_data_sep.
games_location_details = games_scraper.get_folder_info('images/all_data_sep')

In [None]:
# Number of game folders found under images/all_data_sep.
len(games_location_details)

241

In [None]:
def find_empty_parent_folders(root_directory):
    """Return the folders under ``root_directory`` that hold no populated subfolder.

    A folder qualifies when none of its immediate subdirectories contains any
    entry. Files lying directly inside the folder are not taken into account.

    Args:
        root_directory (str): Directory whose immediate subfolders are inspected.

    Returns:
        list: Paths of the qualifying folders, in ``os.listdir`` order.
    """
    def _has_populated_child(parent_path):
        # True as soon as one child directory with at least one entry is found.
        for entry in os.listdir(parent_path):
            child_path = os.path.join(parent_path, entry)
            if os.path.isdir(child_path) and len(os.listdir(child_path)) > 0:
                return True
        return False

    candidates = (os.path.join(root_directory, name) for name in os.listdir(root_directory))
    return [
        candidate
        for candidate in candidates
        if os.path.isdir(candidate) and not _has_populated_child(candidate)
    ]

In [None]:
def delete_folders(folders):
    """Remove every existing folder in ``folders`` together with its contents.

    A line is printed per folder reporting whether it was deleted, could not
    be deleted, or did not exist.

    Args:
        folders (iterable): Paths of the folders to delete.
    """
    for target in folders:
        if not os.path.exists(target):
            print(f"Folder does not exist: {target}")
            continue
        try:
            shutil.rmtree(target)
        except OSError as e:
            print(f"Error deleting folder: {target}\n{str(e)}")
        else:
            print(f"Deleted folder: {target}")

In [None]:
# List game folders under images/main_separated whose subfolders are all empty.
# NOTE(review): images/main_separated is written by the create_train_test_folders
# loop in the next cell, so on a fresh run this call comes before that folder is
# populated — confirm the intended cell order.
find_empty_parent_folders('images/main_separated')

In [None]:
# Split every game's two-class folder into train/test copies under main_separated/<game>.
# The loop variable is named game_dir so it does not shadow the builtin dir().
for name, game_dir in games_location_details.items():
  games_scraper.create_train_test_folders(game_dir, f'images/main_separated/{name}')

Category: carcassonne
Train items: 876
Test items: 219
---------------------------
Category: rest
Train items: 554
Test items: 138
---------------------------
Category: catan
Train items: 929
Test items: 232
---------------------------
Category: rest
Train items: 568
Test items: 142
---------------------------
Category: pandemic
Train items: 948
Test items: 236
---------------------------
Category: rest
Train items: 560
Test items: 140
---------------------------
Category: 7-wonders
Train items: 489
Test items: 122
---------------------------
Category: rest
Train items: 576
Test items: 144
---------------------------
Category: terraforming-mars
Train items: 647
Test items: 161
---------------------------
Category: rest
Train items: 576
Test items: 144
---------------------------
Category: dominion
Train items: 276
Test items: 68
---------------------------
Category: rest
Train items: 554
Test items: 138
---------------------------
Category: codenames
Train items: 243
Test items: 60
---

# Auto Modeling

In [None]:
import os
import datetime
import json
import joblib
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, regularizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping


class AutoModeling:
    """
    Trains, evaluates and persists one binary "<game> vs. rest" image classifier
    per game directory.

    Each game directory is expected to contain ``train`` and ``test`` sub-folders,
    each of which holds one class folder named after the game and one named
    ``rest`` (these are the class names passed to the Keras dataset loader).
    """

    def __init__(self, main_dir, img_height=256, img_width=256, batch_size=8, learning_rate=0.000025, epochs=30,
                 patience=8):
        """
        Initializes the AutoModeling class.

        Args:
            main_dir (str): The main directory containing subdirectories for each game.
            img_height (int): The height of the input images (default: 256).
            img_width (int): The width of the input images (default: 256).
            batch_size (int): The batch size for training (default: 8).
            learning_rate (float): The learning rate for the optimizer (default: 0.000025).
            epochs (int): The number of epochs to train the model (default: 30).
            patience (int): The patience value for early stopping (default: 8).
        """
        # os.listdir returns entries in arbitrary order and also lists plain files.
        # Keep only directories and sort them so that positional access such as
        # main_dirs[i] refers to the same game on every run.
        self.main_dirs = sorted(
            os.path.join(main_dir, game)
            for game in os.listdir(main_dir)
            if os.path.isdir(os.path.join(main_dir, game))
        )
        self.img_height = img_height
        self.img_width = img_width
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.patience = patience
        self.model_info = {}

    def _load_dataset(self, directory, class_names, validation_split=None, subset=None):
        """Load one image dataset from ``directory`` with the shared image/batch settings."""
        return keras.utils.image_dataset_from_directory(
            directory,
            validation_split=validation_split,
            subset=subset,
            image_size=(self.img_height, self.img_width),
            batch_size=self.batch_size,
            label_mode='categorical',
            interpolation='bilinear',
            # The same seed for the 'training' and 'validation' subsets keeps the
            # two halves of the split disjoint.
            seed=42,
            class_names=class_names
        )

    def train_test_val(self, path):
        """
        Create train, test, and validation datasets for a given game.

        The validation set is a 20% split of the ``train`` folder; the test set
        is the whole ``test`` folder.

        Args:
            path (str): The path to the game directory.

        Returns:
            tuple: ``(train_ds, test_ds, val_ds)`` -- note that the test dataset
            comes second.
        """
        # normpath makes a trailing separator harmless when extracting the game name.
        game_name = os.path.basename(os.path.normpath(path))
        class_names = [game_name, 'rest']
        path_train = os.path.join(path, 'train')
        path_test = os.path.join(path, 'test')

        train_ds = self._load_dataset(path_train, class_names, validation_split=0.2, subset='training')
        val_ds = self._load_dataset(path_train, class_names, validation_split=0.2, subset='validation')
        test_ds = self._load_dataset(path_test, class_names)

        return train_ds, test_ds, val_ds

    def model_set_up(self):
        """
        Set up the model architecture using EfficientNetB0 as the base model.

        The compiled model is stored on ``self.model``; nothing is returned.
        """
        input_shape = (self.img_height, self.img_width, 3)
        efficient_model = EfficientNetB0(
            include_top=False,
            weights='imagenet',
            input_shape=input_shape
        )
        model = Sequential([
            # An explicit input spec builds the model immediately, which
            # model.summary() below requires.
            keras.Input(shape=input_shape),
            layers.RandomFlip(mode="horizontal_and_vertical"),
            layers.RandomZoom(height_factor=(-0.3, -0.2), interpolation="bilinear"),
            efficient_model,
            layers.Flatten(),
            layers.Dense(128, activation='relu', kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4)),
            layers.Dropout(0.5),
            # Labels are one-hot over two mutually exclusive classes and the loss is
            # categorical cross-entropy, so the outputs must form a probability
            # distribution: softmax rather than two independent sigmoids.
            layers.Dense(2, activation='softmax'),
        ])

        opt = keras.optimizers.Adam(self.learning_rate)
        model.compile(
            optimizer=opt,
            loss='categorical_crossentropy',
            metrics=['accuracy']
        )

        # summary() prints the table itself and returns None, so it must not be
        # interpolated into a string.
        print("Model architecture:")
        model.summary()
        self.model = model

    def model_fit(self, train_ds, val_ds):
        """
        Train the model using the provided train and validation datasets.

        Args:
            train_ds (tf.data.Dataset): The training dataset.
            val_ds (tf.data.Dataset): The validation dataset.

        Returns:
            tf.keras.callbacks.History: The training history.
        """
        early_stopping = EarlyStopping(monitor='val_loss', patience=self.patience)
        # No batch_size here: the datasets are already batched by
        # image_dataset_from_directory, and Keras asks that batch_size not be
        # given for dataset inputs.
        return self.model.fit(
            train_ds,
            validation_data=val_ds,
            epochs=self.epochs,
            callbacks=[early_stopping]
        )

    def evaluate_model(self, test_ds):
        """
        Evaluate the trained model using the provided test dataset.

        Args:
            test_ds (tf.data.Dataset): The test dataset.

        Returns:
            tuple: A tuple containing the test loss and test accuracy.
        """
        test_loss, test_acc = self.model.evaluate(test_ds)
        print('Test loss:', test_loss)
        print('Test accuracy:', test_acc)
        return test_loss, test_acc

    def save_model_info(self, model_name, train_time, history, test_acc, path):
        """
        Save the model information to a JSON file.

        The entry is stored under ``model_name`` in ``<path>/model_info.json``.
        If that file already exists, its content is loaded first (replacing the
        in-memory ``self.model_info``) so earlier entries are kept.

        Args:
            model_name (str): The name of the model.
            train_time (float): The total training time in seconds.
            history (tf.keras.callbacks.History): The training history.
            test_acc (float): The test accuracy.
            path (str): The directory to save the model information in
                (created if missing).
        """
        # log_end is the moment of logging. log_start is derived from it so the
        # two stamps differ by the measured training time instead of being the
        # same instant.
        log_end = datetime.datetime.now()
        log_start = log_end - datetime.timedelta(seconds=train_time)
        timestamp_format = "%Y-%m-%d %H:%M:%S"

        metrics = history.history
        epochs_run = len(metrics['loss'])  # may be fewer than self.epochs due to early stopping

        model_info = {
            'log_start': log_start.strftime(timestamp_format),
            'log_end': log_end.strftime(timestamp_format),
            'model': model_name,
            'train_time': train_time,
            'train_time_per_epoch': train_time / epochs_run,
            'train_acc': metrics['accuracy'][-1],
            'val_acc': metrics['val_accuracy'][-1],
            'test_acc': test_acc,
            'training_details': {
                'epochs': epochs_run,
                'loss': metrics['loss'],
                'val_loss': metrics['val_loss'],
                'accuracy': metrics['accuracy'],
                'val_accuracy': metrics['val_accuracy']
            }
        }

        if path:
            os.makedirs(path, exist_ok=True)
        info_file = os.path.join(path, 'model_info.json')

        if os.path.exists(info_file):
            with open(info_file, 'r') as f:
                self.model_info = json.load(f)

        self.model_info[model_name] = model_info

        with open(info_file, 'w') as f:
            json.dump(self.model_info, f, indent=4)

    def save_model(self, model_name, models_path):
        """
        Save the trained model to a joblib file and release it from the instance.

        Args:
            model_name (str): The name of the model.
            models_path (str): The directory to save the trained models in
                (created if missing).
        """
        if models_path:
            os.makedirs(models_path, exist_ok=True)
        # NOTE(review): joblib pickles the Keras model. The file format is kept so
        # existing loaders keep working, but Keras' own `model.save(...)` is the
        # supported persistence route -- consider migrating.
        joblib.dump(self.model, os.path.join(models_path, f'{model_name}.joblib'))
        # Drop the reference so the next model_set_up() starts from a clean slate.
        del self.model


In [None]:
# Names of the entries directly under images/main_separated
# (presumably one folder per game -- verify against the folder layout).
list_of_dirs = os.listdir('images/main_separated')

In [None]:
# Preview the per-entry paths: the base folder and each entry name joined with '/'.
['/'.join(['images/main_separated', game]) for game in os.listdir('images/main_separated')]

In [None]:
# Create the training helper over images/main_separated; batch_size=32 overrides
# the constructor default of 8, all other hyper-parameters keep their defaults.
model_handler = AutoModeling(main_dir='images/main_separated', batch_size=32)

In [None]:
# Train, evaluate and persist one "<game> vs. rest" model for the game directories
# at positions 16..19 of model_handler.main_dirs.
# A slice replaces range(16, 20) indexing so the cell does not raise IndexError
# when fewer than 20 directories are present (it then processes what exists).
for path in model_handler.main_dirs[16:20]:
    # Build the name once so the JSON entry and the saved model file always agree.
    model_name = f'{os.path.basename(path)} model'

    train_ds, test_ds, val_ds = model_handler.train_test_val(path)
    model_handler.model_set_up()

    start_time = time.time()
    history = model_handler.model_fit(train_ds, val_ds)
    end_time = time.time()
    train_time = end_time - start_time

    test_loss, test_acc = model_handler.evaluate_model(test_ds)
    model_handler.save_model_info(model_name=model_name, train_time=train_time, history=history,
                                  test_acc=test_acc, path='models')
    model_handler.save_model(model_name=model_name, models_path='models')

    # Release the datasets and history before the next iteration builds new ones.
    del train_ds, test_ds, val_ds, history



Found 904 files belonging to 2 classes.
Using 724 files for training.
Found 904 files belonging to 2 classes.
Using 180 files for validation.
Found 228 files belonging to 2 classes.
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
<keras.engine.sequential.Sequential object at 0x7fd7e6d4e050> saved successfully.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Test loss: 0.8087489008903503
Test accuracy: 0.9078947305679321
Found 1087 files belonging to 2 classes.
Using 870 files for training.
Found 1087 files belonging to 2 classes.
Using 217 files for validation.
Found 271 files belonging to 2 classes.
<keras.engine.sequential.Sequential object at 0x7fd7320e1c90> saved successfully.
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epo