Αποστολάτος Ιωάννης sdi1900012 \\
Βασιλείου Ρηγίνος sdi1900019

In [None]:
import pandas as pd
import numpy as np
import datetime

In [None]:
# Load the "Best Books Ever" CSV from the drive/MyDrive folder into a DataFrame.
df = pd.read_csv('drive/MyDrive/books_1.Best_Books_Ever.csv')

# Display the raw frame.
df

Remove nan

In [None]:
# Columns that must be present for a row to be kept.
columns_to_dropna = ['bookId', 'ratingsByStars', "description", "genres", "publishDate"]
# Drop every row that has a missing value in at least one of those columns.
df = df.dropna(subset=columns_to_dropna)

df

In [None]:
# Peek at the raw 'ratingsByStars' values before they are parsed.
df["ratingsByStars"].head(10)
# Python type of a single entry (row label 1).
type(df["ratingsByStars"][1])

# Show one raw entry to see how it is laid out.
df["ratingsByStars"][1]

Separate ratings

In [None]:
# Split the comma-separated 'ratingsByStars' text into one column per star
# level (assumed to be ordered 5 stars -> 1 star, as the column names imply).
columns_to_convert = ['ratingStar5', 'ratingStar4', 'ratingStar3', 'ratingStar2', 'ratingStar1']
df[columns_to_convert] = df['ratingsByStars'].str.split(',', expand=True)

# keep numerical values
for column in columns_to_convert:
    # regex=True must be explicit: since pandas 2.0 str.replace treats the
    # pattern as a literal string by default, so r'\D' would match nothing,
    # the brackets/quotes would survive and to_numeric would yield only NaN.
    df[column] = df[column].str.replace(r'\D', '', regex=True)
    df[column] = pd.to_numeric(df[column], errors='coerce')

df.head(5)

In [None]:
# Peek at the raw 'genres' strings before extracting a single genre from them.
df['genres'].head(5)

### Transform genres to genreSingle

In [None]:
# Keep only the first genre of each stringified list.
# Stripping brackets, quotes and spaces from BOTH ends (instead of removing
# only '[') keeps single-genre lists from retaining a trailing ']' and makes
# "['Fiction']" and "['Fiction', ...]" map to the same label.
df['genreSingle'] = (
    df['genres']
    .str.split(',').str[0]
    .str.strip("[]'\" ")
    # An empty list ("[]") has no genre: mark it missing so it cannot show up
    # as a bogus category in value_counts().
    .replace('', np.nan)
)

df['genreSingle'].head(5)

In [None]:
# Show the raw 'publishDate' strings before they are normalised.
df['publishDate']

### Transform publishDate column to the same date format

In [None]:
import re

# Define a regular expression pattern to match valid date strings
date_pattern = r'(\b\d{1,2}/\d{1,2}/\d{2}\b)|(\b[A-Za-z]+\s\d{1,2}(?:st|nd|rd|th)?\s\d{2,4}\b)|(\b[A-Za-z]+\s\d{4}\b)|(\bHalloween\s\d{4}\b)'

# Extract the valid date strings based on the pattern. The groups are
# alternatives, so each row has at most one non-null capture-group column.
extracted_dates = df['publishDate'].str.extract(date_pattern)

# Collapse ALL capture groups into one string per row ('' when nothing matched).
date_strings = extracted_dates[0].fillna('')
for group in extracted_dates.columns[1:]:
    date_strings = date_strings + extracted_dates[group].fillna('')

# "Halloween <year>" is captured by the generic "<word> <year>" alternative
# (third group), so the replacement has to run on the combined string, and
# regex=True must be explicit for \b to be treated as a word boundary.
date_strings = date_strings.str.replace(r'\bHalloween\b', 'October', regex=True)

# Convert the extracted dates to datetime; anything unparsable becomes NaT.
# (infer_datetime_format is deprecated and no longer needed.)
converted_dates = pd.to_datetime(date_strings, errors='coerce')

# Assign the converted dates to the original column
df['publishDate'] = converted_dates

df['publishDate'].head(10)

### Create publishYear column

In [None]:
# Take the calendar year of each parsed publish date.
df['publishYear'] = df['publishDate'].dt.year

In [None]:
# Missing years are replaced by 0 so that the column can be cast to int.
df['publishYear'] = df['publishYear'].fillna(0).astype(int)

df['publishYear']

###Ποιά είναι τα 10 βιβλία με τις περισσότερες σελίδες.

In [None]:
# Page counts arrive as text; anything that is not a number becomes NaN.
df['pages'] = pd.to_numeric(df['pages'], errors='coerce')

# The ten rows with the highest page count.
books = df.nlargest(10, 'pages')

# Print each title followed by its page count.
for book_title, page_count in zip(books['title'], books['pages']):
    print(book_title)
    print(page_count)

###Ποιά είναι τα 10 βιβλία με τα περισσότερα 5-αστέρια

In [None]:
# Only consider books with more than 10000 five-star ratings.
has_many_five_stars = df['ratingStar5'] > 10000
filtered_df = df[has_many_five_stars]

# Get the 10 highest values of 'ratingStar5' from the filtered DataFrame
books = filtered_df.nlargest(10, 'ratingStar5')

# Print each title followed by its five-star count.
for book_title, five_star_count in zip(books['title'], books['ratingStar5']):
    print(book_title)
    print(five_star_count)

### Ποιοι είναι οι 10 συγγραφείς με τα περισσότερα βιβλία

In [None]:
# Count rows per 'author' value and keep the ten most frequent.
top_10_authors = df['author'].value_counts().head(10)

top_10_authors

###Ποιοι είναι οι 10 συγγραφείς με τις περισσότερες κριτικές

In [None]:
type(df['numRatings'][1])

def sumRatings(x, authors):
    """Sum ``numRatings`` over the rows of the global ``df`` whose author is ``x``.

    The total is stored under ``authors[x]`` and also returned.
    """
    written_by_x = df['author'] == x
    total = df.loc[written_by_x, 'numRatings'].sum()
    authors[x] = total
    return total

In [None]:
# Total number of ratings per author, computed in a single grouped pass.
# (Summing with a full-frame boolean mask once per row is quadratic in the
# number of rows.) sort=False keeps authors in first-appearance order, so ties
# are ranked the same way as with a dict filled row by row.
authors = df.groupby('author', sort=False)['numRatings'].sum().to_dict()

# Ten authors with the largest totals, as (author, total) pairs.
highest_10 = sorted(authors.items(), key=lambda x: x[1], reverse=True)[:10]

highest_10

### Ποιες είναι οι πιο συχνές γλώσσες στις οποίες έχουν γραφτεί τα βιβλία στα δεδομένα σας

In [None]:
import matplotlib.pyplot as plt

# Number of books per language, most frequent first.
common_lang = df['language'].value_counts()

# Draw the counts as a bar chart on a wide figure.
plt.figure(figsize=(14, 6))
language_axes = common_lang.plot(kind="bar")
language_axes.set_xlabel("languages")
language_axes.set_ylabel("count")
language_axes.set_title("Most Common Languages")
plt.show()

## Recommendation System

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK resources used by the text preprocessing below.
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

### Δημιουργία νέου dataframe

In [None]:
# Recommender input: id and description of the English-language books only.
is_english = df['language'] == 'English'
recomender_df = df.loc[is_english, ['bookId', 'description']]

recomender_df

In [None]:
# Keep the first 10000 books. Take an explicit copy: the frame gets a column
# overwritten later, and assigning into a slice triggers pandas'
# SettingWithCopyWarning (and is ambiguous about which object is modified).
recomender_df = recomender_df.iloc[:10000].copy()

###Preprocess για το description

In [None]:
# Shared by preprocess(): a WordNet lemmatizer and the English stop-word set.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess(desc):
  """Normalise one description into a space-separated string of lemmas.

  The value is converted to ``str`` and lower-cased, every character that is
  not an ASCII letter is replaced by a space, the text is tokenised with NLTK,
  stop words are dropped and the remaining tokens are lemmatised.
  """
  letters_only = re.sub(r'[^a-zA-Z]', ' ', str(desc).lower())

  lemmas = [
      lemmatizer.lemmatize(token)
      for token in nltk.word_tokenize(letters_only)
      if token not in stop_words
  ]

  return " ".join(lemmas)

def preprocess_df(df, column_name):
  """Run ``preprocess`` on every value of ``df[column_name]``.

  The column is overwritten in the frame that was passed in, and that same
  frame is returned.
  """
  cleaned_column = df[column_name].map(preprocess)
  df[column_name] = cleaned_column
  return df

# Replace each description with its preprocessed (cleaned, lemmatised) text.
recomender_df = preprocess_df(recomender_df, 'description')

recomender_df.head(10)

In [None]:
# TF-IDF over unigrams and bigrams of the preprocessed descriptions.
# min_df=1 keeps every term (each vocabulary term occurs in at least one
# document), which is what min_df=0 was meant to express; an integer 0 is
# rejected by parameter validation in current scikit-learn releases.
vectorizer = TfidfVectorizer(stop_words='english', ngram_range=(1,2), min_df=1, analyzer = "word")
tfidf_matrix = vectorizer.fit_transform(recomender_df['description'])

# Pairwise cosine similarity between all descriptions (row/column order
# follows the row order of recomender_df).
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

cosine_sim

In [None]:
# For every row position of the similarity matrix, remember the `num_similar`
# most similar positions as (position, score) pairs, best first.
num_similar = 100
num_books = len(recomender_df)
similar_books = {}

for i in range(num_books):
    similarities = cosine_sim[i]
    ranked = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)
    similar_books[i] = ranked[:num_similar]

# Prediction function
def get_similar_books(book_id, N):
    """Return up to ``N`` books most similar to ``book_id``.

    :param book_id: value of the ``bookId`` column identifying the query book
    :param N: maximum number of recommendations to return
    :return: list of ``(bookId, similarity_score)`` tuples, best match first;
        the query book itself is not included
    :raises IndexError: if ``book_id`` is not present in ``recomender_df``
    """
    # `similar_books` is keyed by ROW POSITION (it was built from the rows of
    # the similarity matrix), while recomender_df keeps the row labels of the
    # original frame. Look up the position, not the label.
    matches = np.flatnonzero((recomender_df['bookId'] == book_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"bookId {book_id!r} not found in recomender_df")
    book_pos = int(matches[0])

    # A book is always maximally similar to itself; leave it out so that the
    # caller gets N *other* books.
    neighbours = [
        (pos, score) for pos, score in similar_books[book_pos] if pos != book_pos
    ][:N]

    book_ids = recomender_df['bookId'].to_numpy()
    return [(book_ids[pos], score) for pos, score in neighbours]

def print_similar_books(recommended_books, book_id, N):
    """Print the title, original description and score of each recommendation.

    ``recommended_books`` is an iterable of ``(bookId, similarity_score)``
    pairs; ``book_id`` is the query book and ``N`` is only used in the header.
    Titles and descriptions are read from the global ``df``.
    """
    def row_label_of(some_book_id):
        # Row label of the book in recomender_df; the same label is then used
        # to read from df.
        return recomender_df[recomender_df['bookId'] == some_book_id].index[0]

    query_title = df.loc[row_label_of(book_id), 'title']
    print("Recommending", N, "books similar to", query_title)
    print("---------------------------------------------------------------------------------------\n")

    for recommended_book_id, similarity_score in recommended_books:
        row_label = row_label_of(recommended_book_id)
        print("Recommended:", df.loc[row_label, 'title'].upper())
        print("Description:", df.loc[row_label, 'description'])
        print("(score:",similarity_score,")")
        print()

# Example usage: recommend N books for one query book and print them.
book_id = "2.Harry_Potter_and_the_Order_of_the_Phoenix"  # Example book ID
N = 10  # Number of similar books to retrieve

# List of (bookId, similarity score) pairs.
recommended_books = get_similar_books(book_id, N)

print_similar_books(recommended_books, book_id, N)

##Classification

In [None]:
# The ten most frequent values of 'genreSingle'.
top_10_genres = df['genreSingle'].value_counts().head(10).index.tolist()

# Keep only books of those genres, with the columns needed for classification.
# .copy() makes this an independent frame: columns are overwritten/added on it
# later, which on a slice of df raises SettingWithCopyWarning.
in_top_genres = df['genreSingle'].isin(top_10_genres)
classification_df = df.loc[in_top_genres, ['bookId', 'description', 'genreSingle']].copy()

classification_df

In [None]:
# Replace each description with its preprocessed (cleaned, lemmatised) text.
classification_df = preprocess_df(classification_df, 'description')

classification_df

In [None]:
# Keep the first 10000 rows. Take an explicit copy because a new column is
# assigned to this frame later; assigning into a slice triggers pandas'
# SettingWithCopyWarning.
classification_df = classification_df.iloc[:10000].copy()

In [None]:
from gensim.test.utils import common_texts
from gensim.models import Word2Vec

# Whitespace-split every preprocessed description into a list of tokens.
tokenized_desc = classification_df['description'].apply(lambda x: x.split()) # tokenizing
# Skip-gram Word2Vec model with negative sampling, built on those token lists.
# NOTE(review): gensim's constructor normally already trains when it is given
# a corpus, so the explicit train() call below would add 20 further epochs on
# top of that - confirm this is intended.
# NOTE(review): with workers > 1 the fixed seed presumably does not make runs
# fully reproducible - verify against the gensim documentation.
model_w2v = Word2Vec(
                  tokenized_desc,
                  vector_size=200, # desired no. of features/independent variables
                  window=5, # context window size
                  min_count=2,
                  sg = 1, # 1 for skip-gram model
                  hs = 0,
                  negative = 10, # for negative sampling
                  workers= 4, # no.of cores
                  seed = 34)
model_w2v.train(tokenized_desc, total_examples= len(classification_df['description']), epochs=20)

In [None]:
# Sanity check of the embeddings: list the words closest to "red".
model_w2v.wv.most_similar(positive="red")

In [None]:
def get_description_vector(description):
    """Embed a description as the mean of its tokens' Word2Vec vectors.

    The description is split on whitespace and tokens unknown to ``model_w2v``
    are ignored. When no token is known, a zero vector of length
    ``model_w2v.vector_size`` is returned instead.
    """
    known_vectors = [
        model_w2v.wv[token]
        for token in description.split()
        if token in model_w2v.wv
    ]

    if not known_vectors:
        # Nothing to average: fall back to an all-zero embedding.
        return np.zeros(model_w2v.vector_size)

    return np.mean(known_vectors, axis=0)

# One embedding vector per description, stored in a new 'desc_vector' column.
description_vectors = classification_df['description'].apply(get_description_vector)
classification_df['desc_vector'] = description_vectors

classification_df['desc_vector']

In [None]:
import pickle

# Save the 'desc_vector' column (a Series, not the whole DataFrame) to a .pkl file
with open('features.pkl', 'wb') as file:
    pickle.dump(classification_df['desc_vector'], file)

# Load the saved object back from the .pkl file
with open('features.pkl', 'rb') as file:
    loaded_features = pickle.load(file)

# The loaded object is now ready for use; print its type as a check
print(type(loaded_features))

###Naive Bayes

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import KFold, cross_val_score, cross_validate
from sklearn.metrics import make_scorer, f1_score, accuracy_score, classification_report, confusion_matrix, precision_score, recall_score

# Shuffled 10-fold split shared by the cross-validation runs.
kfold = KFold(n_splits=10, random_state=42, shuffle=True)

# Macro-averaged precision / recall / F1 scorers, plus plain accuracy.
scoring = {
    metric_name: make_scorer(metric_fn, average='macro', zero_division=0)
    for metric_name, metric_fn in (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
}
scoring['accuracy'] = 'accuracy'

# Features: one description vector per row; target: the single genre label.
X = np.array(classification_df['desc_vector'].tolist())
Y = classification_df['genreSingle']

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Hold-out fit and prediction.
model = GaussianNB()
model.fit(X_train, Y_train)
predicted = model.predict(X_test)

# 10-fold cross-validation on the full data; errors are raised, not scored.
scores = cross_validate(
    model, X, Y,
    cv=kfold,
    scoring=scoring,
    error_score='raise',
    return_train_score=False,
)

# Mean of every metric over the folds.
precision_avg, recall_avg, f1_score_avg, accuracy_avg = (
    scores[key].mean()
    for key in ('test_precision', 'test_recall', 'test_f1_score', 'test_accuracy')
)

print("Precision (macro average):", precision_avg)
print("Recall (macro average):", recall_avg)
print("F1-score (macro average):", f1_score_avg)
print("Accuracy:", accuracy_avg)

Precision (macro average): 0.40870000000000006
Recall (macro average): 0.40870000000000006
F1-score (macro average): 0.40870000000000006
Accuracy: 0.40870000000000006


\begin{array}{|c|c|c|c|c|c|}
    \hline
    df size & average & precision & recall & f1-score & accuracy \\
    \hline
    10000 & 'weighted' & 0.5221295316689785 & 0.4063 & 0.3634986971327451 & 0.4063 \\
    \hline
    10000 & 'macro' & 0.4218160714061529 & 0.4308436912496119
    & 0.3541748177783423 & 0.4063 \\
    \hline
    10000 & 'micro' & 0.4063 & 0.4063
    & 0.40630000000000005 & 0.4063 \\
    \hline
\end{array}

### SVMs

In [None]:
from sklearn import svm

# Feature matrix: one description vector per row.
X = np.array(classification_df['desc_vector'].tolist())

X_train, X_test, Y_train, Y_test = train_test_split(
    X, classification_df['genreSingle'], test_size=0.2, random_state=42
)

# Hold-out fit and prediction with a polynomial-kernel SVM.
model = svm.SVC(C=1, kernel='poly', gamma='auto')
model.fit(X_train, Y_train)
predicted = model.predict(X_test)

# Cross-validation reuses the folds, scorers and target defined earlier.
scores = cross_validate(model, X, Y, cv=kfold, scoring=scoring)

# Mean of every metric over the folds.
precision_avg, recall_avg, f1_score_avg, accuracy_avg = (
    scores[key].mean()
    for key in ('test_precision', 'test_recall', 'test_f1_score', 'test_accuracy')
)

print("Precision (macro average):", precision_avg)
print("Recall (macro average):", recall_avg)
print("F1-score (macro average):", f1_score_avg)
print("Accuracy:", accuracy_avg)

Precision (macro average): 0.23850000000000002
Recall (macro average): 0.23850000000000002
F1-score (macro average): 0.23850000000000002
Accuracy: 0.23850000000000002


\begin{array}{|c|c|c|c|c|c|c|c|c|}
    \hline
    df size & average & precision & recall & f1-score & accuracy & C & kernel & gamma\\
    \hline
    10000 & 'weighted' & 0.6792768753852335 & 0.6678000000000001 & 0.6522683797259655 & 0.6678000000000001 & 1.0 & 'rbf' & 'scale'\\
    \hline
    10000 & 'macro' & 0.6802518874619129 & 0.5480011940702822
    & 0.5771918774220145 & 0.6678000000000001 & 1.0 & 'rbf' & 'scale' \\
    \hline
    10000 & 'micro' & 0.6678000000000001 & 0.6678000000000001
    & 0.6678000000000001 & 0.6678000000000001 & 1.0 & 'rbf' & 'scale' \\
    \hline
    10000 & 'micro' & 0.6604000000000001 & 0.6604000000000001
    & 0.6604000000000001 & 0.6604000000000001 & 0.75 & 'rbf' & 'scale' \\
    \hline
    10000 & 'macro' & 0.6751716801177108 & 0.5316729158471419
    & 0.5649371966852927 & 0.6622 & 0.75 & 'rbf' & 'scale' \\
    \hline
    10000 & 'weighted' & 0.6751551578609265 & 0.6622
    & 0.6443373258912279 & 0.6622 & 0.75 & 'rbf' & 'scale' \\
    \hline
    10000 & 'weighted' & 0.6662915959345186 & 0.6504000000000001
    & 0.6288118114319659 & 0.6504000000000001 & 0.5 & 'rbf' & 'scale' \\
    \hline
    10000 & 'macro' & 0.6514537439985582 & 0.5072699986293276
    & 0.5429834320856206 & 0.6504000000000001 & 0.5 & 'rbf' & 'scale' \\
    \hline
    10000 & 'micro' & 0.6504000000000001 & 0.6504000000000001
    & 0.6504000000000001 & 0.6504000000000001 & 0.5 & 'rbf' & 'scale' \\
    \hline
    10000 & 'micro' & 0.6623000000000001 & 0.6623000000000001
    & 0.6623000000000001 & 0.6623000000000001 & 1.0 & 'linear' & 'scale' \\
    \hline
    10000 & 'macro' & 0.6432803569752201 & 0.5545233959751643
    & 0.5761151198228849 & 0.6623000000000001 & 1.0 & 'linear' & 'scale' \\
    \hline
    10000 & 'weighted' & 0.6613705693951196 & 0.6623000000000001
    & 0.648813333483055 & 0.6623000000000001 & 1.0 & 'linear' & 'scale' \\
    \hline
    10000 & 'weighted' & 0.6813601550398586 & 0.6712
    & 0.6559179816836453 & 0.6712 & 1.0 & 'poly' & 'scale' \\
    \hline
    10000 & 'macro' & 0.6813601550398586 & 0.6712
    & 0.6559179816836453 & 0.6712 & 1.0 & 'poly' & 'scale' \\
    \hline
    10000 & 'micro' & 0.6712 & 0.6712
    & 0.6712 & 0.6712 & 1.0 & 'poly' & 'scale' \\
    \hline
    10000 & 'macro' & 0.23850000000000002 & 0.23850000000000002
    & 0.23850000000000002 & 0.23850000000000002 & 1.0 & 'poly' & 'auto' \\
    \hline
\end{array}

###Random Forests

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Hold-out fit and prediction with a shallow random forest; the train/test
# split comes from the earlier cells.
model = RandomForestClassifier(max_depth=2, random_state=42)
model.fit(X_train, Y_train)
predicted = model.predict(X_test)

# Cross-validation reuses the folds, scorers, features and target defined earlier.
scores = cross_validate(model, X, Y, cv=kfold, scoring=scoring)

# Mean of every metric over the folds.
precision_avg, recall_avg, f1_score_avg, accuracy_avg = (
    scores[key].mean()
    for key in ('test_precision', 'test_recall', 'test_f1_score', 'test_accuracy')
)

print("Precision (macro average):", precision_avg)
print("Recall (macro average):", recall_avg)
print("F1-score (macro average):", f1_score_avg)
print("Accuracy:", accuracy_avg)

Precision (macro average): 0.1413949130687595
Recall (macro average): 0.1808072001832111
F1-score (macro average): 0.12181192726852456
Accuracy: 0.41059999999999997


\begin{array}{|c|c|c|c|c|c|c|}
    \hline
    df size & average & precision & recall & f1-score & accuracy & max depth\\
    \hline
    10000 & 'macro' & 0.13771582222147544 & 0.17914135482818644 & 0.11823657431961032 &0.4088 &2\\
    \hline
    10000 & 'weighted' &  0.26671859036419165 & 0.4088 & 0.26626648742766806 &0.4088 &2\\
    \hline
    10000 & 'macro' & 0.6670470797699398 & 0.3358458169106126 & 0.35071046743569967 &0.5408999999999999 & 10\\
    \hline
    10000 & 'weighted' & 0.6348077201508254 &  0.5408999999999999 & 0.47732582453726813 &0.5408999999999999 & 10\\
    \hline
    10000 & 'macro' & 0.6667965072594207 & 0.3901888622877353 & 0.4234875195464142 &0.5742 & 50\\
    \hline
    10000 & 'weighted' & 0.6368988799830719 & 0.5742 & 0.5295475319899082 &0.5742 & 50\\
    \hline
\end{array}

##Bonus

In [None]:
import os
import csv
import time
import urllib.request

In [None]:
DIR_PATH = "drive/MyDrive/"

def get_books_cover(DIR_PATH, books, bookIds, max_downloads=701, wait_seconds=2):
    """
    Download book cover images into the ``img`` directory under ``DIR_PATH``.

    Each cover is saved as ``<DIR_PATH>/img/<bookId>.jpg``. A cover that cannot
    be downloaded is reported and skipped, so one bad URL does not abort the
    remaining downloads.

    :param DIR_PATH: base directory that contains (or will contain) ``img``
    :param books: iterable of cover image URLs
    :param bookIds: iterable of book ids, paired with ``books`` by position
    :param max_downloads: maximum number of covers to attempt
        (``None`` for no limit)
    :param wait_seconds: pause after every attempt, to be polite to the server
    :return: None
    """
    # Check and create the SAME directory the images are written to. Checking
    # a bare "img" relative to the working directory could skip the creation
    # of <DIR_PATH>/img and make every download fail.
    img_dir = os.path.join(DIR_PATH, "img")

    if os.path.isdir(img_dir):
        print(img_dir, "folder already exists, saving images to folder.")
    else:
        os.makedirs(img_dir, exist_ok=True)
        print("Creating folder: ", img_dir)

    # Download covers
    for attempt, (cover_url, book_id) in enumerate(zip(books, bookIds)):
        if max_downloads is not None and attempt >= max_downloads:
            break

        target = os.path.join(img_dir, f"{book_id}.jpg")
        try:
            urllib.request.urlretrieve(cover_url, target)
        except (OSError, ValueError) as err:
            # OSError covers URLError/HTTPError and local I/O problems;
            # ValueError is raised for malformed URLs.
            print(f"Skipping cover for {book_id}: {err}")

        # Set a respectful wait time
        time.sleep(wait_seconds)

In [None]:
# Path of the books CSV under DIR_PATH.
file = DIR_PATH + "books_1.Best_Books_Ever.csv"

# Load the CSV again into a separate frame used for the cover images.
finalbooks = pd.read_csv(file)

finalbooks['coverImg'].head()

# Show the current working directory (relative paths are resolved against it).
print(os.getcwd())

In [None]:
directory = DIR_PATH + 'img/'  # Specify the directory path

# Count the regular files in the image directory. Before the first download
# the directory does not exist yet; treat that as "0 files" instead of letting
# os.listdir raise FileNotFoundError.
if os.path.isdir(directory):
    file_count = sum(
        1 for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )
else:
    file_count = 0

print(f"Number of files in '{directory}': {file_count}")

In [None]:
# Only rows that have both a cover URL and a book id can be downloaded.
columns_to_dropna = ['coverImg', 'bookId']
finalbooks = finalbooks.dropna(subset=columns_to_dropna)

# Work with the first 700 covers.
images = finalbooks['coverImg'][:700]
Ids = finalbooks['bookId'][:700]

# Download only while the image folder holds fewer files than the covers
# requested. (Comparing against a hard-coded 701 could never match the 700
# covers selected above, so everything was downloaded again on every run.)
if file_count < len(images):
  get_books_cover(DIR_PATH, images, Ids)

### Βήμα 2

In [None]:
import cv2 as cv

folder_path = directory

# Histogram parameters: 32 bins per colour channel over the full 0-255 range.
channels = [0, 1, 2]
bins = 32
hist_range = [0, 256]

# The first `num_indexed` readable images form the searchable index; every
# remaining image is kept aside as a test (query) image.
num_indexed = 650
test_images = []
histogram_rows = []

# Sorted listing: os.listdir returns files in arbitrary order, which would
# make the index/test split differ from run to run.
for filename in sorted(os.listdir(folder_path)):
    # Only image files are of interest.
    if not filename.endswith(('.jpg', '.png')):
        continue

    image_path = os.path.join(folder_path, filename)

    if len(histogram_rows) >= num_indexed:
        test_images.append(image_path)
        continue

    image = cv.imread(image_path)
    if image is None:
        # cv.imread returns None for unreadable/corrupt files; calcHist would
        # fail on it.
        print(f"Error loading image: {image_path}")
        continue

    hist_b = cv.calcHist([image], [channels[0]], None, [bins], hist_range)
    hist_g = cv.calcHist([image], [channels[1]], None, [bins], hist_range)
    hist_r = cv.calcHist([image], [channels[2]], None, [bins], hist_range)

    hist_vector = np.concatenate((hist_b, hist_g, hist_r)).flatten()

    # The book id IS the file name (covers are saved as "<bookId>.jpg"). Taking
    # it from the file keeps id and histogram in sync; assigning ids from the
    # CSV order afterwards pairs histograms with the wrong books, because the
    # directory listing order is unrelated to the CSV order.
    book_id = os.path.splitext(filename)[0]

    histogram_rows.append(
        [book_id, hist_vector[0:bins], hist_vector[bins:2*bins], hist_vector[2*bins:], hist_vector]
    )

# One row per indexed image: its id, the per-channel histograms and the
# concatenated histogram used for similarity search.
histogram_df = pd.DataFrame(
    histogram_rows,
    columns=['id', 'histogram_b', 'histogram_g', 'histogram_r', 'histogram'],
)
histogram_df

###Βήμα 3

In [None]:
# Build the query vector from the first held-out image: one histogram per
# colour channel, concatenated into a single flat vector.
image_path = test_images[0]
image = cv.imread(image_path)

channel_histograms = [
    cv.calcHist([image], [channel], None, [bins], hist_range)
    for channel in channels[:3]
]
hist_vector = np.concatenate(channel_histograms).flatten()

test_vector = hist_vector

###Βήμα 4

In [None]:
from scipy.spatial.distance import euclidean
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt

In [None]:
def calculate_similarity(hist1, hist2, metric):
    """Return the distance between two histogram vectors (smaller = closer).

    :param hist1: first histogram, a 1-D array-like
    :param hist2: second histogram, same length as ``hist1``
    :param metric: ``'euclidean'`` or ``'cosine'``
    :return: the distance as a scalar; for ``'cosine'`` it is
        ``1 - cosine similarity``
    :raises ValueError: if ``metric`` is neither ``'euclidean'`` nor ``'cosine'``
    """
    if metric == 'euclidean':
        return euclidean(hist1, hist2)

    if metric == 'cosine':
        # Return a plain float. A (1, 1) array per pair turns the caller's
        # list of distances into a 3-D array, on which np.argsort no longer
        # ranks the candidates.
        a = np.asarray(hist1, dtype=float).ravel()
        b = np.asarray(hist2, dtype=float).ravel()
        norm_product = np.linalg.norm(a) * np.linalg.norm(b)
        if norm_product == 0:
            # An all-zero histogram has no direction: similarity 0, distance 1.
            return 1.0
        return float(1.0 - np.dot(a, b) / norm_product)

    raise ValueError('Invalid metric. Choose either "euclidean" or "cosine".')


In [None]:
def find_closest_images(query_hist, histograms, metric, num_images=4):
    """Return the positions of the ``num_images`` histograms closest to the query.

    Every histogram in ``histograms`` is compared with ``query_hist`` via
    ``calculate_similarity`` using ``metric``; positions are ordered from the
    smallest value to the largest.
    """
    distances = [
        calculate_similarity(query_hist, candidate, metric)
        for candidate in histograms
    ]
    ranked_positions = np.argsort(distances)
    return ranked_positions[:num_images]


###Try it on test image

In [None]:
# Use the histogram of the first test image as the query.
query_hist = test_vector

# Positions of the 4 indexed histograms with the smallest Euclidean distance.
closest_indices = find_closest_images(query_hist, histogram_df['histogram'], 'euclidean', num_images=4)

In [None]:
plt.figure(figsize=(12, 6))

# The query image is the first held-out test image.
test = test_images[0]

print("Closest images to : " + test)

# Show the query image (nothing is drawn when it is missing or unreadable).
query_image = cv.imread(test) if os.path.exists(test) else None
if query_image is not None:
    plt.subplot(1, 4, 4)
    plt.imshow(cv.cvtColor(query_image, cv.COLOR_BGR2RGB))
    plt.axis('off')
plt.show()

# Show the closest indexed images side by side.
for i, index in enumerate(closest_indices):
    image_path = os.path.join(folder_path, f"{histogram_df.loc[index, 'id']}.jpg")

    if not os.path.exists(image_path):
        print(f"Image file not found: {image_path}")
        continue

    image = cv.imread(image_path)
    if image is None:
        print(f"Error loading image: {image_path}")
        continue

    plt.subplot(1, 4, i + 1)
    plt.imshow(cv.cvtColor(image, cv.COLOR_BGR2RGB))
    plt.axis('off')
plt.show()

### For all test images

In [None]:
def transform_img_to_hist(image_path):
  """Read an image file and return its concatenated per-channel histogram.

  One histogram with ``bins`` bins over ``hist_range`` is computed for each of
  the first three entries of ``channels``; the three are concatenated and
  flattened into a single 1-D vector.
  """
  image = cv.imread(image_path)

  channel_histograms = [
      cv.calcHist([image], [channel], None, [bins], hist_range)
      for channel in channels[:3]
  ]

  return np.concatenate(channel_histograms).flatten()

In [None]:
def plot_image(filepath):
  """Show the image stored at ``filepath`` in a new 12x6 figure.

  Nothing is drawn when the file does not exist or cannot be decoded.
  """
  plt.figure(figsize=(12, 6))

  if os.path.exists(filepath):
        image = cv.imread(filepath)
        if image is not None:
            image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
            # Fixed slot (last of four). The slot used to be computed from an
            # undefined local `i`, which only worked when a global `i` had
            # leaked from a loop in another cell (NameError otherwise).
            plt.subplot(1, 4, 4)
            plt.imshow(image_rgb)
            plt.axis('off')
  plt.show()

In [None]:
def plot_indices(hist, metric):
  """Show the four indexed images closest to the histogram ``hist``.

  The neighbours are looked up in ``histogram_df`` with ``find_closest_images``
  using ``metric``, and drawn side by side in one 12x6 figure. A message is
  printed for every neighbour whose file is missing or unreadable.
  """
  closest_indices = find_closest_images(hist, histogram_df['histogram'], metric, num_images=4)

  plt.figure(figsize=(12, 6))

  for i, index in enumerate(closest_indices):
    image_path = os.path.join(folder_path, f"{histogram_df.loc[index, 'id']}.jpg")

    # Load and display the image if it exists
    if os.path.exists(image_path):
        image = cv.imread(image_path)
        if image is not None:
            image_rgb = cv.cvtColor(image, cv.COLOR_BGR2RGB)
            plt.subplot(1, 4, i + 1)
            plt.imshow(image_rgb)
            plt.axis('off')
        else:
            print(f"Error loading image: {image_path}")
    else:
        print(f"Image file not found: {image_path}")

  # Must be inside the function: at module level it ran once when the function
  # was defined instead of after every call.
  plt.show()

In [None]:
# For each of the first five test images: show the image itself, then its
# closest indexed images by Euclidean distance.
for image in test_images[:5]:
  histogram = transform_img_to_hist(image)

  plot_image(image)

  plot_indices(histogram, 'euclidean')