### Copyright-protected material, all rights reserved. (c) University of Vienna.
_Copyright Notice of the corresponding course at Moodle applies. <br> Only to be used in the MRE course._

# MRE Assignment 1 - Digital Image Processing

In this assignment you will use Python (using Pillow or OpenCV) to load, transcode and store images. You will also use the libraries to extract some basic metadata from the images and store them in a data structure for easy access.

In this notebook, you will implement your solution. This notebook will be imported into the "*_def.ipynb" notebook.

Of course you can include code for testing your implementation in this implementation notebook, but code for testing and output generated for testing is not going to be assessed.

Of course, your code for the solutions in this notebook will be inspected and is subject to grading.


## Setup

For general installation instructions, please refer to the resources given for all the assignments [in Moodle](https://moodle.univie.ac.at/course/view.php?id=260637#section-13).

If the cell below executes without error, you can start the assignment!

In [None]:
# -------- Imports --------
# Please do not change the contents of this cell!

# In case you work in a local environment on your own machine,
# how to install the required packages:
#   PIP:     pip install <name> / pip install --upgrade <name>
#   CONDA:   conda install -c conda-forge <name>  /  conda install -c anaconda <name>


# Imports required by us.
import cv2                             # opencv-python
from PIL import Image                  # pillow
from PIL.ExifTags import TAGS
from matplotlib import pyplot as plt   # matplotlib
from sklearn.metrics.pairwise import cosine_similarity  # scikit-learn
import numpy as np                     # numpy
import pandas as pd                    # pandas
from IPython.display import display    # packaged with python

# This directive will allow matplotlib to render interactive plots in the notebook.
%matplotlib widget

In the cells below, place your own imports, global variables, (helper) functions and classes. Feel free to add cells here as you see fit.

In [None]:
# Please place your own imports here.
import os
from PIL import ImageFilter
from PIL import ImageDraw

from numpy.linalg import norm

In [None]:
# Place any helper functions, global variables and classes here.

## Task 1.1: Image formats transformation and adding filters


In [None]:
# Write your function here.

# Transcodes images to JPEG format of adjustable quality (for JPEG).
def JPEGImageConverter(inputImg: str, outputDir: str, quality: float = 1.0) -> None:
    """Transcode a single image, or every supported image of a directory, to JPEG.

    Args:
        inputImg: Path to an image file or to a directory of images.
        outputDir: Directory receiving the JPEG files; created when missing.
        quality: Quality factor handed on unchanged to ``convertFile``.
    """
    os.makedirs(outputDir, exist_ok=True)

    # A plain path (anything that is not a directory) is converted directly.
    if not os.path.isdir(inputImg):
        convertFile(inputImg, outputDir, quality)
        return

    # Only direct children with one of these extensions are converted;
    # sub-directories are not descended into.
    acceptedSuffixes = ('.JPG', '.JPEG', '.BMP', '.PNG', '.GIF')
    for entry in os.listdir(inputImg):
        candidate = os.path.join(inputImg, entry)
        suffix = os.path.splitext(entry)[1].upper()
        if suffix in acceptedSuffixes and os.path.isfile(candidate):
            convertFile(candidate, outputDir, quality)

def convertFile(inputImg: str, outputDir: str, quality: float = 1.0):
    """Transcode one image file to a JPEG stored in ``outputDir``.

    The output name is the original file name (including its extension) with
    ``.jpg`` appended; for any quality other than 1.0 the quality value is
    inserted as ``-<quality>`` before the new extension.

    Args:
        inputImg: Path of the image to convert.
        outputDir: Existing directory the JPEG is written to.
        quality: Quality factor; it is multiplied by 100 to obtain the value
            passed to Pillow's JPEG encoder.
    """
    originalName = os.path.basename(inputImg)
    if quality == 1.0:
        outputName = f"{originalName}.jpg"
    else:
        outputName = f"{originalName}-{quality}.jpg"
    outputPath = os.path.join(outputDir, outputName)

    # round() rather than int(): binary floats make e.g. 0.29 * 100 evaluate to
    # 28.999999999999996, which int() would truncate to 28 instead of 29.
    jpegQuality = round(quality * 100)

    with Image.open(inputImg) as image:
        # JPEG has no alpha or palette support, hence the conversion to RGB.
        image.convert("RGB").save(outputPath, format="JPEG", quality=jpegQuality)

    

In [None]:
# Test your function here.
#JPEGImageConverter("images/jawa-1.jpg", "./output_dir", 0.77)

In [None]:
# Add Blur filter
def BlurImage(inputImg: str, outputDir: str = "./output_dir") -> None:
    """Blur an image and store the result as a JPEG.

    The output file is named ``<original file name>-blurred.jpg``.

    Args:
        inputImg: Path of the image to blur.
        outputDir: Directory receiving the blurred JPEG; created when missing.
            Defaults to ``./output_dir``.
    """
    # Without this, saving fails when the output directory does not exist yet.
    os.makedirs(outputDir, exist_ok=True)

    outputName = f"{os.path.basename(inputImg)}-blurred.jpg"
    outputPath = os.path.join(outputDir, outputName)

    with Image.open(inputImg) as image:
        # Convert before filtering: Pillow's kernel filters reject palette
        # ("P") images such as most GIFs, and JPEG needs RGB data anyway.
        blurred = image.convert("RGB").filter(ImageFilter.BLUR)
        blurred.save(outputPath, format="JPEG", quality=100)

In [None]:
# Test your function here.
#BlurImage("images/zebra3m.jpg")

## Task 1.2:  Extract / Get Image Metadata

In [None]:
# Write your function here. 

# Extracts metadata from an image and places them into a Pandas DataFrame.
def ImageMetadataGenerator(inputImage: str) -> pd.DataFrame:
    """Collect basic metadata of one image or of all images in a directory.

    Args:
        inputImage: Path to an image file, or to a directory whose direct
            children with a .jpg/.jpeg/.bmp/.png/.gif extension are inspected.

    Returns:
        A DataFrame with one row per image and the columns File, Width,
        Height, Components, Channels, BitsPerPixel, ColorSpaceType, Format,
        Pixels, HasAlphaChannel and SizeWithoutCompressionInKB. The frame is
        empty (columns only) when no image was found.
    """
    columns = ['File', 'Width', 'Height', 'Components', 'Channels', 'BitsPerPixel',
               'ColorSpaceType', 'Format', 'Pixels', 'HasAlphaChannel',
               'SizeWithoutCompressionInKB']
    imageExtensions = {'.JPG', '.JPEG', '.BMP', '.PNG', '.GIF'}
    # Pillow modes whose channels are not 8 bits wide; all other modes use 8.
    nonByteModes = {'1': 1, 'I': 32, 'F': 32,
                    'I;16': 16, 'I;16L': 16, 'I;16B': 16, 'I;16N': 16}

    if os.path.isdir(inputImage):
        imagePaths = []
        for filename in os.listdir(inputImage):
            filepath = os.path.join(inputImage, filename)
            extension = os.path.splitext(filename)[1].upper()
            if os.path.isfile(filepath) and extension in imageExtensions:
                imagePaths.append(filepath)
    else:
        imagePaths = [inputImage]

    # Rows are gathered in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []
    for imagePath in imagePaths:
        with Image.open(imagePath) as image:
            bands = image.getbands()
            numChannels = len(bands)
            hasAlphaChannel = 'A' in bands
            # The alpha band counts as a channel but not as a colour component.
            numComponents = numChannels - 1 if hasAlphaChannel else numChannels

            bitsPerPixel = nonByteModes.get(image.mode, 8) * numChannels
            pixels = image.width * image.height

            rows.append({
                'File': image.filename,
                'Width': image.width,
                'Height': image.height,
                'Components': numComponents,
                'Channels': numChannels,
                'BitsPerPixel': bitsPerPixel,
                'ColorSpaceType': image.mode,
                'Format': image.format,
                'Pixels': pixels,
                'HasAlphaChannel': hasAlphaChannel,
                # Theoretical raw size: bits -> bytes -> KB.
                'SizeWithoutCompressionInKB': ((pixels * bitsPerPixel) / 8) / 1024,
            })

    return pd.DataFrame(rows, columns=columns)

In [None]:
# Test your function here.
# Builds the metadata table for the "images/" path and renders it in the notebook.
display(ImageMetadataGenerator("images/"))


# Scratch check: opens and closes the dithered GIF without doing anything with
# it; the inspection call below is disabled.
with Image.open("images/b17barb026_dithered.gif") as image:
    #print(image.getextrema()[1])
    pass

## Task 1.3: Drawing circles around objects in an image

In [None]:
# Write your function here. 

# Identifies shapes in an images and draws circles around them.
def IdentifyObjects(inputImage: str, minArea: float = 120.0) -> None:
    """Detect shapes in an image and show it with circles drawn around them.

    The image is converted to grayscale, blurred, run through Canny edge
    detection, and the minimum enclosing circle of every sufficiently large
    outer contour is drawn in red. The result is shown in an OpenCV window
    until a key is pressed.

    Args:
        inputImage: Path of the image to analyse.
        minArea: Contours whose area is not larger than this are ignored.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``inputImage``.
    """
    image = cv2.imread(inputImage)
    # imread signals failure by returning None instead of raising.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {inputImage}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (9, 9), 0)
    edges = cv2.Canny(blurred, 50, 150)
    # findContours returns (contours, hierarchy) on OpenCV 4 but
    # (image, contours, hierarchy) on OpenCV 3; [-2] is the contour list in both.
    contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    for contour in contours:
        if cv2.contourArea(contour) <= minArea:
            continue
        (x, y), radius = cv2.minEnclosingCircle(contour)
        # cv2.circle needs integer pixel coordinates; colour is BGR red.
        cv2.circle(image, (int(x), int(y)), int(radius), (0, 0, 255), 2)

    try:
        cv2.imshow("Identified objects", image)
        cv2.waitKey(0)
    finally:
        # Close the window even if waiting for the key press is interrupted.
        cv2.destroyAllWindows()

In [None]:
# Test your function here.
#IdentifyObjects("images/Task1.3/partyballoons-1.jpg")
#IdentifyObjects("images/Task1.3/partyballoons-2.jpg")

## Task 1.4: Duplicate image finder using cosine similarity

In [None]:
# Calculates a histogram for one image.
def ImageHistogramGenerator(inputImage: str) -> np.ndarray:
    """Compute the relative histogram of an image.

    Args:
        inputImage: Path of the image file.

    Returns:
        A float array holding Pillow's histogram counts divided by the number
        of pixels (width * height). For multi-band images Pillow concatenates
        the per-band histograms, so the array covers all bands in sequence.
    """
    with Image.open(inputImage) as image:
        width, height = image.size
        counts = np.asarray(image.histogram(), dtype=np.float64)
    # Normalising by the pixel count makes images of different size comparable.
    return counts / (width * height)

In [None]:
# # Test your function here.
# histogram = ImageHistogramGenerator("images/ca-21.jpg")
# #Slicing rgb into 3 arrays
# hrgb = np.array_split(histogram, 3)

# plt.clf()
# plt.plot(hrgb[0])
# plt.plot(hrgb[1])
# plt.plot(hrgb[2])
# plt.show()

In [None]:
# Testing CosineSimilarity
# h1 = ImageHistogramGenerator("images/vie-2.jpg")
# h2 = ImageHistogramGenerator("images/b17barb026.jpg")
# cosine = np.dot(h1,h2)/(norm(h1)*norm(h2))

# print(cosine)

# plt.clf()
# plt.plot(h1)
# plt.show()
# print(norm(h2))

In [None]:
# Find duplicate images and return their similarity scores in a dataframe.
def FindDuplicateImages(inputDir1 : str, inputDir2: str, similarityThreshold: float = 1.0) -> pd.DataFrame:
    """Find similar images across two directories via histogram cosine similarity.

    Args:
        inputDir1: First directory of images.
        inputDir2: Second directory of images.
        similarityThreshold: Minimum cosine similarity for a pair to be reported.

    Returns:
        A DataFrame with the columns ``inputDir1`` and ``inputDir2`` (file
        names of the matching pair) and ``similarityThreshold`` (the cosine
        similarity of the pair). Pairs whose histograms differ in length,
        e.g. grayscale versus colour, are skipped.
    """
    columns = ['inputDir1', 'inputDir2', 'similarityThreshold']
    # Rounding can make identical histograms score 0.9999999999999999, which
    # would miss exact duplicates at the default threshold of 1.0.
    tolerance = 1e-9

    # Load every histogram (and its norm) of the first directory exactly once
    # instead of reopening the image for each pair.
    dir1Entries = []
    for filePath1 in getFilePathsInDirectory(inputDir1):
        histogram1 = ImageHistogramGenerator(filePath1)
        dir1Entries.append((os.path.basename(filePath1), histogram1, norm(histogram1)))

    rows = []
    for filePath2 in getFilePathsInDirectory(inputDir2):
        histogram2 = ImageHistogramGenerator(filePath2)
        norm2 = norm(histogram2)
        fileName2 = os.path.basename(filePath2)

        for fileName1, histogram1, norm1 in dir1Entries:
            # Vectors of different size (grayscale vs. colour) are not comparable.
            if len(histogram1) != len(histogram2):
                continue

            cosineSimilarity = np.dot(histogram1, histogram2) / (norm1 * norm2)
            if cosineSimilarity + tolerance >= similarityThreshold:
                rows.append({
                    'inputDir1': fileName1,
                    'inputDir2': fileName2,
                    # Cap rounding overshoot so the score never exceeds 1.0.
                    'similarityThreshold': min(float(cosineSimilarity), 1.0),
                })

    # Built once from the collected rows: DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(rows, columns=columns)


def getFilePathsInDirectory(inputDir : str) -> list:
    """Return the paths of all supported image files directly inside a directory.

    Args:
        inputDir: Directory to scan (not recursive).

    Returns:
        Paths (``inputDir`` joined with the file name) of regular files with a
        .jpg/.jpeg/.bmp/.png/.gif extension, matched case-insensitively. An
        empty list when ``inputDir`` is not a directory.
    """
    if not os.path.isdir(inputDir):
        return []

    wantedSuffixes = ('.JPG', '.JPEG', '.BMP', '.PNG', '.GIF')
    return [
        os.path.join(inputDir, entry)
        for entry in os.listdir(inputDir)
        if os.path.isfile(os.path.join(inputDir, entry))
        and os.path.splitext(entry)[1].upper() in wantedSuffixes
    ]
            
    

In [None]:
# Test your function here.
# Compares the images of "inputDir1/" against those of "inputDir2/" with a
# similarity threshold of 0.5.
FindDuplicateImages("inputDir1/", "inputDir2/", 0.5)