<div align="center"><h3>HW2, Question 6</h3></div>
<div align="center"><h5>Mohammadreza Ghofrani, 400131076</h5></div>

In [None]:
import os
import glob
import warnings
from itertools import combinations
import cv2
import scipy
import numpy as np
import pandas as pd
import imageio as iio
from sklearn import metrics
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Small positive constant that later cells multiply into their likelihood terms.
EPSILON = 0.001
# Route pandas' DataFrame/Series ``.plot`` calls through plotly.
pd.options.plotting.backend = "plotly"

# Question 6

## Part A

In [None]:
# Estimate the class priors P(skin) and P(non_skin) as pixel fractions over the
# set1 ground-truth masks. A mask value of 0 marks a non-skin pixel and any
# other value marks skin -- the same convention the Part B cell applies when it
# splits the training pixels.
number_of_zero_pixels = 0
total_number_of_pixels = 0
for img_path in glob.glob('data/P6/pratheepan/train/set1/groundtruth/*.png'):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    number_of_zero_pixels += np.count_nonzero(img == 0)
    total_number_of_pixels += img.size

if total_number_of_pixels == 0:
    # Fail with a readable message instead of a bare ZeroDivisionError.
    raise FileNotFoundError(
        'no ground-truth masks found under data/P6/pratheepan/train/set1/groundtruth/')

prior_prob = {'skin': -1, 'non_skin': -1}
# Zero-valued mask pixels are background, so their share is the NON-skin prior;
# the skin prior is the remainder.
prior_prob['non_skin'] = number_of_zero_pixels / total_number_of_pixels
prior_prob['skin'] = 1 - prior_prob['non_skin']
print(prior_prob)

## Part B

In [None]:
# Fit one 3-D Gaussian per class to the set1 training pixels: split every
# training image into skin / non-skin pixels using its ground-truth mask
# (0 = non-skin, anything else = skin), then estimate each class's mean vector
# and covariance matrix. Channels stay in whatever order cv2.imread delivers
# them; every later cell reads images the same way, so the order is consistent.
groundtruth_paths = sorted(glob.glob('data/P6/pratheepan/train/set1/groundtruth/**'))
image_paths = sorted(glob.glob('data/P6/pratheepan/train/set1/images/**'))

skin_pixels_list = []      # one (n_i, 3) array of skin pixels per image
non_skin_pixels_list = []  # one (m_i, 3) array of non-skin pixels per image
for groundtruth_img_path, img_path in zip(groundtruth_paths, image_paths):
    groundtruth_img = cv2.cvtColor(cv2.imread(groundtruth_img_path), cv2.COLOR_BGR2GRAY)
    img = cv2.imread(img_path)

    # Boolean-mask indexing extracts all pixels of a class at once, in
    # row-major order, replacing the per-pixel Python loop.
    is_background = groundtruth_img == 0
    non_skin_pixels_list.append(img[is_background])
    skin_pixels_list.append(img[~is_background])

theta = {'skin': {'mean': 0, 'cov': 0}, 'non_skin': {'mean': 0, 'cov': 0}}
# Cast away from uint8 before computing statistics.
skin_pixel_df = pd.DataFrame(np.concatenate(skin_pixels_list).astype(int))
theta['skin']['mean'] = skin_pixel_df.mean().to_numpy()
theta['skin']['cov'] = skin_pixel_df.cov().to_numpy()
non_skin_pixels_df = pd.DataFrame(np.concatenate(non_skin_pixels_list).astype(int))
theta['non_skin']['mean'] = non_skin_pixels_df.mean().to_numpy()
theta['non_skin']['cov'] = non_skin_pixels_df.cov().to_numpy()

print('For skin pixels,')
print('mean:\n', theta['skin']['mean'])
print('covarince: \n', theta['skin']['cov'])
print('For non skin pixels,')
print('mean:\n', theta['non_skin']['mean'])
print('covarince: \n', theta['non_skin']['cov'])

# Frozen class-conditional densities p(x | class) used by the later cells.
cond_prob = {'skin': None, 'non_skin': None}
cond_prob['skin'] = scipy.stats.multivariate_normal(mean=theta['skin']['mean'],
                                                    cov=theta['skin']['cov'])
cond_prob['non_skin'] = scipy.stats.multivariate_normal(mean=theta['non_skin']['mean'],
                                                        cov=theta['non_skin']['cov'])

## Part C

In [None]:
# Bayes classification of the two test photos: a pixel is labelled skin (1)
# when P(skin) p(x|skin) > P(non_skin) p(x|non_skin), otherwise non-skin (0).
#
# * All pixels of an image are scored in one vectorised call per class instead
#   of two scipy calls per pixel.
# * Scores are compared in the log domain, so densities too small for a float
#   no longer collapse to 0 on both sides and tie.
# * The EPSILON factor the comparison used to carry on both sides is dropped:
#   scaling both sides by the same positive constant cannot change the winner.
for trump_tie_title in ['trump_tie_1.jpg', 'trump_tie_2.jpg']:
    trump_tie = cv2.imread(f'data/P6/{trump_tie_title}')
    pixels = trump_tie.reshape(-1, 3)  # one row per pixel
    skin_score = np.log(prior_prob['skin']) + cond_prob['skin'].logpdf(pixels)
    non_skin_score = np.log(prior_prob['non_skin']) + cond_prob['non_skin'].logpdf(pixels)
    # Back to image layout; float 0/1 values, as the plot expects.
    trump_tie_predicted = (skin_score > non_skin_score).astype(float).reshape(trump_tie.shape[:2])
    fig = px.imshow(trump_tie_predicted, color_continuous_scale='gray',
                    title=f'Skin Detection in {trump_tie_title} with Bayes Classifier')
    fig.update_layout(title_x=0.5)
    fig.show()

## Part D

In [None]:
# Minimum-distance classifier: a pixel is labelled skin (1) when its Euclidean
# distance to the skin class mean is strictly smaller than its distance to the
# non-skin class mean, otherwise it stays 0.
skin_mean = theta['skin']['mean']
non_skin_mean = theta['non_skin']['mean']
for trump_tie_title in ('trump_tie_1.jpg', 'trump_tie_2.jpg'):
    trump_tie = cv2.imread(f'data/P6/{trump_tie_title}')
    height, width = trump_tie.shape[:2]
    trump_tie_predicted = np.zeros((height, width))
    for row in range(height):
        for col in range(width):
            pixel = trump_tie[row, col]
            skin_offset = pixel - skin_mean
            non_skin_offset = pixel - non_skin_mean
            distance_from_skin_class = np.sqrt((skin_offset ** 2).sum())
            distance_from_non_skin_class = np.sqrt((non_skin_offset ** 2).sum())
            # The prediction map starts at 0, so only skin pixels need writing.
            if distance_from_skin_class < distance_from_non_skin_class:
                trump_tie_predicted[row, col] = 1
    fig = px.imshow(
        trump_tie_predicted,
        color_continuous_scale='gray',
        title=f'Skin Detection in {trump_tie_title} with MDC Classifier',
    )
    fig.update_layout(title_x=0.5)
    fig.show()

## Part E

In [None]:
# Evaluate the Bayes classifier on the test set and report its pixel-level
# error rate. Each image is scored in one vectorised call per class, in the log
# domain (see the decision rule below); labels are gathered in row-major order
# so predictions and ground truth stay aligned.
groundtruth_paths = sorted(glob.glob('data/P6/pratheepan/test/groundtruth/**'))
image_paths = sorted(glob.glob('data/P6/pratheepan/test/images/**'))

true_chunks = []
predicted_chunks = []
for groundtruth_img_path, img_path in zip(groundtruth_paths, image_paths):
    groundtruth_img = cv2.cvtColor(cv2.imread(groundtruth_img_path), cv2.COLOR_BGR2GRAY)
    img = cv2.imread(img_path)

    pixels = img.reshape(-1, 3)  # one row per pixel
    # Decide skin when log P(skin) + log p(x|skin) exceeds the non-skin score.
    # Working with logs avoids both sides underflowing to 0 and tying.
    skin_score = np.log(prior_prob['skin']) + cond_prob['skin'].logpdf(pixels)
    non_skin_score = np.log(prior_prob['non_skin']) + cond_prob['non_skin'].logpdf(pixels)
    predicted_chunks.append((skin_score > non_skin_score).astype(int))
    # Integer division maps mask value 255 to 1 and every smaller value to 0.
    true_chunks.append(groundtruth_img.ravel() // 255)

true_class = np.concatenate(true_chunks)
predicted_class = np.concatenate(predicted_chunks)

# Number of misclassified pixels.
error = int(np.count_nonzero(predicted_class != true_class))
print('Error Rate is ', (error/len(predicted_class)))

## Part F

In [None]:
# Confusion matrix of the Part E predictions: rows are the true class, columns
# the predicted class. `labels` pins the 0 = non-skin, 1 = skin ordering and
# keeps the table 2x2 even if one class is absent from the data.
pd.DataFrame(
    metrics.confusion_matrix(true_class, predicted_class, labels=[0, 1]),
    index=['true non_skin', 'true skin'],
    columns=['predicted non_skin', 'predicted skin'],
)

## Part G

In [None]:
def bayes_error(r, g, b):
    """Integrand of the Bayes error at the colour point(s) ``(r, g, b)``.

    Evaluates ``min(P(skin) p(x|skin), P(non_skin) p(x|non_skin))`` with
    ``x = [r, g, b]``, reading the module-level ``prior_prob`` and
    ``cond_prob`` at call time.

    Parameters
    ----------
    r, g, b : scalar or array_like
        The three coordinates of the point, passed to the class densities in
        this order. Arrays are broadcast against each other, so a whole grid
        can be evaluated in one call.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The smaller of the two prior-weighted densities at each point, with
        the broadcast shape of the inputs (a scalar for scalar inputs).
    """
    # Stack the coordinates along a trailing axis so the last axis is the
    # 3-vector the densities expect, whatever the input shape.
    x = np.stack(np.broadcast_arrays(r, g, b), axis=-1)
    skin_likelihood = prior_prob['skin'] * cond_prob['skin'].pdf(x)
    non_skin_likelihood = prior_prob['non_skin'] * cond_prob['non_skin'].pdf(x)
    return np.minimum(skin_likelihood, non_skin_likelihood)

In [None]:
# Alternative left disabled: numerical quadrature of the same integrand over the colour cube.
# scipy.integrate.nquad(bayes_error, [[0, 255], [0, 255], [0,255]])

In [None]:
# Approximate the Bayes error
#     integral of min(P(skin) p(x|skin), P(non_skin) p(x|non_skin)) dx
# by a Riemann sum over every integer colour triple in [0, 255]^3.
#
# * The ranges run to 256 so that the value 255 is included.
# * The grid spacing is 1 in each dimension, so every sample stands for a cell
#   of volume 1 and the sum itself is the estimate -- no extra scale factor.
# * The integrand is evaluated one r-plane (256 x 256 points) at a time rather
#   than one point at a time.
# Probability mass the Gaussians place outside the cube is not counted.
r = np.arange(0, 256)
g = np.arange(0, 256)
b = np.arange(0, 256)

# Rows of [r, g, b] covering every (g, b) pair; column 0 is filled per plane.
g_grid, b_grid = np.meshgrid(g, b, indexing='ij')
plane = np.empty((g_grid.size, 3), dtype=float)
plane[:, 1] = g_grid.ravel()
plane[:, 2] = b_grid.ravel()

error = 0.0
for ri in r:
    plane[:, 0] = ri
    skin_joint = prior_prob['skin'] * cond_prob['skin'].pdf(plane)
    non_skin_joint = prior_prob['non_skin'] * cond_prob['non_skin'].pdf(plane)
    error += np.minimum(skin_joint, non_skin_joint).sum()
print(error)

## Part H

In [None]:
# ROC curve of the Bayes skin classifier on the test set.
#
# Each pixel is scored with its log posterior odds
#     log[P(skin) p(x|skin)] - log[P(non_skin) p(x|non_skin)],
# the statistic the Bayes decision rule thresholds (skin when it is > 0), so
# sweeping the threshold traces the operating points of that classifier.
# Scoring with the skin term alone would ignore the non-skin model entirely.
# Images are scored in one vectorised call per class, in the log domain.
groundtruth_paths = sorted(glob.glob('data/P6/pratheepan/test/groundtruth/**'))
image_paths = sorted(glob.glob('data/P6/pratheepan/test/images/**'))

true_chunks = []
score_chunks = []
for groundtruth_img_path, img_path in zip(groundtruth_paths, image_paths):
    groundtruth_img = cv2.cvtColor(cv2.imread(groundtruth_img_path), cv2.COLOR_BGR2GRAY)
    img = cv2.imread(img_path)

    pixels = img.reshape(-1, 3)  # one row per pixel
    skin_score = np.log(prior_prob['skin']) + cond_prob['skin'].logpdf(pixels)
    non_skin_score = np.log(prior_prob['non_skin']) + cond_prob['non_skin'].logpdf(pixels)
    score_chunks.append(skin_score - non_skin_score)
    # Integer division maps mask value 255 to 1 and every smaller value to 0.
    true_chunks.append(groundtruth_img.ravel() // 255)

true_class = np.concatenate(true_chunks)
predicted_score = np.concatenate(score_chunks)

fpr, tpr, threshold = metrics.roc_curve(true_class, predicted_score)
roc_auc = metrics.auc(fpr, tpr)

fig = px.area(
    x=fpr, y=tpr,
    title=f'ROC Curve (AUC={roc_auc:.4f})',
    labels=dict(x='False Positive Rate', y='True Positive Rate'),
    width=700, height=500
)
# Diagonal reference line: the ROC of a random classifier.
fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

fig.update_layout({'title':{'x':0.5}})
fig.update_yaxes(scaleanchor="x", scaleratio=1)
fig.update_xaxes(constrain='domain')
fig.show()

## Part I

### Part A

In [None]:
# Re-estimate the class priors as pixel fractions over the ground-truth masks
# of BOTH training sets (set1 and set2). A mask value of 0 marks a non-skin
# pixel and any other value marks skin -- the same convention the pixel-split
# cells apply.
number_of_zero_pixels = 0
total_number_of_pixels = 0
for img_path in glob.glob('data/P6/pratheepan/train/set1/groundtruth/**') + \
                glob.glob('data/P6/pratheepan/train/set2/groundtruth/**'):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    number_of_zero_pixels += np.count_nonzero(img == 0)
    total_number_of_pixels += img.size

if total_number_of_pixels == 0:
    # Fail with a readable message instead of a bare ZeroDivisionError.
    raise FileNotFoundError(
        'no ground-truth masks found under data/P6/pratheepan/train/set1 or set2')

prior_prob = {'skin': -1, 'non_skin': -1}
# Zero-valued mask pixels are background, so their share is the NON-skin prior;
# the skin prior is the remainder.
prior_prob['non_skin'] = number_of_zero_pixels / total_number_of_pixels
prior_prob['skin'] = 1 - prior_prob['non_skin']
print(prior_prob)

### Part B

In [None]:
# Refit the per-class 3-D Gaussians on the pixels of BOTH training sets.
# Every image is split into skin / non-skin pixels with its ground-truth mask
# (0 = non-skin, anything else = skin); set1 is processed before set2.
skin_pixels_list = []      # one (n_i, 3) array of skin pixels per image
non_skin_pixels_list = []  # one (m_i, 3) array of non-skin pixels per image
for train_set in ('set1', 'set2'):
    groundtruth_paths = sorted(glob.glob(f'data/P6/pratheepan/train/{train_set}/groundtruth/**'))
    image_paths = sorted(glob.glob(f'data/P6/pratheepan/train/{train_set}/images/**'))
    for groundtruth_img_path, img_path in zip(groundtruth_paths, image_paths):
        groundtruth_img = cv2.cvtColor(cv2.imread(groundtruth_img_path), cv2.COLOR_BGR2GRAY)
        img = cv2.imread(img_path)

        # Boolean-mask indexing extracts all pixels of a class at once, in
        # row-major order, replacing the per-pixel Python loop.
        is_background = groundtruth_img == 0
        non_skin_pixels_list.append(img[is_background])
        skin_pixels_list.append(img[~is_background])

theta = {'skin': {'mean': 0, 'cov': 0}, 'non_skin': {'mean': 0, 'cov': 0}}
# Cast away from uint8 before computing statistics.
skin_pixel_df = pd.DataFrame(np.concatenate(skin_pixels_list).astype(int))
theta['skin']['mean'] = skin_pixel_df.mean().to_numpy()
theta['skin']['cov'] = skin_pixel_df.cov().to_numpy()
non_skin_pixels_df = pd.DataFrame(np.concatenate(non_skin_pixels_list).astype(int))
theta['non_skin']['mean'] = non_skin_pixels_df.mean().to_numpy()
theta['non_skin']['cov'] = non_skin_pixels_df.cov().to_numpy()

print('For skin pixels,')
print('mean:\n', theta['skin']['mean'])
print('covarince: \n', theta['skin']['cov'])
print('For non skin pixels,')
print('mean:\n', theta['non_skin']['mean'])
print('covarince: \n', theta['non_skin']['cov'])

# Frozen class-conditional densities p(x | class) used by the later cells.
cond_prob = {'skin': None, 'non_skin': None}
cond_prob['skin'] = scipy.stats.multivariate_normal(mean=theta['skin']['mean'],
                                                    cov=theta['skin']['cov'])
cond_prob['non_skin'] = scipy.stats.multivariate_normal(mean=theta['non_skin']['mean'],
                                                        cov=theta['non_skin']['cov'])

### Part C

In [None]:
# Bayes classification of the two test photos with the current priors and
# densities: a pixel is labelled skin (1) when
# P(skin) p(x|skin) > P(non_skin) p(x|non_skin), otherwise non-skin (0).
#
# * All pixels of an image are scored in one vectorised call per class instead
#   of two scipy calls per pixel.
# * Scores are compared in the log domain, so densities too small for a float
#   no longer collapse to 0 on both sides and tie.
# * The EPSILON factor the comparison used to carry on both sides is dropped:
#   scaling both sides by the same positive constant cannot change the winner.
for trump_tie_title in ['trump_tie_1.jpg', 'trump_tie_2.jpg']:
    trump_tie = cv2.imread(f'data/P6/{trump_tie_title}')
    pixels = trump_tie.reshape(-1, 3)  # one row per pixel
    skin_score = np.log(prior_prob['skin']) + cond_prob['skin'].logpdf(pixels)
    non_skin_score = np.log(prior_prob['non_skin']) + cond_prob['non_skin'].logpdf(pixels)
    # Back to image layout; float 0/1 values, as the plot expects.
    trump_tie_predicted = (skin_score > non_skin_score).astype(float).reshape(trump_tie.shape[:2])
    fig = px.imshow(trump_tie_predicted, color_continuous_scale='gray',
                    title=f'Skin Detection in {trump_tie_title} with Bayes Classifier')
    fig.update_layout(title_x=0.5)
    fig.show()

### Part D

In [None]:
# Minimum-distance classifier: a pixel is labelled skin (1) when its squared
# Euclidean distance to the skin class mean is strictly smaller than its
# squared distance to the non-skin class mean, otherwise it stays 0.
skin_mean = theta['skin']['mean']
non_skin_mean = theta['non_skin']['mean']
for trump_tie_title in ('trump_tie_1.jpg', 'trump_tie_2.jpg'):
    trump_tie = cv2.imread(f'data/P6/{trump_tie_title}')
    height, width = trump_tie.shape[:2]
    trump_tie_predicted = np.zeros((height, width))
    for row in range(height):
        for col in range(width):
            pixel = trump_tie[row, col]
            skin_offset = pixel - skin_mean
            non_skin_offset = pixel - non_skin_mean
            distance_from_skin_class = (skin_offset ** 2).sum()
            distance_from_non_skin_class = (non_skin_offset ** 2).sum()
            # The prediction map starts at 0, so only skin pixels need writing.
            if distance_from_skin_class < distance_from_non_skin_class:
                trump_tie_predicted[row, col] = 1
    fig = px.imshow(
        trump_tie_predicted,
        color_continuous_scale='gray',
        title=f'Skin Detection in {trump_tie_title} with MDC Classifier',
    )
    fig.update_layout(title_x=0.5)
    fig.show()

### Part E

In [None]:
# Evaluate the Bayes classifier (current priors and densities) on the test set
# and report its pixel-level error rate. Each image is scored in one vectorised
# call per class, in the log domain; labels are gathered in row-major order so
# predictions and ground truth stay aligned.
groundtruth_paths = sorted(glob.glob('data/P6/pratheepan/test/groundtruth/**'))
image_paths = sorted(glob.glob('data/P6/pratheepan/test/images/**'))

true_chunks = []
predicted_chunks = []
for groundtruth_img_path, img_path in zip(groundtruth_paths, image_paths):
    groundtruth_img = cv2.cvtColor(cv2.imread(groundtruth_img_path), cv2.COLOR_BGR2GRAY)
    img = cv2.imread(img_path)

    pixels = img.reshape(-1, 3)  # one row per pixel
    # Decide skin when log P(skin) + log p(x|skin) exceeds the non-skin score.
    # Working with logs avoids both sides underflowing to 0 and tying.
    skin_score = np.log(prior_prob['skin']) + cond_prob['skin'].logpdf(pixels)
    non_skin_score = np.log(prior_prob['non_skin']) + cond_prob['non_skin'].logpdf(pixels)
    predicted_chunks.append((skin_score > non_skin_score).astype(int))
    # Integer division maps mask value 255 to 1 and every smaller value to 0.
    true_chunks.append(groundtruth_img.ravel() // 255)

true_class = np.concatenate(true_chunks)
predicted_class = np.concatenate(predicted_chunks)

# Number of misclassified pixels.
error = int(np.count_nonzero(predicted_class != true_class))
print('Error Rate is ', (error/len(predicted_class)))

### Part F

In [None]:
# Confusion matrix of the preceding error-rate cell's predictions: rows are the
# true class, columns the predicted class. `labels` pins the 0 = non-skin,
# 1 = skin ordering and keeps the table 2x2 even if one class is absent.
pd.DataFrame(
    metrics.confusion_matrix(true_class, predicted_class, labels=[0, 1]),
    index=['true non_skin', 'true skin'],
    columns=['predicted non_skin', 'predicted skin'],
)

### Part G

In [None]:
def bayes_error(r, g, b):
    """Integrand of the Bayes error at the colour point(s) ``(r, g, b)``.

    Evaluates ``min(P(skin) p(x|skin), P(non_skin) p(x|non_skin))`` with
    ``x = [r, g, b]``, reading the module-level ``prior_prob`` and
    ``cond_prob`` at call time.

    Parameters
    ----------
    r, g, b : scalar or array_like
        The three coordinates of the point, passed to the class densities in
        this order. Arrays are broadcast against each other, so a whole grid
        can be evaluated in one call.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        The smaller of the two prior-weighted densities at each point, with
        the broadcast shape of the inputs (a scalar for scalar inputs).
    """
    # Stack the coordinates along a trailing axis so the last axis is the
    # 3-vector the densities expect, whatever the input shape.
    x = np.stack(np.broadcast_arrays(r, g, b), axis=-1)
    skin_likelihood = prior_prob['skin'] * cond_prob['skin'].pdf(x)
    non_skin_likelihood = prior_prob['non_skin'] * cond_prob['non_skin'].pdf(x)
    return np.minimum(skin_likelihood, non_skin_likelihood)

In [None]:
# Alternative left disabled: numerical quadrature of the same integrand over the colour cube.
# scipy.integrate.nquad(bayes_error, [[0, 255], [0, 255], [0,255]])

In [None]:
# Approximate the Bayes error
#     integral of min(P(skin) p(x|skin), P(non_skin) p(x|non_skin)) dx
# by a Riemann sum over every integer colour triple in [0, 255]^3, using the
# current priors and densities.
#
# * The ranges run to 256 so that the value 255 is included.
# * The grid spacing is 1 in each dimension, so every sample stands for a cell
#   of volume 1 and the sum itself is the estimate -- no extra scale factor.
# * The integrand is evaluated one r-plane (256 x 256 points) at a time rather
#   than one point at a time.
# Probability mass the Gaussians place outside the cube is not counted.
r = np.arange(0, 256)
g = np.arange(0, 256)
b = np.arange(0, 256)

# Rows of [r, g, b] covering every (g, b) pair; column 0 is filled per plane.
g_grid, b_grid = np.meshgrid(g, b, indexing='ij')
plane = np.empty((g_grid.size, 3), dtype=float)
plane[:, 1] = g_grid.ravel()
plane[:, 2] = b_grid.ravel()

error = 0.0
for ri in r:
    plane[:, 0] = ri
    skin_joint = prior_prob['skin'] * cond_prob['skin'].pdf(plane)
    non_skin_joint = prior_prob['non_skin'] * cond_prob['non_skin'].pdf(plane)
    error += np.minimum(skin_joint, non_skin_joint).sum()
print(error)

### Part H

In [None]:
# ROC curve of the Bayes skin classifier (current priors and densities) on the
# test set.
#
# Each pixel is scored with its log posterior odds
#     log[P(skin) p(x|skin)] - log[P(non_skin) p(x|non_skin)],
# the statistic the Bayes decision rule thresholds (skin when it is > 0), so
# sweeping the threshold traces the operating points of that classifier.
# Scoring with the skin term alone would ignore the non-skin model entirely.
# Images are scored in one vectorised call per class, in the log domain.
groundtruth_paths = sorted(glob.glob('data/P6/pratheepan/test/groundtruth/**'))
image_paths = sorted(glob.glob('data/P6/pratheepan/test/images/**'))

true_chunks = []
score_chunks = []
for groundtruth_img_path, img_path in zip(groundtruth_paths, image_paths):
    groundtruth_img = cv2.cvtColor(cv2.imread(groundtruth_img_path), cv2.COLOR_BGR2GRAY)
    img = cv2.imread(img_path)

    pixels = img.reshape(-1, 3)  # one row per pixel
    skin_score = np.log(prior_prob['skin']) + cond_prob['skin'].logpdf(pixels)
    non_skin_score = np.log(prior_prob['non_skin']) + cond_prob['non_skin'].logpdf(pixels)
    score_chunks.append(skin_score - non_skin_score)
    # Integer division maps mask value 255 to 1 and every smaller value to 0.
    true_chunks.append(groundtruth_img.ravel() // 255)

true_class = np.concatenate(true_chunks)
predicted_score = np.concatenate(score_chunks)

fpr, tpr, threshold = metrics.roc_curve(true_class, predicted_score)
roc_auc = metrics.auc(fpr, tpr)

fig = px.area(
    x=fpr, y=tpr,
    title=f'ROC Curve (AUC={roc_auc:.4f})',
    labels=dict(x='False Positive Rate', y='True Positive Rate'),
    width=700, height=500
)
# Diagonal reference line: the ROC of a random classifier.
fig.add_shape(
    type='line', line=dict(dash='dash'),
    x0=0, x1=1, y0=0, y1=1
)

fig.update_layout({'title':{'x':0.5}})
fig.update_yaxes(scaleanchor="x", scaleratio=1)
fig.update_xaxes(constrain='domain')
fig.show()