## Face-mask-detection

In [None]:
# Install xmltodict, which is imported below to parse the XML annotation files.
!pip install xmltodict

In [None]:
import xmltodict
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from pathlib import Path

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*" and
# later releases dropped the old names, so use whichever name is available
# and fall back to the default style if neither is.
style_name = next(
    (name for name in ('seaborn-v0_8-white', 'seaborn-white')
     if name in plt.style.available),
    'default',
)
plt.style.use([style_name])

# Root directory of the first dataset; listing it shows what it contains.
PATH = '../input/face-mask-detection/'
os.listdir(PATH)

In [None]:
# Sub-directories of the dataset root holding the XML annotations and the images.
ANNOTATIONS_PATH, IMAGE_PATH = (
    os.path.join(PATH, sub_dir) for sub_dir in ("annotations", "images")
)

In [None]:
# Parse every annotation file into a dict and collect them into one table.
data = []

# File names are a 12-character prefix followed by an integer and an
# extension; sort numerically on that integer rather than lexicographically.
sorted_annot_list = sorted(os.listdir(ANNOTATIONS_PATH), key=lambda x: int(x.split('.')[0][12:]))

for f_name in tqdm(sorted_annot_list):
    # Only hold the file open for the read itself.
    with open(os.path.join(ANNOTATIONS_PATH, f_name), 'r') as file:
        xml_text = file.read()

    annot = xmltodict.parse(xml_text)['annotation']

    # Normalise 'object' so it is always a list: a single entry comes back
    # as a bare mapping, and a missing entry becomes an empty list.
    objects_in_image = annot.get('object', [])
    if not isinstance(objects_in_image, list):
        objects_in_image = [objects_in_image]
    annot['object'] = objects_in_image

    data.append(annot)

# One row per annotation file; nested dicts become dotted column names.
data_df = pd.json_normalize(data)

In [None]:
# Number of annotated objects in each image.
data_df['num_of_object'] = [len(objs) for objs in data_df['object']]

In [None]:
# Show the annotation table, now including the per-image object count.
data_df

In [None]:
# Bar chart of how many images contain each number of annotated objects.
plt.figure(figsize=(20, 10))
# Pass the values by keyword: recent seaborn releases accept only `data`
# positionally, so a positional vector is no longer interpreted as `x`.
sns.countplot(x=data_df.num_of_object.values)
plt.show()

In [None]:
# Total number of objects across the images that have at most 15 objects.
data_df.loc[data_df['num_of_object'] <= 15, 'num_of_object'].sum()

In [None]:
# Keep only the images with at most 15 annotated objects.
data_df = data_df.loc[data_df['num_of_object'].le(15)]

In [None]:
from itertools import chain

# Expand the per-image table into one row per annotated object.
num_object_per_images = data_df.num_of_object


def _per_object(column):
    """Repeat an image-level column once for every object in that image."""
    return np.repeat(data_df[column], num_object_per_images)


image_data = pd.DataFrame({
    'filename': _per_object('filename'),
    'segmented': _per_object('segmented'),
    # Flatten the per-image object lists into a single stream of objects.
    'object': chain.from_iterable(data_df['object'].values),
    'width': _per_object('size.width'),
    'height': _per_object('size.height'),
    'depth': _per_object('size.depth'),
})
# The repeated columns carry duplicated index labels; replace them with 0..n-1.
image_data = image_data.reset_index(drop=True)

In [None]:
# Turn each object's mapping into its own set of columns.
object_records = [dict(obj) for obj in image_data['object']]
objects = pd.DataFrame(object_records)

# Put the object columns next to the image columns and discard the raw
# 'object' column they were expanded from.
data = pd.concat([image_data, objects], axis=1).drop(columns='object')

In [None]:
# Show the per-object table (image fields plus the expanded object fields).
data

In [None]:
# Write the per-object table to data.csv, leaving out the index column.
data.to_csv('data.csv', index=False)

In [None]:
# Reload the CSV just written so pandas infers each column's dtype while parsing.
data = pd.read_csv('data.csv') # convert type

In [None]:
# Print the columns, non-null counts and dtypes of the reloaded table.
data.info()

In [None]:
# Summary statistics of the reloaded table.
data.describe()

In [None]:
# Count the rows for each distinct value of the 'pose' column.
data['pose'].value_counts()

In [None]:
# Remove the columns that are not used further on.
data = data.drop(
    columns=['segmented', 'depth', 'truncated', 'occluded', 'difficult', 'pose']
)

In [None]:
# Show the table after the unused columns were dropped.
data

In [None]:
# Count the rows for each distinct value of the 'name' column.
data['name'].value_counts()

## face-mask-detection-dataset

In [None]:
# Root of the second dataset and the nested folder that holds its data.
PATH_2 = '../input/face-mask-detection-dataset/'
DATA_PATH_2 = os.path.join(PATH_2, "Medical mask/Medical mask/Medical Mask")
os.listdir(PATH_2)

In [None]:
# Point the annotation and image paths at the second dataset; this rebinds
# the names that were set for the first dataset.
ANNOTATIONS_PATH, IMAGE_PATH = (
    os.path.join(DATA_PATH_2, sub_dir) for sub_dir in ("annotations", "images")
)

In [None]:
# Load train.csv from the second dataset. This rebinds `data`, replacing the
# table built from the first dataset.
data = pd.read_csv(os.path.join(PATH_2, "train.csv"))

In [None]:
# Shape of the array of distinct 'name' values, i.e. how many there are.
data.name.unique().shape

In [None]:
# Count the rows for each distinct value of the 'classname' column.
data.classname.value_counts()

In [None]:
# Draw 3 rows from each 'classname' group. No random_state is passed, so the
# selection is not fixed between runs.
sample_per_class = data.groupby('classname').sample(3)

In [None]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

# (rows, columns) of the per-class sample.
sample_per_class.shape

In [None]:
def read_image(f_name, IMAGE_PATH, max_size=224):
    """Load an image and scale it so its longer side is ``max_size`` pixels.

    The aspect ratio is preserved for both portrait and landscape images.

    Args:
        f_name: File name of the image, relative to ``IMAGE_PATH``.
        IMAGE_PATH: Directory that contains the image.
        max_size: Target length, in pixels, of the longer image side.

    Returns:
        The resized image converted from OpenCV's BGR channel order to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    img_path = Path(IMAGE_PATH).joinpath(f_name)
    img = cv2.imread(str(img_path))
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    height, width = img.shape[:2]
    # Scale both sides by the same factor so the proportions are kept.
    scale = max_size / max(height, width)
    # cv2.resize expects (width, height), the reverse of img.shape's order;
    # clamp to 1 so extremely thin images never get a zero-sized side.
    dim = (max(1, round(width * scale)), max(1, round(height * scale)))
    img = cv2.resize(img, dim)
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

In [None]:
# Show the sampled images on a grid, three per row, each titled with its class.
n_cols = 3
img_plots = [go.Image(z=read_image(name, IMAGE_PATH)) for name in sample_per_class.name]

# Derive the grid from the number of sampled images instead of assuming a
# fixed count, so the placement lists always match the number of traces.
n_images = len(img_plots)
n_rows = max(1, -(-n_images // n_cols))  # ceiling division

fig = make_subplots(n_rows, n_cols, subplot_titles=list(sample_per_class.classname))

# Plotly's subplot rows and columns are 1-based.
fig.add_traces(
    data=img_plots,
    rows=[i // n_cols + 1 for i in range(n_images)],
    cols=[i % n_cols + 1 for i in range(n_images)],
)

# 300 px per row, which is 6000 px for a 20-row grid.
fig.update_layout(height=300 * n_rows, showlegend=False)
fig.update_xaxes(showticklabels=False).update_yaxes(showticklabels=False)

fig.show()

In [None]:
# Show the table loaded from train.csv.
data