In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
sns.set_style('darkgrid')

from PIL import Image, ImageDraw
import tensorflow as tf

import os
import ast
import sys
import time

import warnings
warnings.filterwarnings('ignore')

import greatbarrierreef


In [None]:
# Data imports.
# Every input file is resolved against DATA_PATH so the dataset location is
# configured in exactly one place (previously the CSV/NPY files used separate
# hard-coded '../input/...' paths that only resolve from one working directory).
DATA_PATH = '/kaggle/input/tensorflow-great-barrier-reef'
images_path = os.path.join(DATA_PATH, 'train_images')
df_test = pd.read_csv(os.path.join(DATA_PATH, 'test.csv'))
df_train = pd.read_csv(os.path.join(DATA_PATH, 'train.csv'))
sample_submission = pd.read_csv(os.path.join(DATA_PATH, 'example_sample_submission.csv'))
example = np.load(os.path.join(DATA_PATH, 'example_test.npy'))

In [None]:
# Path of every frame image: <train_images>/video_<video_id>/<video_frame>.jpg
df_train['img_path'] = (
    '../input/tensorflow-great-barrier-reef/train_images'
    + "/video_" + df_train.video_id.astype(str)
    + "/" + df_train.video_frame.astype(str) + ".jpg"
)
# Turn the textual annotations into Python objects. Only values that are still
# strings are parsed, so re-running this cell does not fail with
# "malformed node or string" on values that were already converted.
df_train['annotations'] = df_train['annotations'].apply(
    lambda raw: ast.literal_eval(raw) if isinstance(raw, str) else raw
)
# Number of annotated boxes in each frame.
df_train['Number_bbox'] = df_train['annotations'].apply(len)

In [None]:
def bbox_areas(annotations):
    """Return the area (width * height) of every box in ``annotations``.

    ``annotations`` is an iterable of dicts with ``'width'`` and ``'height'``
    keys. When it is empty (or otherwise falsy) the result is ``[0]`` so the
    returned list is never empty.
    """
    if annotations:
        return [box['width'] * box['height'] for box in annotations]
    return [0]
# Per-frame list of box areas, then the largest and the smallest area per frame.
df_train["bbox_area"] = df_train["annotations"].apply(bbox_areas)
df_train["max_area"] = df_train["bbox_area"].map(max)
df_train["min_area"] = df_train["bbox_area"].map(min)
df_train.head()


In [None]:
def img_viz(df_train, id):
    """Display one frame with its annotated boxes outlined in red.

    Parameters
    ----------
    df_train : DataFrame
        Must provide an ``img_path`` column (path of the image file) and an
        ``annotations`` column (iterable of dicts with ``x``, ``y``, ``width``
        and ``height`` keys).
    id : index label
        Label of the row to visualise. The name shadows the ``id`` builtin but
        is kept so existing calls keep working.

    The image is rendered with ``display`` (assumed to be the IPython/Jupyter
    builtin); nothing is returned.
    """
    # Explicit label-based lookups instead of chained indexing.
    img = Image.open(df_train.loc[id, 'img_path'])
    boxes = df_train.loc[id, 'annotations']

    # One drawing context is enough for all boxes; it does not depend on the box.
    painter = ImageDraw.Draw(img)
    for box in boxes:
        left, top = box['x'], box['y']
        corners = [left, top, left + box['width'], top + box['height']]
        painter.rectangle(corners, outline="red", width=3)
    display(img)
# Preview the five rows whose largest box area is biggest.
df_train.sort_values(by="max_area", ascending=False).head(n=5)


In [None]:
# Show the frame stored under index label 7336 together with its boxes.
img_viz(df_train, id=7336)

In [None]:
import collections
import tqdm


# Area (width * height) of every annotated box across all frames.
# Built from the annotations themselves rather than from the `bbox_area`
# column: that column holds a placeholder 0 for each frame without boxes,
# which would add one fake zero-area "box" per empty frame to the histogram.
all_boxes_area = [
    box['width'] * box['height']
    for boxes in df_train['annotations']
    for box in boxes
]

# Two stacked panels: linear counts on top, log-scaled counts below.
ax_h_w = (2, 1)
fig1, (ax1, ax2) = plt.subplots(*ax_h_w, figsize=(24, 18))

ax1.hist(x=all_boxes_area, bins=50)
ax1.set(
    title='Bounding-box area distribution',
    xlabel='Box area (width * height)',
    ylabel='Number of boxes',
)

ax2.hist(x=all_boxes_area, bins=50, log=True)
ax2.set(
    title='Bounding-box area distribution (log-scaled counts)',
    xlabel='Box area (width * height)',
    ylabel='Number of boxes (log scale)',
)
plt.show()


In [None]:
# Number of rows in df_train for each video_id.
df_train.video_id.value_counts()

In [None]:
from PIL import Image

def video_stats(path):
    """Print the number of ``.jpg`` frames in ``path`` and the size of one of them.

    Parameters
    ----------
    path : str
        Directory that holds the frames of a single video.

    Only regular files ending in ``.jpg`` are counted. The reported size is
    read from the first matching file returned by ``os.listdir`` and is printed
    as-is for the whole folder. When the folder has no ``.jpg`` file, only the
    (zero) frame count is printed instead of raising ``IndexError``.
    """
    # Regular files with the expected extension only (skips sub-directories).
    frames = [
        name for name in os.listdir(path)
        if name.endswith(".jpg") and os.path.isfile(os.path.join(path, name))
    ]
    if not frames:
        print(f'Number of frames: {len(frames)}')
        return

    # Only the size is needed, so the file is opened in a context manager and
    # closed straight away instead of leaving the handle open.
    with Image.open(os.path.join(path, frames[0])) as im:
        width, height = im.size
    print(f'Number of frames: {len(frames)}')
    print(f'Frames with size (w,h): ({width},{height})')



In [None]:
# Report frame count and frame size for the three training videos.
for video_idx in range(3):
    heading = f'Video {video_idx} Stats:'
    if video_idx == 0:
        print(heading)
    else:
        # Later headings are preceded by a blank line, as in the first report.
        print("\n", heading)
    video_stats(os.path.join(images_path, f'video_{video_idx}'))


In [None]:
# Number of frames for each box count, leaving out frames that have no box.
df_annotations_count = (
    df_train.groupby('Number_bbox')['annotations']
    .count()
    .drop(labels=[0])
)
fig = px.bar(df_annotations_count)
# Treat the box count as a category so every bar gets its own tick.
fig.update_layout(showlegend=False, xaxis={'type': 'category'})
fig.show()