In [None]:
import numpy as np
import pandas as pd 

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm_notebook as tqdm
import shutil

In [None]:
# Load the two competition CSV files from the Kaggle input directory:
# the training segmentation table (its ImageId and EncodedPixels columns are
# used below) and the sample submission file.
ships = pd.read_csv(
    filepath_or_buffer="/kaggle/input/airbus-ship-detection/train_ship_segmentations_v2.csv"
)
test_data = pd.read_csv(
    filepath_or_buffer="/kaggle/input/airbus-ship-detection/sample_submission_v2.csv"
)

In [None]:
# Flag every annotation row: 1 when EncodedPixels holds a string (an RLE mask),
# 0 for anything else (e.g. NaN).
ships["Ship"] = ships["EncodedPixels"].apply(lambda rle: int(isinstance(rle, str)))

# Sum the flags per image to get the number of masks recorded for each ImageId.
ship_unique = ships.groupby("ImageId")["Ship"].sum().reset_index()

In [None]:
def rle2bbox(rle, shape):
    """Convert a run-length-encoded mask into a normalized bounding box.

    ``rle`` is a whitespace-separated sequence of ``start length`` pairs.
    Starts are 1-indexed positions into the mask flattened column by column,
    i.e. ``position = x * height + y`` once converted to 0-indexed.

    Parameters
    ----------
    rle : str
        The run-length encoding of one mask.
    shape : sequence of int
        ``(height, width)`` of the mask in pixels. Both the decoding and the
        normalization use these values.

    Returns
    -------
    list of float
        ``[xc, yc, h, w]``: box centre, then height, then width, each divided
        by the matching image dimension. Note that height comes before width.

    Raises
    ------
    ValueError
        If ``rle`` is empty, has an odd number of tokens, contains a
        non-integer token, or describes pixels beyond ``shape[1]`` columns.
    """
    # Signed integers: an unsigned dtype would silently wrap on `start - 1`.
    runs = np.fromiter((int(token) for token in rle.split()), dtype=np.int64)
    if runs.size == 0 or runs.size % 2:
        raise ValueError(
            "invalid RLE: expected a non-empty sequence of 'start length' pairs"
        )
    runs = runs.reshape((-1, 2))

    height, width = shape[0], shape[1]
    starts = runs[:, 0] - 1  # RLE starts are 1-indexed
    lengths = runs[:, 1]

    # Vertical extent; y1 is exclusive (one past the last covered row).
    row_first = starts % height
    row_end = row_first + lengths
    if np.any(row_end > height):
        # A run spills into the next column, so the mask touches both the
        # bottom and the top edge: the box spans the full height.
        y0, y1 = 0, height
    else:
        y0, y1 = row_first.min(), row_end.max()

    # Horizontal extent; x1 is exclusive as well. It is derived from the LAST
    # pixel of each run so that a mask occupying a single column has width 1
    # instead of 0, and runs ending exactly at a column bottom are not special.
    x0 = np.min(starts // height)
    x1 = np.max((starts + lengths - 1) // height) + 1

    if x1 > width:
        raise ValueError("invalid RLE or image dimensions: x1=%d > shape[1]=%d" % (
            x1, width
        ))

    # Normalize by the actual image dimensions rather than a fixed 768.
    xc = float((x0 + x1) / (2 * width))
    yc = float((y0 + y1) / (2 * height))
    w = float((x1 - x0) / width)
    h = float((y1 - y0) / height)
    return [xc, yc, h, w]

In [None]:
# Compute a normalized bounding box from the encoded pixels of every row that
# has an RLE string; rows whose EncodedPixels is not a string get NaN.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
ships["Boundingbox"] = ships["EncodedPixels"].apply(
    lambda rle: rle2bbox(rle, (768, 768)) if isinstance(rle, str) else np.nan
)
# Drop the raw RLE column; reassignment is used instead of inplace=True.
ships = ships.drop(columns="EncodedPixels")

In [None]:
# Bounding-box area in pixels: entries 2 and 3 of each box are scaled by 768
# and multiplied. `box != box` is the NaN self-inequality test, so rows whose
# Boundingbox is NaN get an area of 0.
ships["BoundingboxArea"] = ships["Boundingbox"].apply(
    lambda box: 0 if box != box else box[2] * 768 * box[3] * 768
)

In [None]:
# Remove the smallest 1% of bounding boxes.
# The cutoff is the 1st percentile of rows that actually have a box (area > 0).
# Rows without a mask carry area 0; including them pulls the percentile down
# to 0 whenever they make up at least 1% of the table, so no small box would
# ever be removed. Area-0 rows themselves are still dropped by the strict `>`.
# NOTE: re-running this cell trims a further 1% each time.
has_box = ships["BoundingboxArea"] > 0
area_cutoff = (
    np.percentile(ships.loc[has_box, "BoundingboxArea"], 1) if has_box.any() else 0
)
ships = ships[ships["BoundingboxArea"] > area_cutoff]

In [None]:
# Build a class-balanced subset: at most 1000 images for each ship count.
# The frame is shuffled once and the first 1000 rows of every "Ship" group are
# kept, which is a random sample per group without GroupBy.apply. apply's
# passing of the grouping column is deprecated since pandas 2.2, and losing
# "Ship" here would break later cells that read balanced_df.Ship.
# random_state makes the draw reproducible across kernel restarts.
balanced_df = (
    ship_unique
    .sample(frac=1, random_state=42)
    .groupby("Ship", sort=False)
    .head(1000)
    # Restore the grouped ordering (ascending ship count); the stable sort
    # keeps the shuffled order within each group.
    .sort_values("Ship", kind="stable")
    .reset_index(drop=True)
)

In [None]:
# Build the bounding-box data frame for the images in balanced_df: an inner
# join on ImageId keeps only the rows of `ships` whose image was selected.
balanced_bbox = pd.merge(
    left=ships,
    right=balanced_df.loc[:, ["ImageId"]],
    on="ImageId",
    how="inner",
)
balanced_bbox.head(n=20)

In [None]:
# Visualize the images together with their bounding boxes: one sample image
# for each ship count 0..14, laid out on a 4x4 grid.
path = "../input/airbus-ship-detection/train_v2/"
plt.figure(figsize=(20, 20))
for i in range(15):
    # Select by column label; `.iloc[0][0]` relied on an integer key being
    # treated as a position on a label-indexed Series, which pandas deprecated.
    candidates = balanced_df.loc[balanced_df["Ship"] == i, "ImageId"]
    if candidates.empty:
        # No sampled image has exactly i ships: leave this grid slot blank
        # instead of failing with IndexError.
        continue
    imageid = candidates.iloc[0]

    bgr = cv2.imread(path + imageid)
    if bgr is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError("could not read image: " + path + imageid)
    # OpenCV loads BGR; convert to RGB for matplotlib. The result is a fresh
    # contiguous array, which cv2.rectangle can draw on in place.
    image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    img_h, img_w = image.shape[:2]

    if i > 0:
        boxes = balanced_bbox.loc[balanced_bbox["ImageId"] == imageid, "Boundingbox"]
        for box in boxes:
            if not isinstance(box, (list, tuple)):
                # Skip rows without a box (e.g. NaN).
                continue
            # Boxes are stored as normalized [xc, yc, h, w].
            xc, yc, box_h, box_w = box
            top_left = (int((xc - box_w / 2) * img_w), int((yc - box_h / 2) * img_h))
            bottom_right = (int((xc + box_w / 2) * img_w), int((yc + box_h / 2) * img_h))
            cv2.rectangle(image, top_left, bottom_right, (255, 0, 0), thickness=2)

    plt.subplot(4, 4, i + 1)
    plt.imshow(image)
    plt.title("Bulunan gemi sayısı = {}".format(i))
