In [None]:
import pandas as pd
from tqdm.notebook import tqdm
import os
from termcolor import colored
import plotly.express as px
import matplotlib.pyplot as plt
import cv2

In [None]:
# Root of the competition data; every other location is derived from it.
DIRECTORY_PATH = "../input/sartorius-cell-instance-segmentation"

# Annotation table and the three image directories under the data root.
TRAIN_CSV = f"{DIRECTORY_PATH}/train.csv"
TRAIN_PATH = f"{DIRECTORY_PATH}/train"
TEST_PATH = f"{DIRECTORY_PATH}/test"
TRAIN_SEMI_SUPERVISED_PATH = f"{DIRECTORY_PATH}/train_semi_supervised"

In [None]:
def get_Image_paths(path):
    """
    Recursively collect the full path of every file found under a directory.

    parameters: path(string) - Root directory to scan (sub-directories included)

    returns: list of file paths (directory joined with file name), sorted so
             the order is the same on every run. The list is empty when the
             directory holds no files or cannot be listed, because os.walk
             ignores listing errors by default.
    """
    image_names = []
    for dirname, _, filenames in os.walk(path):
        for filename in tqdm(filenames):
            image_names.append(os.path.join(dirname, filename))
    # os.walk yields entries in an arbitrary, filesystem-dependent order; sort
    # so that positional slices of the result pick the same files every run.
    image_names.sort()
    return image_names
    

In [None]:
# Load the training annotation table.
df_train = pd.read_csv(TRAIN_CSV)

# Collect the full path of every file in the train, test and semi-supervised
# directories (scanned in that order).
train_images_path, test_images_path, train_semi_supervised_path = [
    get_Image_paths(split_dir)
    for split_dir in (TRAIN_PATH, TEST_PATH, TRAIN_SEMI_SUPERVISED_PATH)
]

In [None]:
# Preview the first rows of the training table loaded from TRAIN_CSV
df_train.head()

In [None]:
# Print the DataFrame.info() summary (columns, dtypes, non-null counts) of the training table
df_train.info()

In [None]:
# Size of the training table as a (rows, columns) tuple
df_train.shape

In [None]:
# Number of distinct values per column (a missing value counts as one value)
for col in df_train.columns:
    distinct_count = df_train[col].nunique(dropna=False)
    print(f"{col}:{colored(str(distinct_count), 'blue')}")
    

In [None]:
# Number of images found in each of the three scanned directories
print(f"Number of train images: {colored(len(train_images_path), 'blue')}")
print(f"Number of test images:  {colored(len(test_images_path), 'blue')}")
print(f"Number of semi-supervised train images: {colored(len(train_semi_supervised_path), 'blue')}")

In [None]:
def plot_distribution(x, df=None):
    """
    Function to plot an interactive histogram of one column.

    parameters: x(string) - Name of the column to plot (passed to
                            px.histogram as `x`)
                df(DataFrame) - Data to plot; when omitted the notebook-level
                                `df_train` is used
    """
    # Resolve the default at call time so the function picks up whatever
    # df_train currently is, while still allowing any other frame to be passed.
    data = df_train if df is None else df

    fig = px.histogram(
        data,
        x=x,
        title=f"Distribution of {x}",
        width=800,
        height=500,
    )

    fig.show()

In [None]:
# Histogram of the cell_type column
plot_distribution('cell_type')

In [None]:
# Histogram of the plate_time column
plot_distribution('plate_time')

In [None]:
# Histogram of the elapsed_timedelta column
plot_distribution('elapsed_timedelta')

In [None]:
def display_multiple_img(images_paths, rows, cols):
    """
    Function to Display Images from Dataset in a rows x cols grid.

    At most rows * cols images are read and shown; any further paths are
    ignored. A path that OpenCV cannot read is reported and its cell is left
    blank instead of aborting the whole figure.

    parameters: images_paths(list of string) - Paths of Images to be displayed
                rows(int) - No. of Rows in Output
                cols(int) - No. of Columns in Output
    """
    # squeeze=False keeps `axes` a 2-D array for every grid shape; with the
    # default, a 1x1 grid returns a bare Axes object that has no ravel().
    _, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(16, 8), squeeze=False)
    cells = axes.ravel()

    # Hide the frame of every cell up front so unused cells stay empty.
    for cell in cells:
        cell.set_axis_off()

    # zip stops when the grid is full, so surplus paths are never read.
    for cell, image_path in zip(cells, images_paths):
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            print(f"Skipping unreadable image: {image_path}")
            continue
        # OpenCV loads pixels as BGR; matplotlib expects RGB.
        cell.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    plt.tight_layout()
    plt.show()

In [None]:
# Show nine training images in a 3x3 grid; the slice length matches the
# number of grid cells.
display_multiple_img(train_images_path[100:109], 3, 3)

In [None]:
# Show test images in a single row of three panels
display_multiple_img(test_images_path, 1, 3)