In [9]:
import cv2
import numpy as np
import sys
sys.path.append('..')
#from autocrop import crop

In [17]:
import cv2 as cv2
import numpy as np


def crop(path=None):
    """
    Returns a cropped version of the image stored at path.

    The image is loaded with OpenCV, reduced to its green channel, contrast
    enhanced and eroded; the substantial contours that remain are located
    and their bounding box (plus a margin) is used as the crop window.

    Parameters:
    - path: path to the image being cropped (must be a str)

    Returns:
    - cropped: the cropped image. NOTE: the crop is taken from the
               green-only working image, so the blue and red channels of
               the returned array are zero.

    Raises:
    - AssertionError: if path is not a str, if the file cannot be read as
                      an image, if the image is not three dimensional, or
                      if either side is not larger than 100 pixels
    """
    # AssertionError is raised explicitly (rather than via `assert`) so the
    # validation still runs under `python -O` while callers and tests that
    # expect AssertionError keep working.
    if not isinstance(path, str):
        raise AssertionError('The path should be in string format!')

    img = cv2.imread(path, cv2.IMREAD_COLOR)

    # cv2.imread reports a missing or unreadable file by returning None.
    if img is None:
        raise AssertionError('the path is not right or there is no such a file. Check path or file name.')

    if img.ndim != 3:
        raise AssertionError('The image is not in right format. Image should have three diamensions')

    if not (img.shape[0] > 100 and img.shape[1] > 100):
        raise AssertionError('the image is too blurred. Please retake or reload')

    # leave only green color (channel order is BGR)
    img[:, :, 0] = 0
    img[:, :, 2] = 0

    # convert to gray scale
    gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # more contrast between foreground and background
    contrasted_img = apply_contrast(gray_img)

    # erode image
    kernel = np.ones((5, 5), np.uint8)
    eroded_img = cv2.erode(contrasted_img, kernel, iterations=15)

    # locate contours
    contours = _locate_contours(eroded_img)

    # determine crop window [left, top, right, bottom] from the contours
    left, top, right, bottom = _determine_cropped_image_box(img, contours)

    # rows are indexed by y (top:bottom), columns by x (left:right)
    return img[top:bottom, left:right]


def apply_contrast(img):
    """
    Boosts the contrast of img with CLAHE followed by a linear gain.

    Parameters:
    - img: the image to enhance; it is handed directly to CLAHE's apply()

    Returns:
    - contrasted_img: contrasted version of img
    """
    clip_limit = 2
    tile_grid = (2, 2)
    gain = 3  # (1.0-3.0)
    offset = 0  # (0-100)

    # CLAHE (Contrast Limited Adaptive Histogram Equalization)
    equalizer = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid)
    equalized = equalizer.apply(img)

    # scale, take the absolute value and convert back to 8 bit
    return cv2.convertScaleAbs(equalized, alpha=gain, beta=offset)


def _locate_contours(img):
    """
    Returns the substantial contours in img.

    img is binarised with a fixed threshold, contours are extracted from
    the binary image, and only those whose area exceeds a minimum are kept.
    Nothing is drawn onto img.

    Parameters:
    - img: the image being analyzed (passed to cv2.threshold)

    Returns:
    - substantial_contours: list of the contours whose area is greater
                            than 15000
    """
    min_threshold = 75
    threshold_output = 255
    min_contour_area = 15000

    _, binary = cv2.threshold(img, min_threshold, threshold_output,
                              cv2.THRESH_BINARY)

    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contours are second-to-last in both.
    contours = cv2.findContours(binary, cv2.RETR_LIST,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]

    return [contour for contour in contours
            if cv2.contourArea(contour) > min_contour_area]


def _determine_cropped_image_box(img, contours):
    """
    Returns the pixel box used to crop img around the given contours.

    The box starts as the bounding rectangle of the first contour and is
    grown to include later contours whose left edge lies in the right half
    of the image. It is then padded (50 px left/top, 100 px right/bottom)
    and clamped to the image bounds.

    Parameters:
    - img: the image being analyzed; only its shape is read
    - contours: a list of contours whose locations should be included in
                the outputted box

    Returns:
    - crop_box: the pixel box in the form [left, top, right, bottom]. When
                contours is empty the box covers the whole image.
    """
    # https://stackoverflow.com/questions/37803903/opencv-and-python-for-auto-cropping
    # NumPy shape is (rows, columns, ...): rows bound y, columns bound x.
    height, width = np.shape(img)[:2]

    crop_box = None
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if crop_box is None:
            crop_box = [x, y, x + w, y + h]
        # NOTE(review): only contours starting in the right half extend the
        # box; the reason for this filter is not visible here - confirm.
        elif x > width / 2:
            crop_box[0] = min(x, crop_box[0])
            crop_box[1] = min(y, crop_box[1])
            crop_box[2] = max(x + w, crop_box[2])
            crop_box[3] = max(y + h, crop_box[3])

    # Nothing to crop around: keep the full image instead of deriving a box
    # from placeholder coordinates.
    if crop_box is None:
        return [0, 0, width, height]

    # add bounding space, clamped to the image: x by width, y by height
    crop_box[0] = max(0, crop_box[0] - 50)
    crop_box[1] = max(0, crop_box[1] - 50)
    crop_box[2] = min(width, crop_box[2] + 100)
    crop_box[3] = min(height, crop_box[3] + 100)

    return crop_box

In [18]:
type('test_image.jpg') == str

True

In [19]:
testpath = 'test_image1.jpg'

In [20]:
dat = crop()

AssertionError: The path should be in string format!

In [21]:
crop(1)

AssertionError: The path should be in string format!

In [None]:
dat.shape

In [22]:
dat = crop('test_image.jpg')

In [24]:
img = cv2.imread('test_image.jpg', cv2.IMREAD_COLOR)

In [27]:
np.shape(img)

(859, 861, 3)

In [29]:
assert 2000> img.shape[0] >100 

In [35]:
2000> img.shape[0] >100 and 2000> img.shape[1] >100 and img.shape[2] ==3

True

In [None]:
img.shape[0:3] != None and sum(img.shape)

In [39]:
img.ndim

3

In [46]:
img = cv2.imread('test_image2.jpg', cv2.IMREAD_COLOR)

In [47]:
img.shape

(142, 355, 3)

In [11]:
img.any == None

False

In [30]:
img.shape[0:3]

(859, 861, 3)

In [25]:
img.shape[0:3] != None

True

In [13]:
img.shape[1]

861

In [18]:
img.shape[0] >100 and img.shape[1] >100

True

In [15]:
img.shape[1] >100

True

In [17]:
img.shape[0] >100

True

In [1]:
# -*- coding: utf-8 -*-
"""
Created on Wed Feb 23 21:52:41 2022

@author: Colem

Modified on Sun Mar 13
assertions written for tests
Xuetao.
"""
import numpy as np
import pandas as pd
import os


"""
Data sets All, AB and AC are each stored in two parts that need to be
concatenated; all other data sets are a single file.
"""



def get_data_array(Dataset=None):
    """
    Load one of the QIAML data sets from ./Datasets/ as a NumPy array.

    Data sets stored in two files are loaded part by part and concatenated
    along axis 0.

    Parameters
    ----------
    Dataset : str
        Which data set is desired: All, A, B, C, AB, AC or BC.

    Returns
    -------
    data : array
        Per the original author's notes (layout is not checked here):
        array[i][j] -> i runs over the images, j=0 selects the image
        array, j=1 is the input copy number.

    Raises
    ------
    AssertionError
        If Dataset is not a str, is not one of the known names, or the
        ./Datasets/ directory does not exist.
    """
    # AssertionError is raised explicitly (rather than via `assert`) so the
    # validation still runs under `python -O` while callers and tests that
    # expect AssertionError keep working.
    if not isinstance(Dataset, str):
        raise AssertionError('The chosen dataset names should be in string format!')

    # Data set name -> file(s) inside ./Datasets/, in concatenation order.
    dataset_files = {
        'All': ('QIAML_All_1_Data.npy', 'QIAML_All_2_Data.npy'),
        'AB': ('QIAML_Data_AB_1.npy', 'QIAML_Data_AB_2.npy'),
        'A': ('QIAML_Data_A.npy',),
        'B': ('QIAML_Data_B.npy',),
        'C': ('QIAML_Data_C.npy',),
        'AC': ('QIAML_Data_AC_1.npy', 'QIAML_Data_AC_2.npy'),
        'BC': ('QIAML_Data_BC.npy',),
    }
    if Dataset not in dataset_files:
        raise AssertionError('Error: Datasets limited to A, B, C, AB, AC, BC')

    path = './Datasets/'
    if not os.path.isdir(path):
        raise AssertionError('Error Datasets is not downloaded into directory. Please download Datasets.')

    # NOTE(security): allow_pickle=True unpickles object arrays, which can
    # execute arbitrary code; only load .npy files from a trusted source.
    parts = [np.load(path + file_name, allow_pickle=True)
             for file_name in dataset_files[Dataset]]

    if len(parts) == 1:
        return parts[0]
    return np.concatenate(parts, axis=0)

def get_data_df(Dataset):
    """
    Load a data set with get_data_array and wrap it in a DataFrame.

    Parameters
    ----------
    Dataset : str
        Which data set is desired; forwarded unchanged to get_data_array,
        which validates it.

    Returns
    -------
    data : pandas dataframe.
        DESCRIPTION: dataframe with columns: Images, Copy number

    """
    # Every data set name is handled, not only 'All'; other names used to
    # fall through and return None.
    return pd.DataFrame(get_data_array(Dataset),
                        columns=['Images', 'Copy number'])


In [2]:
get_data_array('All')

AssertionError: Error Datasets is not downloaded into directory. Please download Datasets.

In [13]:
path = './Datasets/'
path2 = '__pycache__'
isdir = os.path.isdir(path2)

In [14]:
isdir

True

In [12]:
path

'./Datasets/'

In [15]:
get_data_array()

AssertionError: The chosen dataset names should be in string format!

In [16]:
None == str

False