# Setup

## Library import
We import all the required Python libraries.

In [1]:
import cv2 as cv
import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm
tqdm.pandas()

## Local library import
We import all the required local libraries.

In [2]:
### Function takes an image and calculates its Linewidth, Overspray, and Roughness 
def ImageProcessing(image):
    
    # Read the image as Gray
    img = cv.imread(image,0) 
    
    #Pixel distribution for threshold determination
    vals = img.mean(axis=1).flatten()
    # Calculate histogram
    counts, bins = np.histogram(vals, range(257))
    mids = 0.5*(bins[1:] + bins[:-1])
    mean = int(np.average(mids, weights=counts))
    mean_threshold = int(mean*1.1)
    
    # Make the image Black and White
    threshold, BW_image = cv.threshold(img, mean_threshold, 255, cv.THRESH_BINARY )
    
    # Remove the Salt&Pepper effect with MedianBlur 
    BlurIm = cv.medianBlur(BW_image, 51)
    
    # Find the overspray contours and remove them for line width and roughness calculation 
    contours, hierarchy = cv.findContours(BlurIm, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    NoSpBlurIm = BlurIm
    for i in contours:
        area = cv.contourArea(i)
        if 1 < area < 10000:
            cv.drawContours(NoSpBlurIm, [i], -1, thickness = -1, color = (0,0,0)) # Find overspray and make it dark
    
    
    ## Find Print Lines 
    top_edge = []
    bottom_edge = []
    first_col = 0

    # Find the first column that print starts
    for i in range(NoSpBlurIm.shape[1]): #all columns of the image
        if np.sum(NoSpBlurIm[:, i]) > 0:
            break
        first_col += 1

    # find the last column that print finishes
    last_col = first_col
    for i in range(first_col,NoSpBlurIm.shape[1]): #all columns of the image
        if np.sum(NoSpBlurIm[:, i]) == 0:
            break
        last_col += 1

    # X is all columns from first_col to last_col
    X = np.array([i for i in range(first_col, last_col)])

    print_length = last_col - first_col
    
    # In all columns that print exist find the top and bottom row that print exist 
    for i, column in enumerate(X):
        if np.sum(NoSpBlurIm[:, column]) > 0:
            top_edge.append(np.argmax(NoSpBlurIm[:, column])) #np.argmax gives the first row that have pixel 255
            bottom_edge.append(NoSpBlurIm.shape[0] - np.argmax(NoSpBlurIm[::-1, column])) # last row that has pixel 255

    # Regression of top and bottom lines 
    top_line = LinearRegression().fit(X.reshape(-1,1), top_edge)
    bottom_line = LinearRegression().fit(X.reshape(-1, 1), bottom_edge)
    
    ## Calculate Roughness
    roughness = 0.0
    for column in range(NoSpBlurIm.shape[1]): # for all columns
        if np.sum(NoSpBlurIm[:, column]) > 0: # if print exist
            top_edge = np.argmax(NoSpBlurIm[:, column])
            bottom_edge = np.argmax(NoSpBlurIm[::-1, column])

            delta_up = np.abs(top_edge - top_line.predict(np.array(column).reshape(1, -1)))
            delta_low = np.abs(bottom_edge - bottom_line.predict(np.array(column).reshape(1, -1)))
            roughness += (delta_up + delta_low)

    roughness /= (2*print_length)

    ## Find Overspray Contours
    # Get the image with overspray 
    img = cv.imread(file,0) # read the image gray. 0 means gray
    threshold, BW_image = cv.threshold(img, mean_threshold, 255, cv.THRESH_BINARY )
    
    # Find the overspray countors
    contours, hierarchy = cv.findContours(BW_image, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)

    overspray_contours = []
    for contour in contours:
        area = cv.contourArea(contour)
        if area == 0:
            continue 
        M = cv.moments(contour)
        center_x = int(M["m10"] / M["m00"])
        center_y= int(M["m01"] / M["m00"])

        if center_y < top_line.predict(np.array(center_x).reshape(1, -1)) or center_y > bottom_line.predict(np.array(center_x).reshape(1, -1)):
            area = cv.contourArea(contour)

            if 25 < area < 10000:
                overspray_contours.append(contour)

    OversprayImg = BW_image
    OversprayImg = cv.cvtColor(OversprayImg,cv.COLOR_GRAY2RGB)
    cv.drawContours(image=OversprayImg, contours=overspray_contours, contourIdx=-1, color=(0, 0, 255), thickness=cv.FILLED, lineType=cv.LINE_AA)
          
    ## Overspray Calculation
    overspray = 0.0
    for contour in overspray_contours:
        M = cv.moments(contour)
        center_x = int(M["m10"] / M["m00"])
        center_y= int(M["m01"] / M["m00"])
        area = cv.contourArea(contour)
        top = top_line.predict(np.array(center_x).reshape(1, -1))
        if center_y > top:
            overspray += area*(center_y - top[0]) # Both area and how far the overspray are important
        else:
            overspray += area*(bottom_line.predict(np.array(center_x).reshape(1, -1))[0] - center_y)

    overspray /= print_length
    
    ## Draw the processed line
    top_start_point = (0, int(top_line.predict(np.array(0).reshape(1, -1))[0]))
    top_end_point = (BW_image.shape[1] - 1, int(top_line.predict(np.array(BW_image.shape[1] - 1).reshape(1, -1))[0]))
    bottom_start_point = (0, int(bottom_line.predict(np.array(0).reshape(1, -1))[0]))
    bottom_end_point = (BW_image.shape[1] - 1, int(bottom_line.predict(np.array(BW_image.shape[1] - 1).reshape(1, -1))[0]))

    cv.line(OversprayImg, top_start_point, top_end_point, (0, 255, 0), 5)
    cv.line(OversprayImg, bottom_start_point, bottom_end_point, (0, 255, 0), 5)

    # save the processed line in f_save path
    f_save = r'../../data/processed/microscopic_imgs/cropped_clean/processed_imgs'
    cv.imwrite(f_save+"/"+image, OversprayImg)


    ## Linewidth Calculation
    L1 = []
    L2 = []
    Width_Sum = 0
    for i in range(len(X)):
        Top= int(top_line.predict(np.array(X[i].reshape(1, -1))))
        Bot= int(bottom_line.predict(np.array(X[i].reshape(1, -1))))
        Width_Column = Bot-Top
        Width_Sum = Width_Sum + Width_Column

    LineWidth = Width_Sum/print_length
    return(LineWidth, overspray, float(roughness))
    
    

In [3]:
# This code reads all cropped (i.e., ready) images, calculates the Line Width, Overspray, and Roughness of each,
# and finally saves the results in a DataFrame

import pandas as pd
import glob, os
import re

# path of all corpped images 
f_read = r'/home/computeruser/research/atomic/dataset/images'

#os.chdir(f_read)
column_names = ['File Name','Print ID','Print Height','Nozzle Voltage','Drop Spacing','Line Width',
                'Overspray','Roughness'] 
Print_Database = pd.DataFrame(columns = column_names)

for file in tqdm(glob.glob(f"{f_read}/*.png")): # read all png images in f_read path
    data = ImageProcessing(file)
    DigitsInFile = re.findall('(\d+)', file) # Extract all digits in the file name in an array
    temp_df = pd.DataFrame({'File Name': file,'Print ID':[int(DigitsInFile[0])],'Print Height':[int(DigitsInFile[1])],
                            'Nozzle Voltage':[int(DigitsInFile[2])],'Drop Spacing':[int(DigitsInFile[3])],
                            'Line Width' :[float(data[0])],'Overspray' :[float(data[1])],
                            'Roughness':[float(data[2])]})
    Print_Database = pd.concat([Print_Database, temp_df], ignore_index = True, axis = 0)
    
        
Print_Database = Print_Database.sort_values('File Name') # sort by file name
Print_Database = Print_Database.reset_index(drop=True) # reset the index based on new sort 
Print_Database


  0%|          | 0/2233 [00:00<?, ?it/s]

Unnamed: 0,File Name,Print ID,Print Height,Nozzle Voltage,Drop Spacing,Line Width,Overspray,Roughness
0,/home/computeruser/research/atomic/dataset/ima...,1,800,25,8,291.874868,0.000000,42.771747
1,/home/computeruser/research/atomic/dataset/ima...,1,800,25,8,363.114495,31.004060,296.820248
2,/home/computeruser/research/atomic/dataset/ima...,1,800,25,8,259.106667,0.000000,123.661153
3,/home/computeruser/research/atomic/dataset/ima...,1,800,25,8,260.506809,0.000000,72.222550
4,/home/computeruser/research/atomic/dataset/ima...,1,800,25,8,276.590840,6.325386,162.214829
...,...,...,...,...,...,...,...,...
2228,/home/computeruser/research/atomic/dataset/ima...,99,750,34,16,175.921831,0.000000,158.359276
2229,/home/computeruser/research/atomic/dataset/ima...,99,750,34,16,176.540470,0.000000,16.052330
2230,/home/computeruser/research/atomic/dataset/ima...,99,750,34,16,179.138182,0.000000,18.593236
2231,/home/computeruser/research/atomic/dataset/ima...,99,750,34,16,176.129524,0.000000,195.397440


In [4]:
f_save= r'../../data/processed/microscopic_imgs/cropped_clean/db'
#os.chdir(f_read)
column_names_ML = ['Print ID','Print Height','Nozzle Voltage','Drop Spacing','Line Width',
                'Overspray','Roughness'] 
Print_Database_ML = pd.DataFrame(columns = column_names_ML)

number_rows = Print_Database.shape[0]
Print_ID_List = []
for row in range (0,number_rows):
    Print_ID_List.append(Print_Database['Print ID'][row])
Print_ID_List = list(set(Print_ID_List)) # remove the duplicated Print IDs
Print_ID_List.sort()

for PrintID in tqdm(Print_ID_List):
    temp_df1 = Print_Database.loc[Print_Database['Print ID'] == PrintID]
    temp_df1 = temp_df1.reset_index(drop=True)
    DigitsInFile = re.findall('(\d+)', temp_df1["File Name"][0]) # extract all digits in the file name in an array
    temp_df2 = pd.DataFrame({'Print ID':[int(DigitsInFile[0])],
                              'Print Height':[int(DigitsInFile[1])],
                              'Nozzle Voltage':[int(DigitsInFile[2])],
                              'Drop Spacing':[int(DigitsInFile[3])],
                              'Line Width' :[int(temp_df1[["Line Width"]].mean()[0])],
                              'Overspray' :[int(temp_df1[["Overspray"]].mean()[0])],
                              'Roughness':[int(temp_df1[["Roughness"]].mean()[0])]})
    Print_Database_ML = pd.concat([Print_Database_ML, temp_df2], ignore_index = True, axis = 0)

Print_Database_ML = Print_Database_ML.sort_values('Print ID')
Print_Database_ML = Print_Database_ML.reset_index(drop=True)

Print_Database_ML.to_csv(f_save+"/"+'Print_Database_ML.csv')
Print_Database_ML

  0%|          | 0/205 [00:00<?, ?it/s]

Unnamed: 0,Print ID,Print Height,Nozzle Voltage,Drop Spacing,Line Width,Overspray,Roughness
0,1,800,25,8,294,12,164
1,2,800,25,9,261,136,141
2,3,800,25,10,218,11,103
3,4,800,25,11,190,15,68
4,5,800,25,12,190,91,90
...,...,...,...,...,...,...,...
200,205,650,31,12,262,17,94
201,206,650,31,13,241,15,86
202,207,650,31,14,191,77,87
203,209,650,31,16,188,1,73


# Parameter definition
We set all relevant parameters for our notebook. By convention, parameters are uppercase, while all the 
other variables follow Python's guidelines.


# Data import
We retrieve all the required data for the analysis.

# Data processing
Put the core of the notebook here. Feel free to further split this section into subsections.

# References
We report here relevant references:
1. author1, article1, journal1, year1, url1
2. author2, article2, journal2, year2, url2