#  Extracting Statistical Information from AFM Scans

## Importing Packages

In [1]:
import csv
import math
import numpy as np
from scipy.signal import argrelextrema
from findpeaks import findpeaks as findpeaks
import pandas as pd

## Defining Functions

In [2]:
# Extpected input: Outputted cross sectional data matrix from Pygwy script,
# pass the pixel dimension of your image as the value for n, defaulted to 512 representing a SQUARE 512x512 image.
# Expected Output: Nested list, containing lists of a row-wise breakup of the inital matrix of size n by n.
def SplitDataMatrixIntoRows(data, pixel_dimension_of_image=512):
    nested_list_of_individual_cross_sections = list()
    for i in range(0, len(data), pixel_dimension_of_image):
        nested_list_of_individual_cross_sections.append(data[i:i + pixel_dimension_of_image])
    return np.array(nested_list_of_individual_cross_sections)

# Expected input: Output from SplitDataMatrixIntoRows(),
# pass the physical length of your image in micrometers as "length_in_micrometers" and the same pixel dimension as 
# inputted into SplitDataMatrixIntoRows(), defaulted to length_in_micrometers=6 and pixel_dimension_of_image=512.
# Expected Output: Nested list containing lists of x-axis locations of negative peaks (relative minima)
def ExtractDistancesBetweenRelativeMinima_GrainDiameter(data, length_in_micrometers=6, pixel_dimension_of_image=512):
    scaling_factor = length_in_micrometers/pixel_dimension_of_image
    nested_list_of_xaxis_locations_of_relative_minima_for_each_cross_section = list()
    for each_individual_cross_section in data:
        xaxis_locations_of_relative_minima = argrelextrema(each_individual_cross_section, np.less)
        xaxis_locations_of_relative_minima = xaxis_locations_of_relative_minima[0]
        xaxis_locations_of_relative_minima = xaxis_locations_of_relative_minima*scaling_factor
        nested_list_of_xaxis_locations_of_relative_minima_for_each_cross_section.append(xaxis_locations_of_relative_minima)
    return nested_list_of_xaxis_locations_of_relative_minima_for_each_cross_section
    
# Expected Input: Output from ExtractDistancesBetweenRelativeMinima_GrainDiameter()  
# Expected Output: Two floats: The average grain diameter for all grains in the image and the standard deviation.  
def CalculateAverageAndStandardDeviation_GrainDiameter(data):
    list_of_grain_diameters_for_all_cross_sections = list()
    for each_list_of_relative_minima_locations in data:
        list_of_individual_diameters = np.diff(each_list_of_relative_minima_locations)
        list_of_grain_diameters_for_all_cross_sections.append(list_of_individual_diameters)
    list_of_grain_diameters_for_all_cross_sections = np.concatenate(list_of_grain_diameters_for_all_cross_sections).ravel()
    average_diameter = np.mean(list_of_grain_diameters_for_all_cross_sections)
    standard_deviation_diameter = np.std(list_of_grain_diameters_for_all_cross_sections)
    return average_diameter,standard_deviation_diameter

# Expected Input: Output from SplitDataMatrixIntoRows().
# Expected Output: Nested list containing heights of individual grains.
def ExtractHeightOfRelativeMaxima_GrainHeight(data):
    nested_list_of_grain_heights = list()
    for each_individual_cross_section in data:
        individual_heights = each_individual_cross_section[argrelextrema(each_individual_cross_section, np.greater)[0]]
        nested_list_of_grain_heights.append(individual_heights)
    return nested_list_of_grain_heights

# Expected Input: Output from ExtractHeightOfRelativeMaxima_GrainHeight()
# Expected Output: Two floats: The average grain height for all grains in the image and the standard devaiton.
def CalculateAverageAndStandardDeviation_GrainHeight(data):
    list_of_grain_heights_for_all_cross_sections = np.concatenate(data).ravel()
    average_height = np.mean(list_of_grain_heights_for_all_cross_sections)
    standard_deviation_height = np.std(list_of_grain_heights_for_all_cross_sections)
    return average_height,standard_deviation_height

# Expected Input: Output from ExtractDistancesBetweenRelativeMinima_GrainDiameter()
# Expected Output: Four floats: The average maximum grain diameter from each cross section and the standard deviation.
# The average minimum grain diameter from each cross section and the standard deviation.
def CalculateAverageAndStandardDeviationOfMaximumAndMinimumGrainDiameter(data):
    list_of_maximum_grain_diameter_from_each_cross_section = list()
    list_of_minimum_grain_diameter_from_each_cross_section = list()
    for each_individual_cross_section in data:
        list_of_individual_diameters = np.diff(each_individual_cross_section)
        list_of_maximum_grain_diameter_from_each_cross_section.append(max(list_of_individual_diameters))
        list_of_minimum_grain_diameter_from_each_cross_section.append(min(list_of_individual_diameters))
    average_maximum_grain_diameter = np.mean(list_of_maximum_grain_diameter_from_each_cross_section)
    standard_deviation_of_maximum_grain_diameter = np.std(list_of_maximum_grain_diameter_from_each_cross_section)
    average_minimum_grain_diameter = np.mean(list_of_minimum_grain_diameter_from_each_cross_section)
    standard_deviation_of_minimum_grain_diameter = np.std(list_of_minimum_grain_diameter_from_each_cross_section)
    return average_maximum_grain_diameter, standard_deviation_of_maximum_grain_diameter, average_minimum_grain_diameter, standard_deviation_of_minimum_grain_diameter

# Expected Input: Output from ExtractHeightOfRelativeMaxima_GrainHeight()
# Expected Output: Four floats: The average maximum grain height from each cross section and the standard deviation.
# The average minimum grain height from each cross section and the standard deviation.
def CalculateAverageAndStandardDeviationOfMaximumAndMinimumGrainHeight(data):
    list_of_maximum_grain_heights_from_each_cross_section = list()
    list_of_minimum_grain_heights_from_each_cross_section = list()
    for each_individual_cross_section in data:
        list_of_maximum_grain_heights_from_each_cross_section.append(max(each_individual_cross_section))
        list_of_minimum_grain_heights_from_each_cross_section.append(min(each_individual_cross_section))
    average_maximum_grain_height = np.mean(list_of_maximum_grain_heights_from_each_cross_section)
    standard_deviation_of_maximum_grain_heights = np.std(list_of_maximum_grain_heights_from_each_cross_section)
    average_minimum_grain_height = np.mean(list_of_minimum_grain_heights_from_each_cross_section)
    standard_deviation_of_minimum_grain_heights = np.std(list_of_minimum_grain_heights_from_each_cross_section)
    return average_maximum_grain_height, standard_deviation_of_maximum_grain_heights, average_minimum_grain_height, standard_deviation_of_minimum_grain_heights

def process_morphology_return_data_only(morphology_title, list_of_scans, times, length_in_micrometers=6, pixel_dimension_of_image=512):
    scaling_factor = length_in_micrometers / pixel_dimension_of_image
    fp = findpeaks(method='topology', whitelist='peak')
    scan_data = dict()
    peak_y_cordinates = list()
    data = {"mu_n_grains":list(),
            "sigma_n_grains":list(),
            "mu_grain_width":list(),
            "sigma_grain_width":list(),
            "mu_grain_amp":list(),
            "sigma_grain_amp":list()}
    
    for index, scan in list_of_scans.items():
        fit = fp.fit(scan)
        list_of_peak_cordinates = fit['groups0']
        for peak in list_of_peak_cordinates:
            if peak[2] != 0.0:
                peak_y_cordinates.append(peak[0][0])
                    
        list_of_crosssections_with_peaks = [scan[i] for i in list(set(peak_y_cordinates))]
        
        scan_mean_n_grains, scan_std_n_grains = len(list_of_peak_cordinates), 1
        
        scan_diams = ExtractDistancesBetweenRelativeMinima_GrainDiameter(list_of_crosssections_with_peaks)
        scan_mean_diameter, scan_std_diameter = CalculateAverageAndStandardDeviation_GrainDiameter(scan_diams)
        
        scan_height = ExtractHeightOfRelativeMaxima_GrainHeight(list_of_crosssections_with_peaks)
        scan_mean_height, scan_std_height = CalculateAverageAndStandardDeviation_GrainHeight(scan_height)
        
        collection = [scan_mean_n_grains, scan_std_n_grains,
                      (scan_mean_diameter/scaling_factor)/2, (scan_std_diameter/scaling_factor)/10,
                      (scan_mean_height/scaling_factor)/10, (scan_std_height/scaling_factor)/100]
        
        for entry, index in zip(collection, data.keys()): 
            data[index].append(entry)
            
    data['times(min)'] = list(times)

    return data

## Opening AFM Scan Data (2d arrays)

In [3]:
def open_files(file_names, scan_title):
    temp = dict()
    for name in file_names:
        with open(f'../../UR_la_plante/CaCO3 Gwyddion Python/gwyddion_scripts/gwy_data/{name}_full.txt') as f:
            reader = csv.reader(f)
            data = list(reader)
            data = np.array(data[0])
            exec(f'{name}_raw_data = data.astype(float)')
            
    for name in file_names:
        exec(f'{name}_divided = SplitDataMatrixIntoRows({name}_raw_data)')
        exec(f'temp[name] = {name}_divided')
        
    for key, array in temp.items():
        array = np.vstack([array, array[0]])
        temp[key] = array
        
    return temp       

Open all files for the C11 Carbon Chain morphology.

In [4]:
c11_scan_title = 'C11'
c11_times = np.array([5, 14, 31, 77, 130])
c11_file_names = ['c11_5','c11_14','c11_31','c11_77','c11_130']

c11_all_scans = open_files(c11_file_names, c11_scan_title)

Open all files for the C11OH Carbon Chain morphology.

In [5]:
c11OH_scan_title = 'C11OH'
c11OH_times = np.array([4, 18, 66, 100])
c11OH_file_names = ['c11OH_4', 'c11OH_18', 'c11OH_66', 'c11OH_100']

c11OH_all_scans = open_files(c11OH_file_names, c11OH_scan_title)

Open all files for the C11NF Carbon Chain morphology.

In [6]:
c11NF_scan_title = 'C11NF'
c11NF_times = np.array([6, 23, 51, 84, 102])
c11NF_file_names = ['c11NF_6', 'c11NF_23', 'c11NF_51', 'c11NF_84', 'c11NF_102']

c11NF_all_scans = open_files(c11NF_file_names, c11NF_scan_title)

## Initializing Data Generation Parameters
Utilizing the "Findpeaks" Python package we will make a function that returns statistical information that will be passed in as parameters to output synthetic data generation functions necessary to generate representative data.

**Module Docs for reference:** https://erdogant.github.io/findpeaks/pages/html/index.html

In [7]:
# Running our data through the function.
c11_morph_stats = process_morphology_return_data_only(c11_scan_title, c11_all_scans, c11_times)

[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to ui

In [8]:
c11OH_morph_stats = process_morphology_return_data_only(c11OH_scan_title, c11OH_all_scans, c11OH_times)

[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to ui

In [9]:
c11NF_morph_stats = process_morphology_return_data_only(c11NF_scan_title, c11NF_all_scans, c11NF_times)

[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to uint8
[findpeaks] >Conversion to gray image.
[findpeaks] >Denoising with [fastnl], window: [3].
[findpeaks] >Detect peaks using topology method with limit at None.
[findpeaks] >Fin.
[findpeaks] >Finding peaks in 2d-array using topology method..
[findpeaks] >Scaling image between [0-255] and to ui

## Export Statistical Values

In [10]:
with open("statistical_data//c11_morph_stats.csv", "w") as outfile:
    writer = csv.writer(outfile)
    writer.writerow(c11_morph_stats.keys())
    writer.writerows(zip(*c11_morph_stats.values()))

In [14]:
with open('statistical_data/c11OH_morph_stats.csv') as f:
    df_c11 = pd.read_csv(f)
    #df_c11 = df_c11.T.to_dict('list')
df_c11

Unnamed: 0,mu_n_grains,sigma_n_grains,mu_grain_width,sigma_grain_width,mu_grain_amp,sigma_grain_amp,times(min)
0,879,1,7.629606,0.818005,446.671242,14.579432,4
1,2336,1,5.964128,0.71833,690.371578,17.060765,18
2,551,1,9.468574,1.099737,638.629809,17.52708,66
3,1306,1,7.886063,0.928313,660.868158,18.158011,100


In [12]:
with open("statistical_data//c11OH_morph_stats.csv", "w") as outfile:
    writer = csv.writer(outfile)
    writer.writerow(c11OH_morph_stats.keys())
    writer.writerows(zip(*c11OH_morph_stats.values()))

In [13]:
with open("statistical_data//c11NF_morph_stats.csv", "w") as outfile:
    writer = csv.writer(outfile)
    writer.writerow(c11NF_morph_stats.keys())
    writer.writerows(zip(*c11NF_morph_stats.values()))