# Prototype Hyperspectral Class

In [None]:
import glob, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage.measure import label, regionprops
from PIL import Image 

## Module

#### Metadata

In [None]:
def datastore_metadata( path_to_images ):
    """Build a metadata table for the PNG images found in a directory.

    Every ``.png`` file in ``path_to_images`` becomes one row holding its
    filename plus the identifier and depth fields parsed from that filename.
    Rows are sorted by "box-id" and re-indexed from zero.
    """
    png_names = [
        name for name in os.listdir( path_to_images ) if name.endswith( ".png" )
    ]
    filenames = pd.DataFrame(png_names, columns=["filename"])

    # filename | core-id, box-id | depth-start, depth-end
    parts = [filenames, parse_identifier( filenames ), parse_depths( filenames )]
    combined = pd.concat(parts, axis=1)

    ordered = combined.sort_values(by="box-id")
    return ordered.reset_index( drop=True )


def parse_identifier(df):
    """Extract the core and box identifiers from the "filename" column.

    The text between "CMM-" and "@" is split on "_" and converted to
    integers; the two resulting columns are named "core-id" and "box-id".
    """
    id_text = df["filename"].str.extract("CMM-(.*)@", expand=False)
    id_fields = id_text.str.split("_", expand=True)
    identifiers = id_fields.astype("int")
    identifiers.columns = ["core-id", "box-id"]
    return identifiers


def parse_depths(df):
    """Extract the start and end depths from the "filename" column.

    The text between "@" and the "png" suffix is split on "_" and converted
    to floats; the two resulting columns are named "depth-start" and
    "depth-end".
    """
    depth_text = df["filename"].str.extract("@(.*).png", expand=False)
    depth_fields = depth_text.str.split("_", expand=True)
    depths = depth_fields.astype("float")
    depths.columns = ["depth-start", "depth-end"]
    return depths

#### Datastore I/O

In [None]:
def read_images(path_to_images, image_names, mode="sample"):
    """Read images from disk into a list of ndarrays.

    Parameters
    ----------
    path_to_images : str
        Directory that contains the image files.
    image_names : sequence of str
        Filenames relative to ``path_to_images``; looked up as
        ``image_names[i]`` for ``i = 0 .. n-1``.
    mode : {"sample", "all"}
        "sample" reads at most the first 5 images, "all" reads every image.

    Returns
    -------
    list of numpy.ndarray
        One array per image, in the order of ``image_names``.

    Raises
    ------
    ValueError
        If ``mode`` is neither "sample" nor "all".
    """
    sample_size = 5

    if mode == "sample":
        # Cap at the number of available images so that small datastores
        # (fewer than 5 files) do not fail with an out-of-range lookup.
        n = min(sample_size, len(image_names))
    elif mode == "all":
        n = len(image_names)
    else:
        raise ValueError("Unknown mode selection. Select sample or all.")

    images = []
    for i in range(n):
        image_path = os.path.join(path_to_images, image_names[i])
        # Convert inside the context manager so the file handle is released
        # as soon as the pixel data has been copied into the array.
        with Image.open(image_path) as image_file:
            images.append(np.array(image_file))
    return images


def read_datastore( path_to_images, mode="sample"):
    """Read the images of a datastore directory.

    Builds the metadata table for ``path_to_images`` and reads the listed
    files in metadata order (sorted by "box-id").

    Parameters
    ----------
    path_to_images : str
        Directory that contains the PNG images.
    mode : {"sample", "all"}
        Forwarded to ``read_images``.

    Returns
    -------
    list of numpy.ndarray
    """
    image_metadata = datastore_metadata( path_to_images )
    image_names = image_metadata["filename"]
    # Forward the caller's mode; it used to be hard-coded to "sample", which
    # made mode="all" silently return only the sample.
    images = read_images(path_to_images, image_names, mode=mode)

    return images

### Mineral map calcs

In [None]:
def mask_image( image ):
    """Return a copy of the image with pure-black pixels painted white.

    A pixel counts as background when all of its channels equal 0; those
    pixels are set to (255, 255, 255). The input array is not modified.
    """
    black = (0, 0, 0)
    white = [255, 255, 255]

    is_background = np.all(image == black, axis=-1)

    whitened = np.copy( image )
    whitened[is_background, :] = white
    return whitened


def identify_box( mask ):
    """Label the runs of image columns that contain core pixels.

    A column is background when every entry of ``mask`` in it is False/zero.
    The remaining columns are passed to ``label``, which numbers connected
    runs of non-background columns.
    """
    # "not (all entries are False)" is the same as "any entry is True".
    column_has_core = np.any( mask, axis=0 )
    return label( column_has_core )


def segment_box( box, compartment_mask ):
    """Split a core box into one column slice per labelled compartment.

    ``compartment_mask`` assigns a label to every column of ``box``; labels
    1..max each select the columns of one compartment. Returns a list whose
    entry ``i-1`` holds the columns carrying label ``i``.
    """
    highest_label = np.max( np.unique(compartment_mask) )
    return [
        box[:, compartment_mask==compartment]
        for compartment in range(1, highest_label + 1)
    ]


def get_core_pixel_counts( mask, core_compartment ):
    """Count core pixels in every image row of every compartment.

    Returns a float array of shape (rows, compartments). Entries that would
    be 0 are replaced by NaN.
    """
    segments = segment_box( mask, core_compartment )

    height = segments[0].shape[0]
    counts = np.zeros( (height, len(segments)) )

    for column, segment in enumerate( segments ):
        counts[:, column] = np.sum( segment, axis=1 )

    # Rows without any core pixel are flagged as NaN rather than zero.
    counts[counts == 0] = np.nan
    return counts


def get_mineral_pixel_counts( image, mask, mineral_dictionary ):
    """Count the pixels of every mineral per image row per compartment.

    Parameters
    ----------
    image : numpy.ndarray
        Mineral-map image; a pixel belongs to a mineral when all of its
        channels equal that mineral's colour.
    mask : numpy.ndarray
        Core mask used to locate the compartments (columns) of the box.
    mineral_dictionary : dict
        Maps mineral name to its pixel colour. The iteration order of the
        dictionary fixes the order of the last tensor axis.

    Returns
    -------
    numpy.ndarray
        Float tensor of shape (rows, compartments, minerals).
    """
    # segment core box mask into columns / compartments
    core_compartments = identify_box( mask )

    # define tensor dimensions
    n_row     = mask.shape[0]
    n_column  = get_compartment_count( core_compartments )
    n_mineral = len( mineral_dictionary )

    mineral_pixel_counts = np.zeros( (n_row, n_column, n_mineral) )

    # loop over minerals and core compartments
    for k, value in enumerate( mineral_dictionary.values() ):
        mineral = np.all(image == value, axis=-1)
        compartments = segment_box( mineral, core_compartments )
        for j, compartment in enumerate( compartments ):
            mineral_pixel_counts[:, j, k] = np.sum(compartment, axis=1)
    return mineral_pixel_counts


def get_mineral_pixel_prc( mineral_pixel_counts, core_pixel_counts ):
    """Divide mineral pixel counts by the core pixel counts.

    ``core_pixel_counts`` gets a trailing axis so that the same
    row/compartment value divides every mineral of the tensor.
    """
    core_per_cell = core_pixel_counts[:, :, np.newaxis]
    return mineral_pixel_counts / core_per_cell


def tensor_to_matrix(mineral_tensor):
    """Stack the compartments of a mineral tensor into one tall matrix.

    The tensor is reordered so its second axis comes first, and the
    resulting 2-D slices are concatenated along the rows, one after another.
    """
    by_compartment = mineral_tensor.transpose(1, 0, 2)
    stacked = np.concatenate( tuple(by_compartment), axis=0 )
    return stacked


def get_compartment_count( compartment_mask ):
    """Return the highest label present in the compartment mask."""
    distinct_labels = np.unique( compartment_mask )
    return distinct_labels.max()


### Class

In [252]:
class HyperspecDatastore:
    """Datastore of mineral-map images and their core masks.

    Typical use is ``HyperspecDatastore.read_ds(...)`` followed by
    ``calculate_metrics()`` for the active box or ``extract_features()`` for
    every loaded box.
    """

    def __init__(self, path_to_mineral, path_to_mask ):
        """Create an empty datastore bound to the two image directories.

        Nothing is read from disk here; call ``read()`` (or use ``read_ds``)
        to populate the datastore.
        """
        # index into minerals/masks/metadata used by calculate_metrics()
        self.active_index = 1
        # indices of boxes for which no features could be computed
        self.problem_index = []
        # features of the active box / of all boxes; replaced by DataFrames
        self.core_box_df = []
        self.core_df = []
        # image arrays, filled by read()
        self.minerals = []
        self.masks = []
        # merged filename/identifier/depth table, filled by read_metadata()
        self.metadata = []
        # mineral name -> legend colour, and the colours scaled by 1/255;
        # both filled by read_legend()
        self.minerals_dictionary = []
        self.minerals_colormap = []
        self.path_to_mineral = path_to_mineral
        self.path_to_mask = path_to_mask

    def read_metadata(self):
        """Build the joined metadata table of mineral maps and masks.

        Both directories are scanned separately and inner-joined on the
        identifier and depth columns, so only boxes present in both remain.
        The result is stored in ``self.metadata`` with the two filename
        columns placed last ("mineral_filename" then "mask_filename").
        """
        mineralmap_metadata = datastore_metadata( self.path_to_mineral )
        mask_metadata = datastore_metadata( self.path_to_mask )

        mineralmap_metadata = mineralmap_metadata.rename(columns={
            "filename": "mineral_filename",
            })
        mask_metadata = mask_metadata.rename(columns={
            "filename": "mask_filename",
            })

        joining_variables = ["core-id", "box-id", "depth-start", "depth-end"]
        metadata = mineralmap_metadata.merge(mask_metadata,
            how = "inner",
            left_on = joining_variables,
            right_on = joining_variables)

        # move the mineral filename next to the mask filename
        mineral_filename = metadata.pop( "mineral_filename" )
        metadata.insert(metadata.columns.get_loc("mask_filename"),
            "mineral_filename", mineral_filename)

        self.metadata = metadata

    def read_legend(self):
        """Read the mineral colour scheme from the legend image.

        Opens "mineral-legend.png" in the parent of ``path_to_mineral`` and
        samples one pixel per mineral at fixed (x, y) positions. Stores the
        name -> colour mapping in ``self.minerals_dictionary`` and the
        colours divided by 255 in ``self.minerals_colormap``.
        """
        legend_path = os.path.join(
            self.path_to_mineral, "..", "mineral-legend.png")
        with Image.open( legend_path ) as legend_image:
            legend = np.array( legend_image )

        # (x, y) pixel position of each legend swatch, in mineral_keys order
        points_to_sample = np.array([
            [17, 86],
            [17, 112],
            [17, 136],
            [17, 158],
            [17, 184],
            [17, 209],
            [17, 235],
            ])
        mineral_keys = [
            "illite",
            "illite-smectite",
            "low-reflectance",
            "montmorillonite",
            "other",
            "smectite-kaolinite",
            "smectite-saponite",
            ]

        pixel_values = []
        pixel_values_scaled = []
        for x, y in points_to_sample:
            # keep the first three channels only
            value = legend[y, x][:3]
            pixel_values.append( tuple(value) )
            pixel_values_scaled.append( tuple(value/255) )

        self.minerals_dictionary = dict( zip(mineral_keys, pixel_values) )
        self.minerals_colormap = pixel_values_scaled
        # Misspelled attribute that earlier versions wrote instead of
        # minerals_colormap; kept as an alias for existing callers.
        self.minerals_colomap = self.minerals_colormap

    def read(self, mode="sample"):
        """Read metadata, legend, mineral maps and masks.

        ``mode`` is forwarded to ``read_images`` ("sample" or "all").
        """
        #read/construct metadata
        self.read_metadata()

        #read mineralmap color scheme
        self.read_legend()

        #read minerals
        image_names = self.metadata["mineral_filename"]
        self.minerals = read_images(self.path_to_mineral, image_names, mode=mode )

        #read masks
        image_names = self.metadata["mask_filename"]
        self.masks  = read_images(self.path_to_mask, image_names, mode=mode )

    def extract_features(self):
        """Compute the features of every loaded box and stack them.

        Runs ``calculate_metrics()`` for each index of ``self.minerals``
        (printing the index as progress), concatenates the per-box tables
        into ``self.core_df`` and records in ``self.problem_index`` every
        index whose table came back empty.
        """
        self.problem_index = []
        features = []
        for i in range( len(self.minerals) ):
            print(i)
            self.active_index = i
            self.calculate_metrics()
            if len(self.core_box_df) == 0:
                # collect all failing boxes, not just the last one
                self.problem_index.append(i)
            features.append(self.core_box_df)

        # pd.concat rejects an empty list, so an empty datastore yields an
        # empty table instead of an error
        self.core_df = pd.concat( features ) if features else pd.DataFrame()

    def calculate_metrics(self):
        """Calculate the mineral metrics of the active core box.

        Stores in ``self.core_box_df`` one column per mineral (mineral pixels
        divided by core pixels) preceded by a "core_depth" column that is
        spaced linearly between the box's start and end depth. If the metric
        computation fails, ``self.core_box_df`` is an empty DataFrame.
        """
        mineral = mask_image( self.minerals[self.active_index] )
        mask    = self.masks[ self.active_index ]

        #identify core compartments
        core_compartments = identify_box( mask )

        #core pixel curve
        core_pixel_counts = get_core_pixel_counts(
            mask,
            core_compartments )

        try:
            #mineral pixel counts
            mineral_pixel_counts = get_mineral_pixel_counts(
                mineral,
                mask,
                self.minerals_dictionary)

            # mineral pixel prc
            mineral_pixel_prc = get_mineral_pixel_prc(
                mineral_pixel_counts,
                core_pixel_counts )

            # mineral metric curves
            mineral_metrics = tensor_to_matrix( mineral_pixel_prc )
        except Exception:
            # Best effort: a box that cannot be processed yields an empty
            # table, which extract_features() reports via problem_index.
            df = pd.DataFrame()
        else:
            # create core_box
            df = pd.DataFrame(
                mineral_metrics,
                columns=list(self.minerals_dictionary) )

            # scalars (a stray trailing comma used to turn the start depth
            # into a 1-tuple, which made linspace return an (n, 1) array)
            core_start_depth = self.metadata["depth-start"][self.active_index]
            core_end_depth   = self.metadata["depth-end"][self.active_index]
            n_pixels = df.shape[0]

            #TODO revisit and calculate resolution
            core_depth = np.linspace(
                core_start_depth,
                core_end_depth,
                n_pixels)

            df.insert(0,
                "core_depth", core_depth)

        self.core_box_df = df

    def get_core_column(self):
        """TODO"""

    def plot_core_box(self, ptype="minerals"):
        """Preview up to five images of the datastore side by side.

        ``ptype`` names the attribute holding the images ("minerals" or
        "masks"). Nothing is drawn when that attribute is empty.
        """
        # truncate
        images = getattr(self, ptype)[:5]
        n = len(images)
        if n == 0:
            return

        # squeeze=False keeps a 2-D axes array even for a single image
        fig, ax = plt.subplots(1, n, figsize=(18,12), squeeze=False)
        for item, image in zip(ax[0], images):
            item.imshow(image, interpolation="none")

    def _plot_features(self, features):
        """Plot every mineral column of a feature table against core depth.

        The first column of ``features`` is skipped and the remaining
        columns are drawn in one panel each, sharing a depth axis that is
        inverted. Returns the 1-D array of axes.
        """
        minerals = features.iloc[:,1:]
        core_depth = features["core_depth"]
        mineral_keys = minerals.columns.values

        n_element = minerals.shape[1]
        fig, ax = plt.subplots(1, n_element, figsize=(18,12), sharey=True,
            squeeze=False)
        # first (only) row: a 1-D axes array even when n_element == 1
        ax = ax[0]
        for i, axis in enumerate( ax ):
            axis.plot(minerals.iloc[:,i], core_depth)
            axis.set_title(mineral_keys[i])
        ax[0].invert_yaxis()
        ax[0].set_ylabel( "core depth" )
        return ax

    def plot_box_features( self ):
        """Plot the mineral features of the active box (``core_box_df``)."""
        return self._plot_features( self.core_box_df )

    def plot_core_features(self):
        """Plot the mineral features of all processed boxes (``core_df``)."""
        return self._plot_features( self.core_df )

    @classmethod
    def read_ds( cls, path_to_mineral, path_to_mask, mode="sample" ):
        """Create a datastore for the two directories and read it.

        ``mode`` is forwarded to ``read()``; returns the populated instance.
        """
        ds = cls(path_to_mineral, path_to_mask)
        ds.read(mode=mode)
        return ds


## Configure

In [None]:
# Dataset root, followed by its mineral-map and core-mask sub-directories.
path_to_data = "../../data/cmm-111C-coremask-mineralmap/"
path_to_mineral, path_to_mask = (
    os.path.join(path_to_data, folder)
    for folder in ("borehole-mineral-map", "core-mask")
)

## Prototype: single core box

In [None]:
# Load the datastore with read_ds's default mode ("sample").
ds = HyperspecDatastore.read_ds(path_to_mineral, path_to_mask)

In [None]:
# Show the joined mineral-map / mask metadata table.
ds.metadata

In [None]:
# Index of the box calculate_metrics() will process (the constructor sets it to 1).
ds.active_index

In [None]:
# Compute the metrics of the active box, then plot one panel per mineral.
ds.calculate_metrics()
ax = ds.plot_box_features()

## Prototype: multiple core boxes [sample n = 5]

In [None]:
# Reload the datastore, this time requesting "sample" mode explicitly.
ds = HyperspecDatastore.read_ds(path_to_mineral, path_to_mask, mode="sample")


In [None]:
# Number of mineral-map images that were loaded.
len(ds.minerals)

In [None]:
# Run calculate_metrics() for every loaded box and stack the results in ds.core_df.
ds.extract_features()

In [None]:
# Preview the loaded mineral-map images (plot_core_box shows at most five).
ds.plot_core_box(ptype="minerals")

In [None]:
# Plot the stacked features of all processed boxes against core depth.
ax = ds.plot_core_features()

## Prototype: all core boxes

In [253]:
# Load every image pair listed in the metadata.
ds = HyperspecDatastore.read_ds(path_to_mineral, path_to_mask, mode="all")

In [254]:
# Number of mineral-map images loaded in "all" mode.
len(ds.minerals)

238

In [255]:
# Process every box; extract_features() prints each index as it goes.
ds.extract_features()

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237


In [246]:
# Show the stacked feature table (core_depth plus one column per mineral).
ds.core_df

Unnamed: 0,core_depth,illite,illite-smectite,low-reflectance,montmorillonite,other,smectite-kaolinite,smectite-saponite
0,0.000000,,,,,,,
1,0.003870,,,,,,,
2,0.007740,,,,,,,
3,0.011610,,,,,,,
4,0.015480,,,,,,,
...,...,...,...,...,...,...,...,...
2675,47.485069,,,,,,,
2676,47.488802,,,,,,,
2677,47.492535,,,,,,,
2678,47.496267,,,,,,,


In [257]:
# Total number of rows across all processed boxes.
len(ds.core_df)

625475