In [1]:
%load_ext autoreload
%autoreload 2
import additive.utility as ut
from additive.features import Features
from dask import compute, delayed
import dask.bag as db
import matplotlib.pyplot as plt
import additive.feature_functions_v02 as ff
import numpy as np
import pandas as pd
import joblib
import glob
from functools import partial

In [2]:
data_dir = "../data/experiment_03/"
files = pd.Series(glob.glob(data_dir+"*info"), name='files')

In [3]:
file_info = ut.get_file_info(files)

In [4]:
as_built_files = file_info[~file_info['ispolished'] & file_info['specimen'].isin({'V12', 'V14', 'V18'})]
polished_files = file_info[file_info['ispolished'] & file_info['specimen'].isin({'V11', 'V13', 'V17'})]

In [5]:
chosen_file_info = ut.pd.concat([as_built_files, polished_files])
chosen_file_info.head(3)

Unnamed: 0,ispolished,specimen,T,RL,files
3,False,V14,T1,R,../data/experiment_03/V14_T1_Right(Top)_500X_3...
25,False,V12,T2,L,../data/experiment_03/V12_T2_Left(Bottom)_500X...
59,False,V18,T1,L,../data/experiment_03/V18_T1_Left(Bottom)_500X...


In [6]:
def get_features(img, feature_funs):
    return {func_name: func(img) for func_name, func in feature_funs.items()}

def resize_image(img, slices):
    return(img[slices])

def load_img(file):
    from additive.features import Features
    return np.array(joblib.load(file)['value'].x)

def aggregator_max(x):
    return np.sort(x.reshape(-1))[-10:].mean()

def aggregator_min(x):
    return np.sort(x.reshape(-1))[:10].mean()

In [7]:
features_min = ff.Features(aggregator_min)
features_max = ff.Features(aggregator_max)

In [8]:
features_all = {k+"_min_10_avg": v for k, v in features_min.functions.items()}
features_max = {k+"_max_10_avg": v for k, v in features_max.functions.items()}
features_all.update(features_max)

In [9]:
slices = (slice(2000, -2000), slice(2000, -2000))
features_d = db.from_sequence(chosen_file_info.files)\
    .map(load_img)\
    .map(resize_image, slices=slices)\
    .map(get_features, feature_funs=features_all)

In [10]:
features = features_d.compute()

In [12]:
feature_df = pd.DataFrame(features)
feature_df['files'] = chosen_file_info.files.values
features_df = chosen_file_info.merge(feature_df, on='files')

In [18]:
features_df.to_csv("../data/min_and_max_10_1d_params.csv")