In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import scipy.stats as st
import cv2
import pickle
import collections
import seaborn as sns
import glob
from os import path
from os.path import basename, splitext
import matplotlib.pyplot as plt
import joblib
from dask import delayed, compute, bag
from dask.distributed import Client, LocalCluster
from operator import methodcaller
from functools import partial
import functional_pipeline as fp

import additive.features as f
import additive.utility as ut
import additive.experimental as ex
ut.custom_matplotlib_style()

In [2]:
# Start a local Dask cluster with two single-threaded workers and attach a client.
# The worker count is passed by keyword: the position of `n_workers` in the
# LocalCluster signature differs between `distributed` releases, so a bare
# positional `2` is not guaranteed to mean "two workers".
cluster = LocalCluster(n_workers=2, threads_per_worker=1)
client = Client(cluster)

In [3]:
# Display the client summary (scheduler address, dashboard link, worker/core/memory totals).
client

0,1
Client  Scheduler: tcp://127.0.0.1:35731  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 2  Cores: 2  Memory: 21.00 GB


In [4]:
# Directory that holds the downloaded raw CSV files.
# Plain string literal: the previous f-string prefix had no placeholders to format.
root = '/data/additive_project/data/raw_csvs'

# Downloading Files

In [5]:
# Series mapping each raw CSV file name (index) to its Box shared-static download URL (value).
# NOTE(review): at least three URLs are listed under two different file names each, so one
# entry of every pair probably points at the wrong file — confirm against the Box folder:
#   .../nrosvmj9kuq1y4b87qjrddxiroljd0e1.csv -> 'Polished_V13_T2_Left(Bottom)' and 'V13_T2_Left(Bottom)'
#   .../qxws6khps46a7btpd7a10ahv20ppdyg1.csv -> 'Polished_V15_T2_Right(Top)' and 'Polished_V15_T2_Left(Bottom)'
#   .../5wnju953hn9lhuo7wmw9fxwbbin2qmej.csv -> 'Polished_V13_T2_Right(Top)' and 'V13_T2_Right(Top)'
links_and_names = pd.Series({'V17_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/p7dq55csjzt6aub5p2a8fs11rputur75.csv',
 'V19_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/0sqgp8kdfgehrzzduz4tdwklt58vz9jr.csv',
 'Polished_V03_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/vj5jo9jilqio6kxz0q634zcm616e2j4d.csv',
 'V14_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/glyd1t7agaa29widz4gv1fm3zl4b3smq.csv',
 'V16_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/4er2scf3uyivowu55jkyd7vg8xgnay0d.csv',
 'V14_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/dpdz5ypokqj71q5nvyxj1qdzh22twqtk.csv',
 'V17_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/cbjee163mi949c4e2vs53r6qohuxym5b.csv',
 'Polished_V03_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/ua9jm6yl1hxvamrswhz1br1g2obta371.csv',
 'V14_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/7rufyyazktvudvxc5la7uiiiycnyx4e4.csv',
 'Polished_V17_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/7afnjtu2a6wr0vqs9uzlli93chf445db.csv',
 'Polished_V13_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/nrosvmj9kuq1y4b87qjrddxiroljd0e1.csv',
 'Polished_V05_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/zvf4x2musg1glbjbttxsr7rfjd6j7ahj.csv',
 'V16_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/y9zcr01notwhyqw0f5eyw44de03acah6.csv',
 'Polished_V17_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/vgq595c9lv4l7oikqu1e0knx2l43i9yk.csv',
 'Polished_V15_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/9nbon41ddyij3z66r1t90qy5qbfdldyx.csv',
 'Polished_V09_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/dh4w5rqawosmfi4s5l4uhc262gcnfhvv.csv',
 'Polished_V09_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/jxlsyfnyaujeoxigqty9h6ddrn8a6587.csv',
 'V16_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/xubu79r40qg50agofxqgrrhbkmu4q20o.csv',
 'V19_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/5rk49cpir8iclaph1jyp0e5rtl6opxyr.csv',
 'V19_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/tubl0z8x82oelvhma0xow60wblpqbi91.csv',
 'Polished_V03_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/ajk9bkra57hq5e4vuugciwqc256pw3j2.csv',
 'Polished_V15_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/jewy8q9l0zmzb63daer7s8o4up4nzsrm.csv',
 'Polished_V11_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/r07qobd7trai71kbcc9q7pyjxp0be9t4.csv',
 'Polished_V13_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/vpeptuut6ox1h4jb7b21h82xordn11eu.csv',
 'V16_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/3hxlhyr9wfchoqc7xx5ac0leaog4gzte.csv',
 'Polished_V15_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/qxws6khps46a7btpd7a10ahv20ppdyg1.csv',
 'Polished_V17_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/yrmul0ftnc496vet6lxyepa7ofr8sk87.csv',
 'Polished_V17_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/ehiv0bp60amfl2kzmqfg9kw76tv9u2ms.csv',
 'V18_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/z8k5oi212rpcfvtulryw65mzf5t4wwah.csv',
 'Polished_V03_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/nnzc376avao9s8ozng9huudv2krfttny.csv',
 'Polished_V05_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/kb9gurjazk4j024dn85mh2sjwldghorn.csv',
 'Polished_V11_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/57k6masq9chzrd16hjyi37zclpvbqksc.csv',
 'Polished_V13_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/gjwtgn4phjclm0ezqtwa6o1fwe57004t.csv',
 'V13_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/97hlllnhy1i73wqtrgu2ed349sr3snqq.csv',
 'V18_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/d0jk3i5e8puzvr2svwwsjn5bicmxa565.csv',
 'V15_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/uetcv4wpkzc6gm2i2dl73kgvuzzr800w.csv',
 'Polished_V11_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/klpxzp5pi7p59yposczqkjo3bqca2jjs.csv',
 'V14_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/9orism7mccd1qbn74op5mtre913r9wol.csv',
 'V15_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/i2iznpq0fs7k1j9nn954zegiuin5j9p5.csv',
 'Polished_V09_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/ychsv6j83pctayifoz2r10ttp148641t.csv',
 'V19_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/upm0bnfo4rp4vvc3qidse8yj9tnfmhom.csv',
 'Polished_V11_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/tqwbmo6siu82fq604930ccg337kob1ns.csv',
 'V17_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/r4k4hac3poio6ixxq6yk4ul0p60swky1.csv',
 'Polished_V09_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/9klqplr4mjqmb1aunruf5gfhk20kkmdv.csv',
 'V18_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/vjs15fbvyc6n9ddua0zk8hg3pzkuklwy.csv',
 'Polished_V13_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/5wnju953hn9lhuo7wmw9fxwbbin2qmej.csv',
 'V18_T1_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/jwmk6i93ft6ci3z7n0v9hl24evpudwds.csv',
 'Polished_V05_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/0jjgmenraapunatbeikr3qk3u417yovq.csv',
 'V13_T1_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/e6odjhkmy2hn7v7ettzlyqxcl49tamhr.csv',
 'Polished_V05_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/zf2lbkbq4gmzg63iigkzxcyl5qw6y2fs.csv',
 'V13_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/nrosvmj9kuq1y4b87qjrddxiroljd0e1.csv',
 'V15_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/e76vn6z6ryy7v83whi1s8ljqbrwpdhbs.csv',
 'Polished_V15_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/qxws6khps46a7btpd7a10ahv20ppdyg1.csv',
 'V15_T2_Left(Bottom)_500X_3D.csv': 'https://auburn.box.com/shared/static/zg5atidig6a3r3qapbhkut6d42c7xqel.csv',
 'V17_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/poyd5xlte5d47edkx9q93epl64jujqfd.csv',
 'V07_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/9y1h6iimqod53khomzfsgeaho5a6xjj4.csv',
 'V13_T2_Right(Top)_500X_3D.csv': 'https://auburn.box.com/shared/static/5wnju953hn9lhuo7wmw9fxwbbin2qmej.csv'})

In [6]:
# Restrict the download list to entries whose file name starts with 'V17'
# (this excludes the 'Polished_V17...' files, which start with 'Polished').
sample_links = links_and_names.loc[links_and_names.index.str.startswith('V17')]

In [7]:
# Inspect the selected (file name, URL) pairs.
list(sample_links.items())

[('V17_T1_Right(Top)_500X_3D.csv',
  'https://auburn.box.com/shared/static/p7dq55csjzt6aub5p2a8fs11rputur75.csv'),
 ('V17_T1_Left(Bottom)_500X_3D.csv',
  'https://auburn.box.com/shared/static/cbjee163mi949c4e2vs53r6qohuxym5b.csv'),
 ('V17_T2_Left(Bottom)_500X_3D.csv',
  'https://auburn.box.com/shared/static/r4k4hac3poio6ixxq6yk4ul0p60swky1.csv'),
 ('V17_T2_Right(Top)_500X_3D.csv',
  'https://auburn.box.com/shared/static/poyd5xlte5d47edkx9q93epl64jujqfd.csv')]

In [8]:
# Submit one download task per selected file that is not already on disk.
# Every future is kept in a list: dask.distributed may abandon a task once no
# future refers to it, and previously only the last one stayed referenced via `res`.
download_futures = []
for k, v in sample_links.items():
    b = ut.file_name_from_url(None, root, k)  # presumably the local target path — TODO confirm
    if path.exists(b):
        continue  # already downloaded, nothing to do
    print(b)
    res = client.submit(ut.download_file, v, root, k)
    download_futures.append(res)

# Block until all downloads finish (and surface any task error) so the
# directory listing taken in the next step sees complete files.
downloaded = client.gather(download_futures)

# Converting CSV files to NumPy arrays and saving them

In [9]:
# Every entry directly inside the raw CSV directory.
files = glob.glob(f"{root}/*")

In [12]:
def parse_and_save(file):
    """Read one CSV file and store its values as a float32 array with joblib.

    The output keeps the input's base name, swaps a trailing ``.csv`` extension
    for ``.np``, and is written to the input directory with ``raw_csv`` replaced
    by ``raw_array`` (e.g. ``.../raw_csvs/x.csv`` -> ``.../raw_arrays/x.np``).
    The directory and the file name are rewritten separately, so a "csv"
    substring inside the file name is left untouched.

    Parameters
    ----------
    file : str
        Path of the CSV file to convert.

    Returns
    -------
    str
        ``file`` unchanged, so a caller can tell which input was processed.
    """
    import os  # local import: only `os.path` names are imported at the top of the notebook

    # NOTE(review): read_csv uses its default header handling, i.e. the first
    # row is taken as column names — confirm the raw files have a header row.
    values = pd.read_csv(file).values.astype('float32')

    directory, name = path.split(file)
    stem, ext = splitext(name)
    out_name = stem + '.np' if ext == '.csv' else name
    out_dir = directory.replace('raw_csv', 'raw_array')
    if out_dir:
        # The target directory is not created anywhere else in this notebook.
        os.makedirs(out_dir, exist_ok=True)

    joblib.dump(values, path.join(out_dir, out_name))
    return file
# Convert every raw CSV on the cluster. The futures are kept and gathered:
# dask.distributed may abandon tasks whose futures are no longer referenced,
# and the next stage lists the output directory, so conversion must have
# finished (and any task error must have surfaced) before moving on.
parse_futures = [client.submit(parse_and_save, file) for file in files]
converted = client.gather(parse_futures)

['/data/additive_project/data/raw_csvs/V17_T2_Left(Bottom)_500X_3D.csv',
 '/data/additive_project/data/raw_csvs/V17_T2_Right(Top)_500X_3D.csv',
 '/data/additive_project/data/raw_csvs/V17_T1_Left(Bottom)_500X_3D.csv',
 '/data/additive_project/data/raw_csvs/V17_T1_Right(Top)_500X_3D.csv']

# Getting Features

In [11]:
# Directory holding the joblib-dumped arrays produced from the raw CSV files.
new_root = "/data/additive_project/data/raw_arrays"

In [12]:
# Every entry directly inside the array directory.
# Note: this rebinds `files`, which earlier held the raw CSV paths.
files = glob.glob(f"{new_root}/*")

In [13]:
# `scipy.ndimage.filters` is a deprecated private namespace; both functions are
# available from the public `scipy.ndimage` package.
from scipy.ndimage import convolve, zoom

In [14]:
def gkern2d(kernlen=21, nsig=3):
    """Build a square 2D Gaussian kernel scaled so that its peak equals 1.

    The interval [-nsig, nsig] is cut into ``kernlen`` equal bins, each bin
    gets the standard-normal probability mass it covers, and the 2D kernel is
    the outer product of that 1D profile with itself.

    Parameters
    ----------
    kernlen : int
        Number of rows and columns of the kernel.
    nsig : float
        Half-width of the sampled interval, in standard deviations.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(kernlen, kernlen)`` whose largest entry is 1.
    """
    bin_edges = np.linspace(-nsig, nsig, kernlen + 1)
    # Probability mass of the standard normal inside each bin.
    profile = np.diff(st.norm.cdf(bin_edges))
    kernel = profile[:, np.newaxis] * profile[np.newaxis, :]
    return kernel / np.max(kernel)
# 31x31 Gaussian kernel sampled over [-5, 5] standard deviations (peak scaled to 1) ...
k3_ = gkern2d(kernlen=31, nsig=5)
# ... and the same kernel rescaled so that its weights sum to 1.
k3 = k3_ / np.sum(k3_)

In [15]:
def pipeline(file):
    """Process one saved array into features and store the result with joblib.

    The operations handed to ``fp.pipeline`` (presumably applied in the listed
    order, each receiving the previous result — TODO confirm) are:
    ``joblib.load`` on the path, ``zoom`` by ``1/ut.SCALE_FACTOR``, ``convolve``
    with the module-level kernel ``k3``, construction of ``f.Features``, and a
    call to its ``run_all_tests`` method.

    The output keeps the input's base name, swaps a trailing ``.np`` extension
    for ``.info``, and is written to the input directory with ``raw_arrays``
    replaced by ``cleaned_v01``. The directory and the file name are rewritten
    separately, so a ".np" substring elsewhere in the path is left untouched.

    Parameters
    ----------
    file : str
        Path of a joblib-dumped array.

    Returns
    -------
    str
        ``file`` unchanged, matching ``parse_and_save``, so mapped results
        identify which inputs were processed.
    """
    import os  # local import: only `os.path` names are imported at the top of the notebook

    res = fp.pipeline(file, operations=[
        joblib.load,
        partial(zoom, zoom=1/ut.SCALE_FACTOR),
        partial(convolve, weights=k3),
        # Tilt adjustment and alignment are currently disabled:
        # ex.adjust_tilt,
        # ex.align_image,
        f.Features,
        methodcaller('run_all_tests')
    ])

    directory, name = path.split(file)
    stem, ext = splitext(name)
    out_name = stem + '.info' if ext == '.np' else name
    out_dir = directory.replace('raw_arrays', 'cleaned_v01')
    if out_dir:
        # The target directory is not created anywhere else in this notebook.
        os.makedirs(out_dir, exist_ok=True)

    joblib.dump(res, path.join(out_dir, out_name))
    return file

In [16]:
# Build a lazy Dask bag over the array paths and map `pipeline` onto each one;
# nothing executes until the bag is computed.
file_bag = bag.from_sequence(files)
result_b = file_bag.map(pipeline)

In [None]:
# Execute the lazy bag: runs `pipeline` for every entry of `files` and collects the return values.
result = result_b.compute()

In [56]:
ls /data/additive_project/data/cleaned_v01/

'V17_T1_Right(Top)_500X_3D.info'  'V17_T2_Left(Bottom)_500X_3D.info'


In [None]:
!mkdir /da