In [45]:
%matplotlib notebook
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pickle 
from collections import defaultdict
import pandas as pd
from matplotlib.patches import Rectangle

In [4]:
def preprocess(data_file, calc_features=False, baseline=None):
    """Load pickled spectra and split them into a feature matrix and labels.

    The pickle is expected to hold a sequence of records in which every
    element but the last is a reflectance value and the last element is the
    (integer-convertible) label.

    Parameters
    ----------
    data_file : str
        Path of the pickle file to read.
    calc_features : bool, optional
        When False (default) the raw reflectance rows are returned as the
        features. When True each row is reduced to four values computed
        against ``baseline`` with the ``utils`` helpers, in the order
        [divergence, correlation, distance, angle].
    baseline : optional
        Reference curve handed to the ``utils`` helpers. Required when
        ``calc_features`` is True.

    Returns
    -------
    dict
        ``{'feature': 2-D array with one row per record,
        'label': 1-D integer array}``. If the file cannot be opened a message
        is printed and both arrays are empty (zero rows).

    Raises
    ------
    ValueError
        If ``calc_features`` is True but no ``baseline`` was supplied.
    """
    if calc_features and baseline is None:
        raise ValueError("baseline is required when calc_features=True")

    try:
        # SECURITY: unpickling can execute arbitrary code -- only load files
        # that come from a trusted source.
        # The context manager guarantees the file handle is closed.
        with open(data_file, "rb") as handle:
            produce_spectra = pickle.load(handle, encoding='latin1')
    except (OSError, IOError):
        print("Error loading pickled spectra!")
        produce_spectra = []

    labels = np.array([int(item[-1]) for item in produce_spectra], dtype=int)

    if len(produce_spectra) == 0:
        # np.vstack refuses an empty input, so build the empty result by hand
        # instead of failing with an unrelated "need at least one array" error.
        n_columns = 4 if calc_features else 0
        return {'feature': np.empty((0, n_columns)), "label": labels}

    reflectances = np.array([item[:-1] for item in produce_spectra])

    if not calc_features:
        return {'feature': np.vstack(reflectances), "label": labels}

    # NOTE(review): `utils` is not imported in the cells visible here --
    # confirm it is in scope before calling with calc_features=True.
    feature_vectors = [
        [
            utils.spectral_info_divergence(baseline, curve),
            utils.spectral_correlation(baseline, curve),
            utils.euclidean_distance(baseline, curve),
            utils.spectral_angle(baseline, curve),
        ]
        for curve in reflectances
    ]
    return {'feature': np.vstack(feature_vectors), "label": labels}

In [57]:
# Load the banana spectra and unpack the labels and the raw feature matrix.
data = preprocess("../Formatted_Data/banana.p")
labels, features = data["label"], data["feature"]

In [17]:
# Sanity check: dimensions of the feature matrix, then of the label vector.
print(features.shape, labels.shape, sep="\n")

(176, 290)
(176,)


In [35]:
def partition(labels, features):
    """Group the entries of ``features`` by the label at the same position.

    Returns a ``defaultdict(list)`` that maps every distinct label to the
    list of matching ``features`` entries, kept in their original order.
    """
    grouped = defaultdict(list)
    for position, label in enumerate(labels):
        grouped[label].append(features[position])

    return grouped

In [58]:
# Bucket the banana feature rows by their label.
data_by_day = partition(labels=labels, features=features)

In [59]:
# Report how many feature values (all rows of a day, flattened) each day holds.
for k, v in data_by_day.items():
    amps = np.hstack(v)
    print("Day {0}, Len {1}".format(k, len(amps)))

# Overlay the value histograms of days 1-4 on a fresh figure so nothing is
# drawn onto a figure left over from an earlier cell.
fig, ax = plt.subplots()
for day in (1, 2, 3, 4):
    ax.hist(np.hstack(data_by_day[day]), bins="auto", label="Day {}".format(day))

# Decorate the axes before showing the figure.
ax.set_title('Banana')
ax.legend()
plt.show()

Day 1, Len 6670
Day 2, Len 6670
Day 3, Len 4060
Day 5, Len 6670
Day 6, Len 6670
Day 7, Len 6670
Day 9, Len 4060
Day 10, Len 1450
Day 13, Len 1450
Day 15, Len 1450
Day 8, Len 2610
Day 4, Len 2610


<IPython.core.display.Javascript object>

Text(0.5,1,'Banana')

In [53]:
# Load the potato spectra and unpack the labels and the raw feature matrix.
data = preprocess("../Formatted_Data/potato1.p")
labels, features = data["label"], data["feature"]

In [54]:
# Bucket the potato feature rows by their label.
data_by_day = partition(labels=labels, features=features)

In [56]:
# Report how many feature values (all rows of a day, flattened) each day holds.
for k, v in data_by_day.items():
    amps = np.hstack(v)
    print("Day {0}, Len {1}".format(k, len(amps)))

# Overlay the value histograms of days 1-4 on a fresh figure so nothing is
# drawn onto a figure left over from an earlier cell.
fig, ax = plt.subplots()
for day in (1, 2, 3, 4):
    ax.hist(np.hstack(data_by_day[day]), bins="auto", label="Day {}".format(day))

# Decorate the axes before showing the figure.
ax.set_title('Potato')
ax.legend()
plt.show()

Day 1, Len 3480
Day 2, Len 3480
Day 3, Len 2610
Day 6, Len 3480
Day 7, Len 3480
Day 10, Len 870
Day 14, Len 870
Day 16, Len 870
Day 17, Len 870
Day 20, Len 870
Day 5, Len 2610
Day 8, Len 2610
Day 9, Len 870
Day 4, Len 1740


<IPython.core.display.Javascript object>

Text(0.5,1,'Potato')