In [1]:
%matplotlib notebook
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import pickle 
from collections import defaultdict
import pandas as pd

import utils
from image import HyperCube

In [2]:
def preprocess(data_file, calc_features=False, baseline=None):
    """Load pickled spectra and return a feature matrix with its labels.

    Every record in the pickle is treated as a sequence whose last element
    is the label (cast to ``int``) and whose remaining elements are the
    reflectance values.

    Parameters
    ----------
    data_file : str
        Path of the pickle file holding the spectra records.
    calc_features : bool, optional
        When False (default) the reflectance curves themselves are the
        features.  When True each curve is compared against ``baseline``
        with four ``utils`` measures and its feature row becomes
        ``[divergence, correlation, distance, angle]``.
    baseline : optional
        Reference curve handed as the first argument to the ``utils``
        similarity functions.  Only read when ``calc_features`` is True.

    Returns
    -------
    dict
        ``{'feature': 2-D ndarray, 'label': 1-D ndarray}``.  When the file
        cannot be opened a message is printed, and whenever no records are
        available both arrays are empty (zero rows) instead of the call
        failing inside ``np.vstack``.
    """
    try:
        # NOTE(review): unpickling can execute arbitrary code -- only use
        # this on trusted files.
        # The context manager closes the file handle even if loading fails.
        with open(data_file, "rb") as handle:
            produce_spectra = pickle.load(handle, encoding='latin1')
    except (OSError, IOError):
        print("Error loading pickled spectra!")
        produce_spectra = []

    # np.vstack refuses an empty sequence, so handle "no records" explicitly.
    if len(produce_spectra) == 0:
        n_columns = 4 if calc_features else 0
        return {'feature': np.empty((0, n_columns)),
                "label": np.array([], dtype=int)}

    reflectances = np.array([item[:-1] for item in produce_spectra])
    labels = np.array([int(item[-1]) for item in produce_spectra])

    if not calc_features:
        return {'feature': np.vstack(reflectances), "label": labels}

    # One row of similarity measures per curve, each relative to `baseline`.
    feature_vectors = [
        [
            utils.spectral_info_divergence(baseline, curve),
            utils.spectral_correlation(baseline, curve),
            utils.euclidean_distance(baseline, curve),
            utils.spectral_angle(baseline, curve),
        ]
        for curve in reflectances
    ]
    return {'feature': np.vstack(feature_vectors), "label": labels}

In [None]:
# Load the pickled cutoff spectra. With the default calc_features=False the
# "feature" entry holds the reflectance curves themselves (one row per record).
banana_cutoff = preprocess("Formatted_Data/banana_cutoff.p")
cutoff_reflect = banana_cutoff["feature"]

## Use model to predict age of cutoff

In [None]:
# Rebuild the network (290 inputs -> 64 relu -> 64 relu -> 1 linear output)
# and then restore its weights from disk.
# NOTE(review): `Sequential` and `Dense` are not imported in the visible
# import cell -- presumably they come from Keras; confirm and add the import
# so this cell runs on a fresh kernel.
model = Sequential([
    Dense(64, activation="relu", input_dim=290),
    Dense(64, activation="relu"),
    Dense(1, activation='linear'),
])
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.load_weights("Models/banana_net.h5")

In [None]:
# Number of cutoff spectra (rows of the 2-D feature matrix).
print(cutoff_reflect.shape[0])

In [None]:
# Run the loaded network on every cutoff reflectance curve.
# NOTE(review): the section heading calls these predictions "age"; the unit
# is not visible in this notebook -- confirm.
cutoff_age = model.predict(cutoff_reflect)

In [None]:
# Mean, then standard deviation, of the predictions (one value per line).
for summarize in (np.mean, np.std):
    print(summarize(cutoff_age))

## Check rate of change of similarity measures

In [None]:
# Average the cutoff curves row-wise to get a single reference curve.
mean_reflect = cutoff_reflect.mean(axis=0)

In [None]:
# Load the time-series spectra and, instead of raw curves, compute the four
# similarity measures of each curve against the mean cutoff curve.
banana_over_time = preprocess("Formatted_Data/banana.p", calc_features=True, baseline=mean_reflect)

In [None]:
features = banana_over_time["feature"]
labels = banana_over_time["label"]

# Column order of `features` follows preprocess():
# divergence, correlation, euclidean distance, spectral angle.
measure_names = ["Divergence", "Correlation", "Eucl_Dist", "Cosine_Sim"]

# Start with the day labels, then add one list per similarity measure.
features_dict = {"Day": [labels[row] for row in range(len(labels))]}
for position, measure in enumerate(measure_names):
    features_dict[measure] = [features[row][position] for row in range(len(labels))]

In [None]:
# Assemble the per-sample measures into a DataFrame with a fixed column order.
# NOTE: `df` is reassigned from Baseline_Similarity.csv in the next cell.
df = pd.DataFrame(features_dict, columns = ["Day", "Divergence", "Correlation", "Eucl_Dist", "Cosine_Sim"])

In [4]:
# Load the similarity table from disk; this replaces any `df` built in memory.
# NOTE(review): the saved describe() output below shows an "Unnamed: 0"
# column, which suggests the CSV was written together with its index --
# consider read_csv(..., index_col=0) after confirming.
df = pd.read_csv('Baseline_Similarity.csv')

In [5]:
# One sub-frame per observation day (1 through 5), selected with a boolean
# mask on the "Day" column.
day1, day2, day3, day4, day5 = (
    df.loc[df['Day'] == day_number] for day_number in (1, 2, 3, 4, 5)
)

In [6]:
# Summary statistics for the day-5 rows only.
day5.describe()

Unnamed: 0.1,Unnamed: 0,Day,Divergence,Correlation,Eucl_Dist,Cosine_Sim
count,23.0,23.0,23.0,23.0,23.0,23.0
mean,91.73913,5.0,0.025351,0.974693,1.791427,0.094141
std,54.498722,0.0,0.020469,0.018268,1.109068,0.044334
min,15.0,5.0,0.001594,0.938384,0.422211,0.024995
25%,68.5,5.0,0.007549,0.958874,1.06102,0.05805
50%,74.0,5.0,0.019777,0.976331,1.421763,0.089952
75%,151.5,5.0,0.039737,0.990106,2.385153,0.131923
max,157.0,5.0,0.074092,0.998216,5.370636,0.176774


In [7]:
# divergence info
divergence = {'day1': day1['Divergence'].tolist(), 'day2': day2['Divergence'].tolist(), 'day3': day3['Divergence'].tolist(),
              'day4': day4['Divergence'].tolist(), 'day5': day5['Divergence'].tolist()}

divergence_df = pd.DataFrame({ key:pd.Series(value) for key, value in divergence.items() })

# correlation info
corr = {'day1': day1['Correlation'].tolist(), 'day2': day2['Correlation'].tolist(), 'day3': day3['Correlation'].tolist(),
        'day4': day4['Correlation'].tolist(), 'day5': day5['Correlation'].tolist()}

corr_df = pd.DataFrame({ key:pd.Series(value) for key, value in corr.items() })

# euclidean distance info
dist = {'day1': day1['Eucl_Dist'].tolist(), 'day2': day2['Eucl_Dist'].tolist(), 'day3': day3['Eucl_Dist'].tolist(),
        'day4': day4['Eucl_Dist'].tolist(), 'day5': day5['Eucl_Dist'].tolist()}

dist_df = pd.DataFrame({ key:pd.Series(value) for key, value in dist.items() })

# spectral angle info
angle = {'day1': day1['Cosine_Sim'].tolist(), 'day2': day2['Cosine_Sim'].tolist(), 'day3': day3['Cosine_Sim'].tolist(),
        'day4': day4['Cosine_Sim'].tolist(), 'day5': day5['Cosine_Sim'].tolist()}

angle_df = pd.DataFrame({ key:pd.Series(value) for key, value in angle.items() })

In [12]:
# Box plot of the Cosine_Sim (spectral angle) values, one box per day.
angle_df.boxplot(column=['day1', 'day2', 'day3', 'day4', 'day5'], grid=False)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x1c1f6789b0>

In [None]:
def _mean_per_day(column):
    """Return the mean of `column` for day1 through day5, in that order."""
    return [np.mean(frame[column].tolist()) for frame in (day1, day2, day3, day4, day5)]


# Mean of each similarity measure, one value per day.
(day1_divergence, day2_divergence, day3_divergence,
 day4_divergence, day5_divergence) = _mean_per_day('Divergence')

(day1_corr, day2_corr, day3_corr,
 day4_corr, day5_corr) = _mean_per_day('Correlation')

(day1_dist, day2_dist, day3_dist,
 day4_dist, day5_dist) = _mean_per_day('Eucl_Dist')

(day1_angle, day2_angle, day3_angle,
 day4_angle, day5_angle) = _mean_per_day('Cosine_Sim')

In [None]:
# Show the mean divergence for days 1 through 5.
print(day1_divergence, day2_divergence, day3_divergence, day4_divergence, day5_divergence)

In [None]:
# Scatter the per-day mean of one similarity measure against the day number.
# The spectral angle (the "Cosine_Sim" column) is plotted; to look at another
# measure, swap `plotted_means` for one of these series:
#   [day1_divergence, day2_divergence, day3_divergence, day4_divergence, day5_divergence]
#   [day1_corr, day2_corr, day3_corr, day4_corr, day5_corr]
#   [day1_dist, day2_dist, day3_dist, day4_dist, day5_dist]
plotted_days = [1, 2, 3, 4, 5]
plotted_means = [day1_angle, day2_angle, day3_angle, day4_angle, day5_angle]

# Draw on a fresh figure: plt.scatter targets the *current* figure, which with
# the interactive notebook backend can be an earlier plot (e.g. the box plot).
fig, ax = plt.subplots()
ax.scatter(plotted_days, plotted_means)
ax.set_xlabel("Day")
ax.set_ylabel("Mean Cosine_Sim")

# The original `plt.show` lacked parentheses, so show() was never called.
plt.show()