In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import numpy as np
np.__version__

In [None]:
import thecannon as tc
tc.__version__

In [None]:
# Read in the training labels and the model spectra.
# These are 1000 samples of the labels 'EWT','LMA','N','CHL','CAR','ANT'
# that were used to model spectra with the PROSPECT code
# at wavelengths from 400 nm to 2500 nm in steps of 1 nm.
LUC_labels  = pd.read_csv('data/PROSPECT_input.csv')
LUC_spectra = pd.read_csv('data/PROSPECT_LUT.csv')

In [None]:
# Assemble the arrays for a complete (6-label) and a restricted (2-label) Cannon model

# Spectra: the table read from file is transposed before use.
# The inverse variance corresponds to SNR = 100, i.e. sigma = flux / 100.
prospect_spectra = np.array(LUC_spectra).T
prospect_spectra_ivar = (100. / prospect_spectra) ** 2.0

# Wavelength grid: 400 to 2500 (inclusive) in steps of 1
prospect_wavelength = np.arange(400, 2501)

# Complete label set and its label matrix (one column per label name)
labels = ['EWT', 'LMA', 'N', 'CHL', 'CAR', 'ANT']
prospect_labels = np.array(LUC_labels[labels])

# Restricted label set and its label matrix
labels_restricted = ['EWT', 'LMA']
prospect_labels_restricted = np.array(LUC_labels[labels_restricted])

# Task 1: Visualise your data. How are the labels distributed and what do the spectra look like?

In [None]:
# Visualise the labels

In [None]:
# Visualise the spectra

# Task 2: Train a quadratic model that can be used to fit labels to new spectra at test time

In [None]:
# Set up The Cannon for the complete label set.
# Positional inputs: training labels, training spectra, and their inverse variances.
# The vectorizer is a PolynomialVectorizer built from the label names with
# second argument 2 (the quadratic model of this task); the wavelength grid
# is handed over as the dispersion.
prospect_model = tc.CannonModel(
    prospect_labels,
    prospect_spectra,
    prospect_spectra_ivar,
    vectorizer=tc.vectorizer.PolynomialVectorizer(list(labels), 2),
    dispersion=prospect_wavelength,
)

In [None]:
# Train The Cannon using a single thread (threads=1).
# The call returns three values, unpacked here as the model coefficients (theta),
# s2 (presumably the per-pixel scatter term - confirm in The Cannon docs) and metadata.
# Note: this cell only trains; writing the model to file happens in the following cell.
prospect_theta, prospect_s2, prospect_metadata = prospect_model.train(threads=1)

In [None]:
# If we want to, we can save the model to file and read it back in.
# The round trip is optional: if it fails, `prospect_model` keeps pointing at the
# in-memory model and the notebook continues. We catch `Exception` rather than
# using a bare `except:` so that KeyboardInterrupt/SystemExit still propagate,
# and we print the reason so a failure (missing folder, permissions, ...) is not silent.
try:
    prospect_model.write('data/prospect_model.model', overwrite=True)
    prospect_model = tc.CannonModel.read('data/prospect_model.model')
except Exception as error:
    print("Could not write/read model file")
    print("Reason:", repr(error))

# Task 3: Visualise the linear coefficients of the quadratic model

In [None]:
# Let's first have a look at the model coefficients returned by the training step.
# Print the array shape first
# (presumably one row per wavelength pixel and one column per polynomial term - TODO confirm)
print(np.shape(prospect_theta))
# A bare expression on the last line makes the notebook display the array itself
prospect_theta

In [None]:
# Now let's visualise the linear coefficients of the quadratic model

plt.figure(figsize=(12,5))
# Exercise template: uncomment the lines below and fill in the plot call
# so that one curve is drawn per label.
# for i in range(len(labels)):
#     plt.plot(...)
# plt.legend()
plt.xlabel('Wavelength [nm]',fontsize=15)
# Axis label spelling corrected ("coefficient")
plt.ylabel('Linear coefficient',fontsize=15)
plt.show()

# Task 4: Can we actually recover the labels for the training set?

In [None]:
# Run the trained model on the training spectra themselves, to check how well
# the INPUT labels can be recovered. The call returns three values: the fitted
# labels, their covariances and metadata. The test metadata gets its own name
# so that the training metadata stored in `prospect_metadata` is not overwritten.
prospect_test_labels, prospect_test_cov, prospect_test_metadata = prospect_model.test(prospect_spectra, prospect_spectra_ivar)

In [None]:
# Let's plot how the INPUT labels compare to the OUTPUT labels
def plot_comparison(label, prospect_labels, prospect_test_labels, plot_mean_and_std=False, label_names=None):
    """ Plot comparison of INPUT vs OUTPUT labels from The Cannon model

    Draws a figure with two panels sharing the x axis and shows it:
    the top panel plots OUTPUT against INPUT together with the 1:1 line,
    the bottom panel plots the residuals (OUTPUT - INPUT) with a line at zero.

    Parameters
    ----------
    label : str
        Name of the label to plot.
    prospect_labels : array indexed as [sample, label]
        INPUT labels.
    prospect_test_labels : array indexed as [sample, label]
        OUTPUT labels, with the same column order as `prospect_labels`.
    plot_mean_and_std : bool, optional
        Exercise placeholder: when True the function currently raises
        NotImplementedError at the marked position.
    label_names : sequence of str, optional
        Names of the label columns, in column order. Defaults to the
        notebook-level `labels` list, which is the original behaviour. Pass
        e.g. `labels_restricted` when the arrays only hold a subset of labels.

    Raises
    ------
    ValueError
        If `label` is not contained in `label_names`.
    NotImplementedError
        If `plot_mean_and_std` is True (until the exercise is implemented).
    """

    # Identify the index of the label we want to plot. The column names can be
    # passed explicitly; otherwise fall back to the notebook-level `labels`.
    if label_names is None:
        label_names = labels
    names = list(label_names)
    index = names.index(label)
    label_text = names[index]

    input_values = prospect_labels[:,index]
    output_values = prospect_test_labels[:,index]

    # Set up the figure with 2 panels (one for label comparison, one for residuals)
    fig, (ax_compare, ax_residual) = plt.subplots(
        2, 1, figsize=(6,6), gridspec_kw={'height_ratios': [3,1]}, sharex=True
    )

    # First panel: INPUT vs OUTPUT
    ax_compare.set_ylabel('Output '+label_text,fontsize=15)
    ax_compare.plot(input_values,output_values,'k.')

    # Let's set the limits to the min/max of either INPUT or OUTPUT.
    # The nan-aware reductions ignore NaN entries (e.g. from a fit that did not
    # succeed), which would otherwise turn the limits into NaN and make
    # set_xlim/set_ylim fail. Without NaNs the result is unchanged.
    min_val = np.nanmin([np.nanmin(input_values),np.nanmin(output_values)])
    max_val = np.nanmax([np.nanmax(input_values),np.nanmax(output_values)])
    margin = 0.1*(max_val-min_val)
    min_val_margin = min_val - margin
    max_val_margin = max_val + margin

    # Set the limits and plot the 1:1 line, which should be diagonal now
    ax_compare.set_xlim(min_val_margin,max_val_margin)
    ax_compare.set_ylim(min_val_margin,max_val_margin)
    ax_compare.plot([min_val_margin,max_val_margin],[min_val_margin,max_val_margin],'r-')

    # Exercise placeholder: replace the `raise` with the mean/std plotting
    if plot_mean_and_std:
        raise NotImplementedError("You need to implement the mean and std plotting")

    # Second panel: Residuals
    ax_residual.set_xlabel('Input '+label_text,fontsize=15)
    ax_residual.set_ylabel('Output - Input '+label_text,fontsize=15)

    # Plot the residuals
    ax_residual.plot(input_values,output_values-input_values,'k.')
    ax_residual.axhline(0, color='r', ls='-')
    ax_residual.set_xlim(min_val_margin,max_val_margin)

    plt.show()
    # Close this specific figure so repeated calls in a loop do not accumulate figures
    plt.close(fig)

## Question: Which of the 2 panels do you find more informative and why?

In [None]:
# Show one INPUT vs OUTPUT comparison figure for each of the 6 labels of the complete model
for label in labels:
    plot_comparison(label, prospect_labels, prospect_test_labels)

# Task 6: How well can we recover the labels?

Adjust the function `plot_comparison(label, prospect_labels, prospect_test_labels)` so that it also plots the mean and standard deviation of the residuals.

Use the part of the function that is activated with the keyword `plot_mean_and_std`!

In [None]:
# Same loop over all labels, now with the mean/std option switched on.
# NOTE: this raises NotImplementedError on the first label until the
# `plot_mean_and_std` branch inside `plot_comparison` has been implemented.
for label in labels:
    plot_comparison(label, prospect_labels, prospect_test_labels, plot_mean_and_std=True)

## Task 7: If there is time, try to predict only EWT and LMA (these are the two most important measurements of the spectra)

In [None]:
# Inputs for the restricted (2-label) model.
# These four assignments are identical to the ones made when the inputs were
# first prepared earlier in the notebook (same label names, same spectra, same SNR);
# presumably they are repeated so this section can be started from here - re-running them changes nothing.
labels_restricted = ['EWT','LMA']
prospect_labels_restricted = np.array(LUC_labels[labels_restricted])

# Transposed spectra table and the matching inverse variance
prospect_spectra = np.array(LUC_spectra).T
prospect_spectra_ivar = (100./prospect_spectra)**2.0 # SNR 100

### How well can we recover the labels if we only use EWT and LMA to model the spectra?

In [None]:
# You basically want to repeat the steps above, but only using the restricted labels
# Initialise The Cannon
# Train The Cannon
# Test The Cannon

In [None]:
# To quantify how well you did, you can use the function above to compare INPUT and OUTPUT labels
# We basically want to see how well we can recover the labels with only 2 labels instead of 6

# Plot the 1:1 plots first

In [None]:
# Then compute the mean and std of the residuals for both the 6-label and the 2-label model and compare them for EWT and LMA