In [245]:
import scipy
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy
import pandas
import sklearn
from sklearn import preprocessing
from sklearn import linear_model
import tensorflow
import functools

In [247]:
# Read the untrimmed sample table, then separate the LAI target (y) from
# the remaining columns, which serve as predictors (X).
untrimmedCollection = pandas.read_csv('untrimmed_image_samples_3.csv')
y = untrimmedCollection['LAI']
X = untrimmedCollection.drop(columns='LAI')

In [248]:
# Fit least-angle regression of LAI on the predictors, allowing up to 57
# non-zero coefficients.
# NOTE(review): 57 presumably equals the number of predictor columns in X — confirm.
LAImodel = sklearn.linear_model.Lars(n_nonzero_coefs=57).fit(X, y)

In [249]:
# Tabulate the fitted coefficient path with one row per predictor, labelled
# by the predictor's column name in X.
coef_path = pandas.DataFrame(LAImodel.coef_path_, index=X.columns)

In [250]:
# Keep the predictors whose coefficient is non-zero in column 4 of the
# coefficient path, plus the LAI target as the last column.
# Selection is done by column NAME: coef_path rows follow X (which has LAI
# removed), so their positions only line up with untrimmedCollection when
# LAI happens to be its final column, and a hard-coded position for LAI
# breaks as soon as the CSV layout changes.
selected_features = coef_path.index[coef_path.iloc[:, 4].to_numpy() != 0]
selected_data = untrimmedCollection.loc[:, [*selected_features, 'LAI']]

In [251]:
# Order the samples by ascending LAI with a fresh 0..n-1 index.
sorted_data = selected_data.sort_values(by="LAI", ignore_index=True)
# Cut the ordered rows into 10 consecutive, near-equal chunks. The row
# positions are split instead of the frame itself because
# numpy.array_split on a DataFrame goes through DataFrame.swapaxes, which
# pandas has deprecated; chunk sizes are the same as array_split produces.
partitioned_data = [
    sorted_data.iloc[rows]
    for rows in numpy.array_split(numpy.arange(len(sorted_data)), 10)
]

In [261]:
def trimmer(partition, lower=0.05, upper=0.95):
    """Drop rows that are extreme in at least one column of ``partition``.

    For every column the ``lower`` and ``upper`` quantiles are computed
    (linear interpolation, NaN ignored). A row survives only if, in every
    column, its value is strictly greater than the lower quantile and less
    than or equal to the upper quantile -- i.e. it falls in the middle bin
    ``(q_lower, q_upper]`` that a three-way quantile cut would produce.

    Parameters
    ----------
    partition : pandas.DataFrame
        Numeric table to trim. It is not modified.
    lower, upper : float, optional
        Quantile levels bounding the kept interval; defaults are 0.05 and
        0.95.

    Returns
    -------
    pandas.DataFrame
        The surviving rows with their original index labels, row order and
        column order.

    Raises
    ------
    ValueError
        If the quantile levels do not satisfy ``0 <= lower < upper <= 1``.

    Notes
    -----
    A column whose two quantiles coincide (for example a constant column)
    has an empty kept interval, so the result has no rows.
    """
    if not 0 <= lower < upper <= 1:
        raise ValueError("quantile levels must satisfy 0 <= lower < upper <= 1")

    # A single positional mask replaces the chain of index merges: no helper
    # column that could clash with a feature name, no suffixed duplicate
    # columns, and no dependence on the index being unique.
    keep = numpy.ones(len(partition), dtype=bool)
    for column in partition.columns:
        values = partition[column]
        low_edge, high_edge = values.quantile([lower, upper])
        # Comparisons with NaN are False, so rows with missing values drop out.
        keep &= (values.gt(low_edge) & values.le(high_edge)).to_numpy()
    return partition.loc[keep]

In [262]:
# Trim each chunk on its own, then stack the surviving rows into a single
# frame with a fresh 0..n-1 index.
trimmed_data_list = [trimmer(chunk) for chunk in partitioned_data]
trimmed_data = pandas.concat(trimmed_data_list, ignore_index=True)

In [263]:
# Display the trimmed samples; the rendered table below is abbreviated.
trimmed_data

Unnamed: 0,B8A,B9,OSAVI,MSR2,LAI
0,-0.985756,-2.060903,5.423218,6.106368,-1.424587
1,-1.062536,-1.995054,5.605429,6.400157,-1.417822
2,-0.657395,-1.793846,5.119102,5.635390,-1.403705
3,-0.806056,-1.899937,5.409345,6.084401,-1.399990
4,-0.484230,-1.543252,5.449214,6.147677,-1.396707
...,...,...,...,...,...
10658,2.376262,3.132069,-0.054041,-0.090165,2.028966
10659,2.239037,2.577835,0.080219,0.023198,2.029139
10660,2.399133,2.491865,-0.401369,-0.377721,2.035510
10661,2.108346,2.910741,0.083915,0.026348,2.035813


In [264]:
# Read the trimmed sample table from disk and separate the LAI target from
# the predictors.
# NOTE(review): this rebinds X and y, which earlier in the notebook held
# the untrimmed data; re-running earlier cells afterwards would pick up
# these values — consider distinct names.
trimmedCollection = pandas.read_csv('trimmed_image_samples_3.csv')
y = trimmedCollection['LAI']
X = trimmedCollection.drop(columns='LAI')

In [265]:
# Show the trimmed samples in ascending LAI order with a fresh 0..n-1
# index; the sorted frame is only displayed, not stored.
trimmedCollection.sort_values("LAI", ignore_index=True)

Unnamed: 0,B9,MSR2,B8A,OSAVI,LAI
0,-1.020114,6.241225,0.165956,5.507624,-1.472166
1,-2.172482,6.392183,-1.002092,5.600574,-1.466403
2,-1.199371,5.891581,-0.069287,5.286496,-1.457319
3,-2.119436,6.008734,-0.913876,5.361394,-1.439646
4,-1.830430,6.527512,-0.709672,5.682799,-1.435789
...,...,...,...,...,...
11767,3.007686,-0.245630,2.261908,-0.240650,2.342471
11768,3.309497,0.175701,2.516754,0.258473,2.362708
11769,2.912570,0.257110,2.734028,0.352566,2.379672
11770,2.056526,-0.774353,2.466112,-0.896879,2.420034


In [40]:
# Refit least-angle regression on the current X and y, this time capped at
# 10 non-zero coefficients.
LAImodel = sklearn.linear_model.Lars(n_nonzero_coefs=10).fit(X, y)

In [41]:
# Print a heading, then display columns 0 through 10 of the coefficient
# path with rows labelled by predictor name.
print('LAI Coefficient Path')
pandas.DataFrame(LAImodel.coef_path_, index=X.columns).loc[:, :10]

LAI Coefficient Path


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
B9,0.0,0.605589,12.627968,12.762882,13.443108,13.236336,9.632882,9.632964,9.632976,9.175789,8.613272
SR3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
B3,0.0,0.0,0.0,0.0,0.534894,0.549098,-0.388218,-0.388468,-0.38855,-0.969067,-1.625478
B8A,0.0,0.0,12.022379,12.01572,11.092492,11.329185,17.402884,17.403289,17.403434,18.594065,20.066707
TCARI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.745057
MTVI2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
MCARI,0.0,0.0,0.0,0.0,0.0,0.0,-5.671342,-5.672844,-5.673325,-7.527478,-9.07172
B13,0.0,0.0,0.0,-0.130319,-0.474696,-0.519893,-1.252486,-1.252513,-1.252523,-1.283124,-1.319128
B12,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1e-05,-0.032949,-0.076173
NLI,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
