# Continuum Removal

## Usage

Update the constants below to work with your file structure

HYPERSPECRAL_VECTOR_CSV_PATH this should be the path to a CSV file containing the hyperspectral data to be transformed into continuum removal data (following all pre-processing such as atmospheric water vapour removal).

WAVELENGTHS_CSV_PATH this should be the path to a csv file containing the wavelengths included in the hyperspectral data.

OUTPUT_CSV_PATH this will be where the generated data is placed.

In [None]:
# Input: CSV of per-point hyperspectral band values (read in the next cell).
HYPERSPECRAL_VECTOR_CSV_PATH = "preprocessed_data/site_with_hyperspectral_data.csv"
# Input: CSV listing the wavelengths of the bands.
WAVELENGTHS_CSV_PATH = "preprocessed_data/wavelength_data.csv"
# Output: where the transformed table is written by the last cell.
OUTPUT_CSV_PATH = "transformed_data/continuum_removal.csv"

Create continuum removal for the classes

In [5]:
import pandas as pd
import geopandas
# Load the pre-processed hyperspectral table into a plain DataFrame.
data = pd.read_csv(HYPERSPECRAL_VECTOR_CSV_PATH)

# The CSV stores each geometry as WKT text; parse it into geometry objects
# before handing the table to geopandas.
parsed_geometry = geopandas.GeoSeries.from_wkt(data['geometry'])
data['geometry'] = parsed_geometry

# Wrap the table as a GeoDataFrame whose active geometry is that column.
data_gdf = geopandas.GeoDataFrame(data, geometry='geometry')

# Notebook display of the loaded table.
data_gdf

Unnamed: 0.1,Unnamed: 0,band1,band2,band3,band4,band5,band6,band7,band8,band9,...,band352,band353,band354,band355,band356,band357,band358,POINTID,geometry,VALUE
0,0,-0.000152,0.000986,0.004008,0.005472,0.005756,0.007789,0.009954,0.011664,0.013060,...,0.065499,0.062266,0.063747,0.060986,0.059916,0.061251,0.058821,1,POINT (448210.950 6481848.324),-0.000152
1,1,0.000510,0.000955,0.004493,0.004698,0.006199,0.008139,0.010138,0.011971,0.014497,...,0.062596,0.063473,0.060694,0.062735,0.062540,0.061148,0.063097,2,POINT (448216.050 6481848.324),0.000510
2,2,0.000565,0.001765,0.003405,0.005407,0.005677,0.008092,0.010280,0.011298,0.014343,...,0.068078,0.066012,0.062762,0.061232,0.066458,0.060376,0.063481,3,POINT (448221.150 6481848.324),0.000565
3,3,0.000197,0.001537,0.003782,0.005053,0.006393,0.007700,0.010634,0.011635,0.013751,...,0.065658,0.066481,0.064339,0.062169,0.064833,0.062855,0.061849,4,POINT (448226.250 6481848.324),0.000197
4,4,0.000440,0.001375,0.003630,0.005567,0.006266,0.008108,0.010253,0.010777,0.013712,...,0.063027,0.059421,0.060017,0.062054,0.061573,0.059307,0.060271,5,POINT (448231.350 6481848.324),0.000440
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38803,38803,0.001394,0.002932,0.004483,0.004704,0.005632,0.008030,0.010003,0.011651,0.013787,...,0.079240,0.080583,0.077058,0.078510,0.079759,0.077709,0.074454,38804,POINT (449185.050 6480843.624),0.001394
38804,38804,0.001595,0.003533,0.005756,0.004066,0.005633,0.008298,0.010117,0.011662,0.014212,...,0.079868,0.079635,0.077944,0.079044,0.077693,0.078720,0.076663,38805,POINT (449190.150 6480843.624),0.001595
38805,38805,0.003444,0.002441,0.004458,0.004978,0.005722,0.008175,0.010115,0.011722,0.013806,...,0.082580,0.078714,0.078940,0.081538,0.078073,0.077943,0.078941,38806,POINT (449195.250 6480843.624),0.003444
38806,38806,0.003120,-0.000287,0.004544,0.004693,0.005794,0.008127,0.010116,0.011791,0.013700,...,0.084511,0.081187,0.081680,0.083110,0.081314,0.081436,0.078879,38807,POINT (449200.350 6480843.624),0.003120


In [6]:
import numpy as np

# Wavelength table, one row per band; it forms the leading column(s) of `bdna`.
wavelengths = pd.read_csv(WAVELENGTHS_CSV_PATH)

# Number of samples (rows) in the hyperspectral table.
count_row = data_gdf.shape[0]

# Per-band step of the straight-line continuum.
# NOTE(review): the two levels and the divisor are hard-coded; presumably they
# were chosen for this 358-band data set -- confirm before reusing elsewhere.
Increment = (0.00375-0.00112)/358

# Reflectance matrix with one row per sample and one column per band. The band
# columns are taken by position (columns 1..358 of the table), so the table
# layout must keep the bands in those positions.
spectra = data_gdf.iloc[:, 1:359].to_numpy(dtype=float)
band_steps = np.arange(1, spectra.shape[1] + 1)

# Continuum: a line anchored at each sample's first-band reflectance that
# rises by `Increment` for every band (the first band already gets one step).
continuum = spectra[:, :1] + Increment * band_steps

# Continuum-removed spectrum: reflectance relative to the continuum.
continuum_removed = spectra / continuum

# Scale each sample's continuum-removed spectrum by its own sum. nansum skips
# missing bands, matching the NaN-skipping behaviour of pandas' `.sum()`.
bdna_values = continuum_removed / np.nansum(continuum_removed, axis=1, keepdims=True)

# Assemble the result in one step: bands as rows, one integer-labelled column
# per sample (0 .. count_row - 1), aligned on the wavelength table's rows.
# Building all columns at once avoids inserting (and copying) tens of
# thousands of columns one by one.
bdna_by_sample = pd.DataFrame(bdna_values.T, columns=range(count_row))
bdna = pd.concat([wavelengths, bdna_by_sample.reindex(wavelengths.index)], axis=1)

# Notebook display of the result.
bdna



Unnamed: 0,Wavelength,0,1,2,3,4,5,6,7,8,...,38798,38799,38800,38801,38802,38803,38804,38805,38806,38807
0,377.071821,0.000012,0.000024,0.000024,0.000018,0.000024,0.000028,0.000022,0.00002,0.000019,...,0.000027,0.000052,0.000064,0.000052,0.000035,0.000034,0.000037,0.000062,0.000057,0.000037
1,382.081821,-0.000079,0.000044,0.000075,0.000137,0.000073,0.000063,0.000074,0.000084,0.000089,...,0.000077,0.00005,0.000053,0.000038,0.000023,0.000071,0.000082,0.000044,-0.000005,-0.000005
2,387.091821,-0.000341,0.000204,0.000144,0.000326,0.00019,0.000136,0.000166,0.000181,0.000188,...,0.000128,0.00007,0.000068,0.000074,0.000079,0.000109,0.000132,0.00008,0.000083,0.000094
3,392.101821,-0.000493,0.00021,0.000225,0.000422,0.000287,0.000175,0.000221,0.00023,0.000242,...,0.000137,0.000093,0.000084,0.000094,0.000107,0.000114,0.000093,0.00009,0.000086,0.000097
4,397.101821,-0.000552,0.000273,0.000234,0.000517,0.000318,0.000206,0.000255,0.000257,0.000266,...,0.000179,0.000108,0.000096,0.000106,0.000137,0.000135,0.000128,0.000103,0.000106,0.000132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
353,2375.531821,0.000287,0.00047,0.000492,0.000434,0.000478,0.000516,0.000435,0.00043,0.000428,...,0.000728,0.00086,0.000885,0.000769,0.000673,0.000663,0.00069,0.000816,0.000822,0.000764
354,2380.541821,0.000274,0.000485,0.000478,0.000418,0.000493,0.000516,0.000405,0.000427,0.000428,...,0.000721,0.000891,0.000888,0.000771,0.000675,0.000674,0.000699,0.000842,0.000836,0.000792
355,2385.551821,0.000268,0.000482,0.000518,0.000435,0.000488,0.000496,0.000432,0.000439,0.000423,...,0.000734,0.000853,0.00091,0.000792,0.000643,0.000683,0.000685,0.000806,0.000817,0.000753
356,2390.561821,0.000273,0.00047,0.00047,0.000421,0.000469,0.000498,0.000418,0.000421,0.000423,...,0.000702,0.000849,0.000848,0.00076,0.000667,0.000664,0.000693,0.000803,0.000817,0.000785


In [7]:
# Flip the table so every sample becomes a row and every band a column, then
# discard the 'Wavelength' row that the flip turns the wavelength column into.
bdna_transposed = bdna.T.drop(index='Wavelength')

# Notebook display of the per-sample table.
bdna_transposed

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,348,349,350,351,352,353,354,355,356,357
0,0.000012,-0.000079,-0.000341,-0.000493,-0.000552,-0.000798,-0.001095,-0.001384,-0.001683,-0.002103,...,0.000293,0.000291,0.000279,0.000296,0.000281,0.000287,0.000274,0.000268,0.000273,0.000262
1,0.000024,0.000044,0.000204,0.00021,0.000273,0.000354,0.000436,0.000508,0.000607,0.00064,...,0.000504,0.000504,0.000486,0.000487,0.000493,0.00047,0.000485,0.000482,0.00047,0.000484
2,0.000024,0.000075,0.000144,0.000225,0.000234,0.000329,0.000413,0.000449,0.000563,0.00062,...,0.000517,0.000519,0.000509,0.000536,0.000518,0.000492,0.000478,0.000518,0.00047,0.000493
3,0.000018,0.000137,0.000326,0.000422,0.000517,0.000603,0.000808,0.000859,0.000987,0.001086,...,0.000446,0.000451,0.000444,0.000445,0.00045,0.000434,0.000418,0.000435,0.000421,0.000413
4,0.000024,0.000073,0.00019,0.000287,0.000318,0.000406,0.000505,0.000523,0.000656,0.000689,...,0.000515,0.00051,0.000502,0.000504,0.000474,0.000478,0.000493,0.000488,0.000469,0.000475
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38803,0.000034,0.000071,0.000109,0.000114,0.000135,0.000192,0.000238,0.000275,0.000324,0.000376,...,0.000698,0.000706,0.000691,0.000684,0.000694,0.000663,0.000674,0.000683,0.000664,0.000635
38804,0.000037,0.000082,0.000132,0.000093,0.000128,0.000188,0.000228,0.000262,0.000318,0.000363,...,0.00072,0.000717,0.000712,0.00071,0.000706,0.00069,0.000699,0.000685,0.000693,0.000674
38805,0.000062,0.000044,0.00008,0.00009,0.000103,0.000147,0.000181,0.000209,0.000246,0.000283,...,0.000841,0.000837,0.000815,0.000856,0.000815,0.000816,0.000842,0.000806,0.000803,0.000813
38806,0.000057,-0.000005,0.000083,0.000086,0.000106,0.000148,0.000184,0.000214,0.000248,0.000287,...,0.000865,0.000843,0.000831,0.000853,0.000818,0.000822,0.000836,0.000817,0.000817,0.00079


In [8]:
from pathlib import Path

# Attach each sample's geometry to its row; the assignment aligns on the row
# labels shared by `data_gdf` and `bdna_transposed`.
# NOTE: the commented-out lines do the same for a 'PFT' column; enable them
# only when the input table has that column.
# pft = data_gdf[['PFT']]
geometry = data_gdf[['geometry']]
# bdna_transposed['PFT'] = pft
bdna_transposed['geometry'] = geometry

# Create the output folder first so the write cannot fail on a missing
# directory after the long computation above.
Path(OUTPUT_CSV_PATH).parent.mkdir(parents=True, exist_ok=True)
bdna_transposed.to_csv(OUTPUT_CSV_PATH)