In [1]:
import os
import pandas as pd
import re
import xarray as xr

In [2]:
def list_folders_with_prefix(location, prefix):
    """Return the names of the sub-directories of ``location`` starting with ``prefix``.

    Parameters
    ----------
    location : str
        Directory whose immediate children are scanned.
    prefix : str
        Leading text a folder name must have to be kept.

    Returns
    -------
    list of str
        Folder names (not full paths), sorted alphabetically.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps positional
    # indexing into the result reproducible across runs and machines.
    return sorted(
        folder
        for folder in os.listdir(location)
        if folder.startswith(prefix) and os.path.isdir(os.path.join(location, folder))
    )

In [3]:
def list_csv_files_in_folder(folder_path, keyword):
    """Return the paths of the ``.csv`` files in ``folder_path`` whose name contains ``keyword``.

    Parameters
    ----------
    folder_path : str
        Directory to scan (not recursive).
    keyword : str
        Text that must occur somewhere in the file name.

    Returns
    -------
    list of str
        ``folder_path`` joined with each matching file name, sorted alphabetically.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps positional
    # indexing into the result reproducible across runs and machines.
    return sorted(
        os.path.join(folder_path, file)
        for file in os.listdir(folder_path)
        if file.endswith('.csv') and keyword in file
    )

In [4]:
# Root data directory and the folder-name prefix used to pick the dataset
# folders inside it.
location = "/home/khanalp/task1/data/"
prefix = "FLX"
folders = list_folders_with_prefix(location=location, prefix=prefix)

In [5]:
# Flat list of every "FULLSET_HH" CSV path found across the selected folders.
csv_files_with_keyword = [
    csv_path
    for folder in folders
    for csv_path in list_csv_files_in_folder(os.path.join(location, folder), "FULLSET_HH")
]

In [25]:
# Show which file sits at position 51. The index is purely positional: it
# depends on the order in which folders/files were listed and on which
# datasets are present under `location`.
csv_files_with_keyword[51]

'/home/khanalp/task1/data/FLX_FR-LGt_FLUXNET2015_FULLSET_2017-2020_beta-3/FLX_FR-LGt_FLUXNET2015_FULLSET_HH_2017-2020_beta-3.csv'

In [40]:
# Pick the half-hourly FULLSET file by site ID rather than by list position:
# the position depends on directory listing order and on which datasets are
# present, so a fixed index can silently select a different site.
site_id = "FR-LGt"
site_csv_files = [
    path
    for path in csv_files_with_keyword
    if os.path.basename(path).startswith(f"FLX_{site_id}_")
]
if len(site_csv_files) != 1:
    raise ValueError(
        f"Expected exactly one FULLSET_HH file for site {site_id}, "
        f"found {len(site_csv_files)}"
    )
df = pd.read_csv(site_csv_files[0])

In [41]:
# Inspect the column names available in the loaded CSV.
print(df.columns)

Index(['TIMESTAMP_START', 'TIMESTAMP_END', 'TA_F_MDS', 'TA_F_MDS_QC', 'TA_ERA',
       'TA_F', 'TA_F_QC', 'SW_IN_POT', 'SW_IN_F_MDS', 'SW_IN_F_MDS_QC',
       ...
       'GPP_DT_CUT_SE', 'GPP_DT_CUT_05', 'GPP_DT_CUT_16', 'GPP_DT_CUT_25',
       'GPP_DT_CUT_50', 'GPP_DT_CUT_75', 'GPP_DT_CUT_84', 'GPP_DT_CUT_95',
       'RECO_SR', 'RECO_SR_N'],
      dtype='object', length=235)


In [42]:
# Convert the table to an xarray Dataset: every column becomes a data
# variable along the DataFrame's 'index' dimension.
xds = df.to_xarray()

In [43]:
# Display the full Dataset summary.
xds

In [44]:
# List the data variables with their dimensions and dtypes.
xds.data_vars

Data variables:
    TIMESTAMP_START                 (index) int64 201701010000 ... 202012312330
    TIMESTAMP_END                   (index) int64 201701010030 ... 202101010000
    TA_F_MDS                        (index) float64 -3.415 -3.434 ... -1.625
    TA_F_MDS_QC                     (index) int64 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
    TA_ERA                          (index) float64 -4.747 -4.916 ... -9.999e+03
    TA_F                            (index) float64 -3.415 -3.434 ... -1.625
    TA_F_QC                         (index) int64 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
    SW_IN_POT                       (index) float64 0.0 0.0 0.0 ... 0.0 0.0 0.0
    SW_IN_F_MDS                     (index) float64 0.68 0.68 0.68 ... 0.0 0.0
    SW_IN_F_MDS_QC                  (index) int64 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
    SW_IN_ERA                       (index) float64 0.0 0.0 ... -9.999e+03
    SW_IN_F                         (index) float64 0.68 0.68 0.68 ... 0.0 0.0
    SW_IN_F_QC                      

In [45]:
# Columns to keep from the full dataset. Each value column is listed on the
# same line as its companion *_QC column where one is selected.
variables = [
    'TIMESTAMP_START',
    'TA_F', 'TA_F_QC',
    'SW_IN_F', 'SW_IN_F_QC',
    'LW_IN_F', 'LW_IN_F_QC',
    'VPD_F', 'VPD_F_QC',
    'PA_F', 'PA_F_QC',
    'P_F', 'P_F_QC',
    'WS_F', 'WS_F_QC',
    'RH',  # selected without a QC column
    'CO2_F_MDS', 'CO2_F_MDS_QC',
]


# Keep only the listed variables; indexing a Dataset with a list of names
# returns a new Dataset containing just those variables.
selected = xds[variables]

In [46]:
# Mapping from the FLUXNET column names to the target variable names.
# Every value column maps to a bare name and its *_QC companion maps to the
# same name with a '_qc' suffix.
# NOTE(review): 'P_F', 'P_F_QC' and 'RH' are selected earlier in the notebook
# but have no entry here, so they keep their FLUXNET names - confirm whether
# they need target names too.
rename = {
    'TA_F': 'Tair',
    'TA_F_QC': 'Tair_qc',
    'SW_IN_F': 'SWdown',
    'SW_IN_F_QC': 'SWdown_qc',
    'LW_IN_F': 'LWdown',
    'LW_IN_F_QC': 'LWdown_qc',
    'VPD_F': 'VPD',
    'VPD_F_QC': 'VPD_qc',
    'PA_F': 'Psurf',
    'PA_F_QC': 'Psurf_qc',
    'WS_F': 'Wind',
    'WS_F_QC': 'Wind_qc',
    'CO2_F_MDS': 'CO2air',
    'CO2_F_MDS_QC': 'CO2air_qc',
}

In [47]:
# Apply the name mapping; variables without an entry keep their original name.
selected1 = selected.rename(rename)

In [48]:
# Display the renamed Dataset.
selected1

In [49]:
# Parse the integer YYYYMMDDHHMM start timestamps into datetimes and use them
# as the values of the 'index' coordinate.
start_times = pd.to_datetime(selected1['TIMESTAMP_START'], format='%Y%m%d%H%M')
selected1 = selected1.assign_coords(index=start_times)

In [50]:
# Rename the 'index' dimension/coordinate to 'time'.
selected2 = selected1.rename({'index':'time'})

In [54]:
# Inspect the resulting time coordinate.
selected2.time

In [19]:
# Add length-1 'x' and 'y' dimensions, with coordinate values 1 and 2.
# NOTE(review): these look like placeholder coordinates - confirm whether the
# real site location should be used instead.
selected3 = selected2.expand_dims(x=[1], y=[2])

In [20]:
# Drop the raw integer timestamp variable (the parsed timestamps were assigned
# to the coordinate in an earlier cell).
selected4 = selected3.drop_vars('TIMESTAMP_START')

In [None]:
# Convert all data variables to float32 and the numeric coordinates to float64.
# Iterate over snapshots of the names because the Dataset is modified inside
# the loops.
for var_name in list(selected4.data_vars):
    selected4[var_name] = selected4[var_name].astype('float32')
for coord_name in list(selected4.coords):
    # Leave datetime coordinates (e.g. 'time') untouched: casting them to
    # float64 would replace the timestamps with raw epoch numbers.
    if selected4[coord_name].dtype.kind == 'M':
        continue
    selected4[coord_name] = selected4[coord_name].astype('float64')

In [None]:
# Display the converted Dataset.
selected4

In [None]:
# Open an existing Plumber2 meteorology NetCDF file (site US-Blo, 2000-2006
# according to the file name).
# NOTE(review): presumably used as a reference for the target layout - confirm.
Plumber_2 = xr.open_dataset("~/STEMMUSSCOPE/STEMMUS_SCOPE/input/Plumber2_data/US-Blo_2000-2006_FLUXNET2015_Met.nc")

In [None]:
# Display the summary of the opened file.
Plumber_2

In [None]:
# Raw values of the 'IGBP_veg_long' variable.
Plumber_2['IGBP_veg_long'].values

In [None]:
# Plot LAI. `.plot()` belongs to the xarray DataArray (the NumPy array
# returned by `.values` has no such method); squeeze first to drop length-1
# dimensions, as the other plotting cells in this notebook do.
Plumber_2['LAI'].squeeze().plot()

In [None]:
# Shape of the LAI array, including any length-1 dimensions.
Plumber_2['LAI'].shape

In [None]:
# LAI with its length-1 dimensions removed, ready for plotting.
squeezed_LAI = Plumber_2['LAI'].squeeze()

In [None]:
# Plot the squeezed array.
squeezed_LAI.plot()

In [None]:
# Plot the 'Qair' variable after dropping length-1 dimensions.
Plumber_2['Qair'].squeeze().plot()

In [None]:
# Plot the 'VPD' variable after dropping length-1 dimensions.
Plumber_2['VPD'].squeeze().plot()