**Script Description:** This script loads a pre-processed dataset, converts the units of the NEE variables, and computes their whole-day, nighttime and daytime means at daily and weekly resolution.

**File Name:** 01_03_Compute_Nighttime_NEE.ipynb

**Date:** 2025

**Created by:** Rob Alamgir

#### Import the relevant packages

In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns

#### Import and pre-process the dataset

In [2]:
# Read the pre-processed table and coerce the columns used further down to proper dtypes.
# errors='coerce' turns unparseable entries into NaT/NaN instead of raising.
data_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_V2.csv"
complete_dataset = pd.read_csv(data_path, low_memory=False)

complete_dataset['datetime'] = pd.to_datetime(complete_dataset['datetime'], errors='coerce')
for numeric_column in ('DOY', 'daytime'):
    complete_dataset[numeric_column] = pd.to_numeric(complete_dataset[numeric_column], errors='coerce')

# Calendar day of every record; serves as the grouping key for the daily means.
complete_dataset['date'] = complete_dataset['datetime'].dt.date

In [None]:
# Optional quick-look helpers: uncomment a line to inspect the loaded dataset.
#print(complete_dataset.info()) 
#complete_dataset.head(50)
#complete_dataset.tail(100)

#### Convert the units of the variables from μmol m⁻² s⁻¹ to kg ha⁻¹ day⁻¹

In [3]:
# Define conversion factors (multiply a flux in μmol m⁻² s⁻¹ to obtain kg ha⁻¹ day⁻¹).
# Both values equal  molar mass [g/mol] × 1e-6 mol/μmol × 1e-3 kg/g × 86400 s/day × 1e4 m²/ha
#                  = molar mass × 0.864
# i.e. 44 × 0.864 = 38.016 (CO2, result in kg of CO2) and 16 × 0.864 = 13.824 (CH4, result in kg of CH4).
# NOTE(review): this assumes NEE_CH4 is also stored in μmol m⁻² s⁻¹ (not nmol) — confirm against the input file.
CO2_conversion_factor = 38.016
CH4_conversion_factor = 13.824

# Flux columns that are averaged and then converted by apply_conversions().
columns_to_convert = ['NEE_CO2', 'NEE_CH4', 'GPP_NT', 'RECO_NT']    # Relevant columns for computation

# Daily aggregation helper
def compute_daily_means(df, columns):
    """Average the selected columns for every (Source, date) combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the grouping columns 'Source' and 'date' plus `columns`.
    columns : list of str
        Columns to average.

    Returns
    -------
    pandas.DataFrame
        One row per (Source, date) group with 'Source' and 'date' restored as
        ordinary columns, followed by the mean of each requested column.
    """
    per_site_day = df.groupby(['Source', 'date'])
    daily_means = per_site_day[columns].mean(numeric_only=True)
    return daily_means.reset_index()

# Weekly aggregation helper
def compute_weekly_means(df, columns):
    """Average the selected columns per 'Source' in weekly bins of 'datetime'.

    The frame is indexed by 'datetime', grouped by 'Source' and resampled with
    the pandas 'W' frequency; ``label='left'`` stamps every bin with its left
    edge rather than its right edge.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'datetime', 'Source' and `columns`.
    columns : list of str
        Columns to average.

    Returns
    -------
    pandas.DataFrame
        One row per (Source, week bin) with 'Source' and 'datetime' (the bin
        label) as ordinary columns, followed by the weekly means.
    """
    time_indexed = df.set_index('datetime')
    weekly_bins = time_indexed.groupby('Source')[columns].resample('W', label='left')
    weekly_means = weekly_bins.mean(numeric_only=True)
    return weekly_means.reset_index()

# Unit conversion helper
def apply_conversions(df):
    """Return a copy of `df` with converted flux columns appended.

    For each of 'NEE_CO2', 'NEE_CH4', 'GPP_NT' and 'RECO_NT' that is present,
    a '<name>_kg_day_ha' column is added holding the original values multiplied
    by the matching module-level factor (CH4 factor for 'NEE_CH4', CO2 factor
    for the others). Columns that are absent are skipped; the input frame is
    not modified.
    """
    conversion_table = (
        ('NEE_CO2', 'NEE_CO2_kg_day_ha', CO2_conversion_factor),
        ('NEE_CH4', 'NEE_CH4_kg_day_ha', CH4_conversion_factor),
        ('GPP_NT', 'GPP_NT_kg_day_ha', CO2_conversion_factor),
        ('RECO_NT', 'RECO_NT_kg_day_ha', CO2_conversion_factor),
    )
    converted = df.copy()
    for source_column, target_column, factor in conversion_table:
        if source_column in converted.columns:
            converted[target_column] = converted[source_column] * factor
    return converted

# Split the records by the 'daytime' flag once and reuse the subsets below:
# flag == 0 feeds the nighttime tables, flag == 1 the daytime tables, and
# records without a flag are aggregated separately.
daytime_flag = complete_dataset['daytime']
nighttime_records = complete_dataset[daytime_flag == 0]
daytime_records = complete_dataset[daytime_flag == 1]
unflagged_records = complete_dataset[daytime_flag.isna()]

# Daily means (averaged first, then converted to kg/day/ha)
Wholeday_daily_means = apply_conversions(compute_daily_means(complete_dataset, columns_to_convert))
nighttime_daily_means = apply_conversions(compute_daily_means(nighttime_records, columns_to_convert))
daytime_daily_means = apply_conversions(compute_daily_means(daytime_records, columns_to_convert))
NaN_daily_means = apply_conversions(compute_daily_means(unflagged_records, columns_to_convert))

# Weekly means (same pattern, weekly bins)
Wholeday_weekly_means = apply_conversions(compute_weekly_means(complete_dataset, columns_to_convert))
nighttime_weekly_means = apply_conversions(compute_weekly_means(nighttime_records, columns_to_convert))
daytime_weekly_means = apply_conversions(compute_weekly_means(daytime_records, columns_to_convert))
NaN_weekly_means = apply_conversions(compute_weekly_means(unflagged_records, columns_to_convert))

#### Add aggregation suffixes to the converted columns

In [4]:
# Converted flux columns created by apply_conversions(); each one receives a suffix
# identifying the aggregation window (DAv = daily average, WAv = weekly average) and
# the subset of records (24hrs = all, NT = nighttime, DT = daytime, NaN = no daytime flag).
converted_flux_columns = ['NEE_CO2_kg_day_ha', 'NEE_CH4_kg_day_ha', 'GPP_NT_kg_day_ha', 'RECO_NT_kg_day_ha']


def build_rename_map(suffix, rename_datetime=False):
    """Build the ``{old_name: new_name}`` mapping for one aggregated table.

    Parameters
    ----------
    suffix : str
        Text appended (after an underscore) to every converted flux column,
        e.g. 'DAv_NT' turns 'NEE_CO2_kg_day_ha' into 'NEE_CO2_kg_day_ha_DAv_NT'.
    rename_datetime : bool, default False
        When True the mapping also renames 'datetime' to 'date'. The weekly
        tables need this so they share the ('Source', 'date') join keys of the
        daily tables.

    Returns
    -------
    dict
        Mapping suitable for ``DataFrame.rename(columns=...)``.
    """
    mapping = {column: f"{column}_{suffix}" for column in converted_flux_columns}
    if rename_datetime:
        mapping['datetime'] = 'date'
    return mapping


# Adding the suffixes to the specified columns for Daily Averages
DAv_24hrs_columns_to_rename = build_rename_map('DAv_24hrs')
Wholeday_daily_means = Wholeday_daily_means.rename(columns=DAv_24hrs_columns_to_rename)

DAv_NT_columns_to_rename = build_rename_map('DAv_NT')
nighttime_daily_means = nighttime_daily_means.rename(columns=DAv_NT_columns_to_rename)

DAv_DT_columns_to_rename = build_rename_map('DAv_DT')
daytime_daily_means = daytime_daily_means.rename(columns=DAv_DT_columns_to_rename)

DAv_NaN_columns_to_rename = build_rename_map('DAv_NaN')
NaN_daily_means = NaN_daily_means.rename(columns=DAv_NaN_columns_to_rename)


# Adding the suffixes to the specified columns for Weekly Averages
# (the resample output carries the week label in 'datetime'; it becomes 'date' here)
WAv_24hrs_columns_to_rename = build_rename_map('WAv_24hrs', rename_datetime=True)
Wholeday_weekly_means = Wholeday_weekly_means.rename(columns=WAv_24hrs_columns_to_rename)

WAv_NT_columns_to_rename = build_rename_map('WAv_NT', rename_datetime=True)
nighttime_weekly_means = nighttime_weekly_means.rename(columns=WAv_NT_columns_to_rename)

WAv_DT_columns_to_rename = build_rename_map('WAv_DT', rename_datetime=True)
daytime_weekly_means = daytime_weekly_means.rename(columns=WAv_DT_columns_to_rename)

WAv_NaN_columns_to_rename = build_rename_map('WAv_NaN', rename_datetime=True)
NaN_weekly_means = NaN_weekly_means.rename(columns=WAv_NaN_columns_to_rename)

In [None]:
# Optional diagnostics: uncomment a line to inspect one of the aggregated tables.
# Note that DataFrame.info() prints its summary itself and returns None, so each of
# these print() calls would show the label followed by "None" after the summary.
#print("Wholeday_daily_means:\n", Wholeday_daily_means.info())
#print("nighttime_daily_means:\n", nighttime_daily_means.info())
#print("daytime_daily_means:\n", daytime_daily_means.info())
#print("NaN_daily_means:\n", NaN_daily_means.info())
#print("Wholeday_weekly_means:\n", Wholeday_weekly_means.info())
#print("nighttime_weekly_means:\n", nighttime_weekly_means.info())
#print("daytime_weekly_means:\n", daytime_weekly_means.info())
#print("NaN_weekly_means:\n", NaN_weekly_means.info())

#### Merge the new columns into the original dataframe

In [5]:
# Aggregated tables that are joined back onto the original records.
datasets = [Wholeday_daily_means, nighttime_daily_means, daytime_daily_means, NaN_daily_means,
            Wholeday_weekly_means, nighttime_weekly_means, daytime_weekly_means, NaN_weekly_means]

# Each entry pairs an aggregated table with the columns to copy from it.
merge_info = [
    (Wholeday_daily_means, ['NEE_CO2_kg_day_ha_DAv_24hrs', 'NEE_CH4_kg_day_ha_DAv_24hrs', 'GPP_NT_kg_day_ha_DAv_24hrs', 'RECO_NT_kg_day_ha_DAv_24hrs']),
    (nighttime_daily_means, ['NEE_CO2_kg_day_ha_DAv_NT', 'NEE_CH4_kg_day_ha_DAv_NT', 'GPP_NT_kg_day_ha_DAv_NT', 'RECO_NT_kg_day_ha_DAv_NT']),
    (daytime_daily_means, ['NEE_CO2_kg_day_ha_DAv_DT', 'NEE_CH4_kg_day_ha_DAv_DT', 'GPP_NT_kg_day_ha_DAv_DT', 'RECO_NT_kg_day_ha_DAv_DT']),
    (NaN_daily_means, ['NEE_CO2_kg_day_ha_DAv_NaN', 'NEE_CH4_kg_day_ha_DAv_NaN', 'GPP_NT_kg_day_ha_DAv_NaN', 'RECO_NT_kg_day_ha_DAv_NaN']),
    (Wholeday_weekly_means, ['NEE_CO2_kg_day_ha_WAv_24hrs', 'NEE_CH4_kg_day_ha_WAv_24hrs', 'GPP_NT_kg_day_ha_WAv_24hrs', 'RECO_NT_kg_day_ha_WAv_24hrs']),
    (nighttime_weekly_means, ['NEE_CO2_kg_day_ha_WAv_NT', 'NEE_CH4_kg_day_ha_WAv_NT', 'GPP_NT_kg_day_ha_WAv_NT', 'RECO_NT_kg_day_ha_WAv_NT']),
    (daytime_weekly_means, ['NEE_CO2_kg_day_ha_WAv_DT', 'NEE_CH4_kg_day_ha_WAv_DT', 'GPP_NT_kg_day_ha_WAv_DT', 'RECO_NT_kg_day_ha_WAv_DT']),
    (NaN_weekly_means, ['NEE_CO2_kg_day_ha_WAv_NaN', 'NEE_CH4_kg_day_ha_WAv_NaN', 'GPP_NT_kg_day_ha_WAv_NaN', 'RECO_NT_kg_day_ha_WAv_NaN'])
]

# The join key 'date' must have the same dtype on both sides, so it is converted to
# datetime64 once for the original frame and for every aggregated table.
for df in [complete_dataset] + datasets:
    if 'date' in df.columns:
        df['date'] = pd.to_datetime(df['date'], errors='coerce')  # Convert safely

# NOTE(review): the weekly tables carry the week-bin label in 'date', while
# complete_dataset['date'] is the calendar day of each record. Because the join is an
# equality match on ('Source', 'date'), weekly values are attached only to records
# whose day coincides with a week label; every other record gets NaN in the *_WAv_*
# columns. Confirm that this is the intended layout.

# Perform the merging
for df, cols in merge_info:
    if 'date' not in df.columns or 'Source' not in df.columns:
        # Report only the column names; interpolating the frame itself would dump its contents.
        print(f"Skipping dataset with columns {list(df.columns)} due to missing columns")
        continue

    present_cols = [col for col in cols if col in df.columns]

    # Remove results of an earlier run of this cell so that re-running it does not
    # produce duplicated '_x'/'_y' columns. On a first run nothing is dropped.
    complete_dataset = complete_dataset.drop(columns=present_cols, errors='ignore')

    # Left join keeps every original record; validate guards against duplicate
    # ('Source', 'date') keys in the aggregated table, which would multiply rows.
    complete_dataset = complete_dataset.merge(
        df[['Source', 'date'] + present_cols],
        on=['Source', 'date'], how='left', validate='many_to_one')

####  Perform a couple of post-processing steps

In [6]:
# Discard columns that hold no data at all (every value NaN).
complete_dataset = complete_dataset.dropna(axis=1, how='all')

# Reposition 'date' to index 1, i.e. directly after the first column ('datetime').
cols = complete_dataset.columns.tolist()
cols.remove('date')
cols.insert(1, 'date')
complete_dataset = complete_dataset[cols]

In [7]:
# DataFrame.info() writes the summary to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
#print(complete_dataset.columns)
#print(complete_dataset.dtypes) 
complete_dataset.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 425308 entries, 0 to 425307
Data columns (total 72 columns):
 #   Column                       Non-Null Count   Dtype         
---  ------                       --------------   -----         
 0   datetime                     425299 non-null  datetime64[ns]
 1   date                         425299 non-null  datetime64[ns]
 2   DOY                          425191 non-null  float64       
 3   daytime                      141695 non-null  float64       
 4   Source                       425308 non-null  object        
 5   SWCT_1_005                   390173 non-null  float64       
 6   SWCT_1_015                   405102 non-null  float64       
 7   SWCT_1_025                   405250 non-null  float64       
 8   SWCT_1_035                   405140 non-null  float64       
 9   SWCT_1_045                   405447 non-null  float64       
 10  SWCT_1_055                   405289 non-null  float64       
 11  SWCT_1_065                

### Export the final dataframe to a CSV file

In [8]:
# Destination of the enriched dataset (V3 of the pre-processed file).
output_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_V3.csv"  # Update the path as needed

# Write without the positional index so the CSV contains only the data columns.
complete_dataset.to_csv(path_or_buf=output_path, index=False)

print(f"DataFrame successfully saved to {output_path}")

DataFrame successfully saved to C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_V3.csv
