#### Script Description
This script loads the complete merged dataset and splits it into two seasonal periods: a wet season (October–March) and a dry season (April–September).

*File Name:* 01_10_Seasonal_Splitting.ipynb

*Date:* 2025

*Created by:* Rob Alamgir  

*Version:* 1.0

*References:*

#### Import the relevant packages

In [1]:
import pandas as pd
import numpy as np

#### Step 1: Load Data

In [2]:
# Location of the merged, pre-processed dataset covering all sites
data_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_V6.csv"

# low_memory=False makes pandas read the file in one pass, so each column gets a single inferred dtype
complete_dataset = pd.read_csv(data_path, low_memory=False)
complete_dataset['Date'] = pd.to_datetime(complete_dataset['Date'])  # Convert 'Date' column to datetime format

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
complete_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36275 entries, 0 to 36274
Data columns (total 95 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   Date                         36275 non-null  datetime64[ns]
 1   DOY                          8873 non-null   float64       
 2   Site_ID                      36275 non-null  object        
 3   year_month                   8873 non-null   object        
 4   S1_VSM                       17022 non-null  float64       
 5   S1_Backscatter               17022 non-null  float64       
 6   Planet_SWC                   35314 non-null  float64       
 7   Available_soil_storage_mm    12031 non-null  float64       
 8   S2_NDVI                      4260 non-null   float64       
 9   S2_EVI                       4260 non-null   float64       
 10  S2_NDMI                      4260 non-null   float64       
 11  L8_9_LST                     1565 non-nul

In [3]:
# Ensure 'Date' column is in datetime format (safe to re-run on an already-converted column)
complete_dataset['Date'] = pd.to_datetime(complete_dataset['Date'])

# Define seasons: calendar months October-March are labelled 'Wet', all other months 'Dry'
wet_months = [10, 11, 12, 1, 2, 3]
complete_dataset['Season'] = np.where(
    complete_dataset['Date'].dt.month.isin(wet_months), 'Wet', 'Dry')

# Compute hydrological balance (Rainfall - ET).
# NOTE(review): assumes 'RAIN_f' and 'ET' are numeric and in the same units - confirm upstream.
hydro_balance = complete_dataset['RAIN_f'] - complete_dataset['ET']
complete_dataset['Hydro_Balance'] = hydro_balance

# Classify into deficit (negative) or surplus (zero or positive).
# Rows whose balance is missing (NaN) are left unclassified: a plain
# `x < 0` test is False for NaN and would wrongly label them 'Surplus'.
complete_dataset['Hydro_Status'] = (
    hydro_balance.lt(0)
    .map({True: 'Deficit', False: 'Surplus'})
    .where(hydro_balance.notna())
)

In [4]:
# Check the updated dataframe: the summary should now list the columns added
# in the previous cell. info() prints the report itself and returns None, so
# it is called directly rather than through print() (which would append "None").
complete_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36275 entries, 0 to 36274
Data columns (total 98 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   Date                         36275 non-null  datetime64[ns]
 1   DOY                          8873 non-null   float64       
 2   Site_ID                      36275 non-null  object        
 3   year_month                   8873 non-null   object        
 4   S1_VSM                       17022 non-null  float64       
 5   S1_Backscatter               17022 non-null  float64       
 6   Planet_SWC                   35314 non-null  float64       
 7   Available_soil_storage_mm    12031 non-null  float64       
 8   S2_NDVI                      4260 non-null   float64       
 9   S2_EVI                       4260 non-null   float64       
 10  S2_NDMI                      4260 non-null   float64       
 11  L8_9_LST                     1565 non-nul

In [5]:
# Partition the rows by their 'Season' label; each subset is an independent
# copy, so later edits to one frame cannot affect complete_dataset.
Hydro_Wet_Season = complete_dataset.query("Season == 'Wet'").copy()
Hydro_Dry_Season = complete_dataset.query("Season == 'Dry'").copy()

# Report how many rows landed in each subset
print(f"Hydro_Wet_Season: {len(Hydro_Wet_Season)} rows")
print(f"Hydro_Dry_Season: {len(Hydro_Dry_Season)} rows")

Hydro_Wet_Season: 17657 rows
Hydro_Dry_Season: 18618 rows


#### Export the datasets

In [6]:
# Destination CSV files for the wet-season and dry-season subsets
Hydro_Wet_Season_data_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Wet.csv"
Hydro_Dry_Season_data_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Dry.csv"

# (label, dataframe, destination) for every subset to be written
seasonal_exports = (
    ("Hydro_Wet_Season", Hydro_Wet_Season, Hydro_Wet_Season_data_path),
    ("Hydro_Dry_Season", Hydro_Dry_Season, Hydro_Dry_Season_data_path),
)

# Write both subsets to CSV without the index column
for _, season_frame, csv_path in seasonal_exports:
    season_frame.to_csv(csv_path, index=False)

# Confirm the exports only after every file has been written
for label, _, csv_path in seasonal_exports:
    print(f"{label} dataset exported to {csv_path}.")

Hydro_Wet_Season dataset exported to C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Wet.csv.
Hydro_Dry_Season dataset exported to C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Dry.csv.
