**Script Description:** This script loads the complete merged dataset and splits it into two seasonal periods: wet (October–March) and dry (April–September).

**File Name:** 01_11_Seasonal_Splitting.ipynb

**Date:** 2025

**Created by:** Rob Alamgir  

#### Import the relevant packages

In [1]:
import pandas as pd
import numpy as np

#### Step 1: Load Data

In [2]:
# Location of the merged, pre-processed dataset covering all locations
data_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_V6.csv"

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns
complete_dataset = pd.read_csv(data_path, low_memory=False)

# Convert the 'Date' column to datetime format
complete_dataset['Date'] = pd.to_datetime(complete_dataset['Date'])

# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() appends a stray "None" line to the output
complete_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36056 entries, 0 to 36055
Columns: 109 entries, Date to week_number
dtypes: datetime64[ns](1), float64(104), int64(2), object(2)
memory usage: 30.0+ MB
None


In [3]:
# Ensure 'Date' column is in datetime format (an already-converted column is left unchanged)
complete_dataset['Date'] = pd.to_datetime(complete_dataset['Date'])

# Define seasons: records from October through March are 'Wet', all others are 'Dry'.
# Note: a record with a missing Date matches no wet month and is therefore labelled 'Dry'.
wet_months = [10, 11, 12, 1, 2, 3]
complete_dataset['Season'] = np.where(
    complete_dataset['Date'].dt.month.isin(wet_months), 'Wet', 'Dry')

# Compute hydrological balance (Rainfall - ET)
hydro_balance = complete_dataset['RAIN_f'] - complete_dataset['ET']
complete_dataset['Hydro_Balance'] = hydro_balance

# Classify into deficit (negative) or surplus (zero or positive).
# A missing balance (RAIN_f or ET is NaN) is kept missing: `NaN < 0` evaluates to
# False, so a plain two-way test would silently report those records as 'Surplus'.
hydro_status = pd.Series(
    np.where(hydro_balance < 0, 'Deficit', 'Surplus'),
    index=hydro_balance.index, dtype=object)
complete_dataset['Hydro_Status'] = hydro_status.where(hydro_balance.notna())

In [4]:
# Check the updated dataframe: 'Season', 'Hydro_Balance' and 'Hydro_Status' should now be present.
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() appends a stray "None" line to the output
complete_dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36056 entries, 0 to 36055
Columns: 112 entries, Date to Hydro_Status
dtypes: datetime64[ns](1), float64(105), int64(2), object(4)
memory usage: 30.8+ MB
None


In [5]:
# Partition the records by their 'Season' label; .copy() gives each subset its own
# data, so later edits to a seasonal frame do not touch complete_dataset
season_labels = complete_dataset['Season']
Hydro_Wet_Season = complete_dataset.loc[season_labels.eq('Wet')].copy()
Hydro_Dry_Season = complete_dataset.loc[season_labels.eq('Dry')].copy()

# Report how many records ended up in each seasonal subset
print(f"Hydro_Wet_Season: {len(Hydro_Wet_Season)} rows")
print(f"Hydro_Dry_Season: {len(Hydro_Dry_Season)} rows")

Hydro_Wet_Season: 17559 rows
Hydro_Dry_Season: 18497 rows


#### Export the datasets

In [6]:
# Destination CSV files for the wet-season and dry-season subsets
Hydro_Wet_Season_data_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Wet.csv"
Hydro_Dry_Season_data_path = "C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Dry.csv"

# Write each seasonal subset to its CSV file; index=False keeps the row index out of the file
for season_frame, csv_path in (
    (Hydro_Wet_Season, Hydro_Wet_Season_data_path),
    (Hydro_Dry_Season, Hydro_Dry_Season_data_path),
):
    season_frame.to_csv(csv_path, index=False)

# Confirm where each file was written
print(f"Hydro_Wet_Season dataset exported to {Hydro_Wet_Season_data_path}.")
print(f"Hydro_Dry_Season dataset exported to {Hydro_Dry_Season_data_path}.")

Hydro_Wet_Season dataset exported to C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Wet.csv.
Hydro_Dry_Season dataset exported to C:/Data_MSc_Thesis/Pre_Processed_Data_Final/Pre_Processed_Data_All_Locations_Updated_V6_Dry.csv.
