In [None]:
# Department: ESTSOFT
# Class: AI Modelling
# Category: Machine learning
# Title: Ice Extent Prediction
# Contributors: Kimm Soo Min
# Last modified date: 09/05/25

In [None]:
# Library
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from PIL import Image, ImageOps
import re
from datetime import datetime
import glob
import os

In [None]:
# File path
# Root data folder; every other path in this notebook is joined onto it.
DIR_PATH = "/content/drive/MyDrive/Class/ESTSOFT/project/time_series_analysis/data/southern_hemisphere"

# Glob pattern (relative to DIR_PATH) selecting the original images.
pattern = "image_orig/S_*_extn_hires_v3.0.png"
input_path = os.path.join(DIR_PATH, pattern)

# Collect the matches, then order them by file name.
input_list = glob.glob(input_path)
input_list.sort()

In [None]:
# Image processing
# For every original image: crop to a fixed box, pad the crop to a square with
# a colour sampled from the crop itself, resize to 1024x1024 and save as PNG.
crop_box = (100, 305, 1140, 1405)  # (left, upper, right, lower) passed to Image.crop

# Create the output folder under DIR_PATH, because that is where the files are
# saved; a bare relative path would be created in the kernel's working
# directory instead and the save below could fail.
output_dir = os.path.join(DIR_PATH, "image_resized")
os.makedirs(output_dir, exist_ok=True)

for image_path in input_list:
    # Input: the date is the first run of six digits (YYYYMM) in the file name
    input_name = os.path.basename(image_path)
    date_match = re.search(r'(\d{6})', input_name)
    if date_match is None:
        # Without a date there is no output name to build, so skip the file
        print(f"Skipping {input_name}, no YYYYMM date in file name.")
        continue
    date = datetime.strptime(date_match.group(1), "%Y%m")

    # Output path is resolved first so existing files are skipped before any
    # image work is done
    output_name = "image_resized/img_resized_" + date.strftime("%Y%m") + ".png"
    output_path = os.path.join(DIR_PATH, output_name)
    if os.path.exists(output_path): # Skip if already exists
        print(f"Skipping {output_name}, already exists.")
        continue

    # The context manager closes the source file once processing is finished
    with Image.open(image_path) as img:
        # Crop
        img_cropped = img.crop(crop_box)

        # Pad to square: split the missing pixels evenly, remainder goes right/bottom
        width, height = img_cropped.size
        max_side = max(width, height)
        pad_left = (max_side - width) // 2
        pad_top = (max_side - height) // 2
        pad_right = max_side - width - pad_left
        pad_bottom = max_side - height - pad_top
        # Padding colour is sampled from pixel (50, 50) of the cropped image
        pad_colour = img_cropped.getpixel((50, 50))
        img_padded = ImageOps.expand(img_cropped, border=(pad_left, pad_top, pad_right, pad_bottom), fill=pad_colour)

        # Resize
        img_resized = img_padded.resize((1024, 1024), resample=Image.LANCZOS)

        # Save
        img_resized.save(output_path)

In [None]:
# File path
# Glob pattern (relative to DIR_PATH) selecting the resized images.
pattern = "image_resized/img_resized_*.png"
input_path = os.path.join(DIR_PATH, pattern)

# Collect the matches, then order them by file name.
input_list = glob.glob(input_path)
input_list.sort()

In [None]:
# Convert to greyscale
# For every resized image: convert to single-channel mode 'L', shrink to
# 256x256 and save as PNG.

# Create the output folder under DIR_PATH, because that is where the files are
# saved; a bare relative path would be created in the kernel's working
# directory instead and the save below could fail.
output_dir = os.path.join(DIR_PATH, "image_grey/256x256")
os.makedirs(output_dir, exist_ok=True)

for image_path in input_list:
    # Input: the date is the first run of six digits (YYYYMM) in the file name
    input_name = os.path.basename(image_path)
    date_match = re.search(r'(\d{6})', input_name)
    if date_match is None:
        # Without a date there is no output name to build, so skip the file
        print(f"Skipping {input_name}, no YYYYMM date in file name.")
        continue
    date = datetime.strptime(date_match.group(1), "%Y%m")

    # Output path is resolved first so existing files are skipped before any
    # image work is done
    output_name = "image_grey/256x256/img_grey_" + date.strftime("%Y%m") + ".png"
    output_path = os.path.join(DIR_PATH, output_name)
    if os.path.exists(output_path): # Skip if already exists
        print(f"Skipping {output_name}, already exists.")
        continue

    # The context manager closes the source file once processing is finished
    with Image.open(image_path) as img:
        # Convert
        img_grey = img.convert('L')
        img_grey = img_grey.resize((256, 256), resample=Image.LANCZOS)

        # Save
        img_grey.save(output_path)

In [None]:
# File path
# Glob pattern (relative to DIR_PATH) selecting the ice-extent CSV files;
# "??" matches exactly two characters (presumably the month number - confirm).
pattern = "ice_extent/S_??_extent_v3.0.csv"
input_path = os.path.join(DIR_PATH, pattern)
# Glob the CSV pattern built just above. Globbing output_path here would only
# return a leftover image path from an earlier cell, never the CSV files.
input_list = sorted(glob.glob(input_path))

In [None]:
# Read files
# Fail early with a clear message; pd.concat on an empty list raises an
# opaque "No objects to concatenate" error instead.
if not input_list:
    raise FileNotFoundError("No ice extent CSV files found; check DIR_PATH and the glob pattern.")

# skipinitialspace strips the blanks that follow each comma, so column names
# and values are read without leading spaces
dfs = [pd.read_csv(csv_path, skipinitialspace=True) for csv_path in input_list]
df_merged = pd.concat(dfs, ignore_index=True)

# Create date column
# The month is zero-padded so year + month is always six characters and
# matches '%Y%m' unambiguously (e.g. 1978 and 1 -> "197801")
month_str = df_merged['mo'].astype(str).str.zfill(2)
df_merged['date'] = pd.to_datetime(df_merged['year'].astype(str) + month_str, format='%Y%m')

# Order chronologically and drop the columns now encoded in 'date'
df_merged = df_merged.sort_values('date').drop(columns=['year', 'mo'])

# Save
output_path = os.path.join(DIR_PATH, "ice_extent/S_extent_merged.csv")
df_merged.to_csv(output_path, index=False)