In [None]:
# DOWNLOAD DATA
import requests, re, os
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Root of the remote directory listing that is crawled for GeoTIFF files.
archive_url = "https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/"
# File names to keep. The dots are escaped so that they match a literal "."
# only; unescaped they would match any character (e.g. "extent_v4x0xtif").
name_pattern = re.compile(r".*extent_v4\.0\.tif$")
# Local directory that receives the mirrored files.
data_dir = "data"

def download_recursive(url, outdir, pattern, timeout=60):
    """Mirror files matching ``pattern`` from an HTML directory listing.

    Fetches ``url``, descends into every link that points to a
    sub-directory of ``url`` and downloads every linked file whose name
    matches ``pattern`` into ``outdir``, recreating the remote directory
    layout locally. Files that already exist locally are not fetched again.
    One "Downloading"/"Skipped" line is printed per matching file.

    Args:
        url: URL of the directory listing to crawl.
        outdir: Local directory that receives the files (created if missing).
        pattern: Compiled regular expression; a file is downloaded when
            ``pattern.match(name)`` succeeds on its file name.
        timeout: Seconds to wait on each HTTP request before giving up.

    Raises:
        requests.HTTPError: If the listing or a file request comes back with
            an HTTP error status (nothing is written for that file).
    """
    # urljoin() resolves relative links against the *parent* of a URL that
    # has no trailing slash, so make sure the listing URL ends with one.
    if not url.endswith("/"):
        url += "/"

    os.makedirs(outdir, exist_ok=True)
    listing = requests.get(url, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were a listing.
    listing.raise_for_status()
    soup = BeautifulSoup(listing.text, "html.parser")

    for link in soup.find_all("a"):
        href = link.get("href")
        if not href:
            # Anchors without a target (<a name=...>) carry no href.
            continue

        full_url = urljoin(url, href)
        # Only follow links that lead further down the tree. This skips
        # parent-directory, self and off-site links wherever they appear in
        # the page, which prevents crawling upwards or recursing forever.
        if full_url == url or not full_url.startswith(url):
            continue

        # Use only the last path component as the local name so that an
        # absolute href cannot make os.path.join() escape ``outdir``.
        name = href.rstrip("/").rsplit("/", 1)[-1]
        if not name:
            continue

        if href.endswith("/"):
            download_recursive(full_url, os.path.join(outdir, name), pattern, timeout)
        elif pattern.match(name):
            path = os.path.join(outdir, name)
            if os.path.exists(path):
                print("Skipped", full_url)
                continue

            print("Downloading", full_url)
            response = requests.get(full_url, timeout=timeout)
            # Never store an HTTP error page under the data file's name.
            response.raise_for_status()

            # Write to a temporary name and rename afterwards, so that an
            # interrupted run cannot leave a truncated file that later runs
            # would treat as already downloaded.
            partial_path = path + ".part"
            with open(partial_path, "wb") as f:
                f.write(response.content)
            os.replace(partial_path, path)

# Mirror every matching file below the archive root into the local data
# directory; files that are already present locally are skipped.
download_recursive(archive_url, data_dir, name_pattern)

Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/10_Oct/N_19781026_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/10_Oct/N_19781028_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/10_Oct/N_19781030_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/11_Nov/N_19781101_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/11_Nov/N_19781103_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/11_Nov/N_19781105_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/11_Nov/N_19781107_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/11_Nov/N_19781109_extent_v4.0.tif
Skipped https://noaadata.apps.nsidc.org/NOAA/G02135/north/daily/geotiff/1978/11_Nov/N_19781111_extent_v4.0.tif
S

In [None]:
# LOAD DATA
import os
import pandas as pd

rows = []

# os.walk() yields entries in arbitrary order; sorting the directory and file
# names makes the row order deterministic from run to run.
for root, dirs, files in os.walk(data_dir):
    dirs.sort()  # in-place sort also fixes the order in which os.walk descends
    for file in sorted(files):
        # Use the module-level ``name_pattern``; ``pattern`` exists only as a
        # parameter of download_recursive and is undefined at this level.
        if name_pattern.match(file):
            path = os.path.join(root, file)
            rows.append({
                "filename": file,
                "local_path": path,
                "size_bytes": os.path.getsize(path)
            })

# Naming the columns keeps them present even when no file was found, so the
# cells that index these columns do not fail with a KeyError on an empty frame.
df = pd.DataFrame(rows, columns=["filename", "local_path", "size_bytes"])
print(df.head())

In [None]:
# PROCESS DATA
from PIL import Image
import numpy as np

def count_white(path):
    """Count the pixels of an image that are pure white.

    The image is converted to mode "L" (one 8-bit value per pixel) and the
    pixels whose value equals 255 are counted.

    Args:
        path: Path of the image file to open.

    Returns:
        int: Number of pixels with value 255 after conversion.
    """
    # The context manager closes the file handle as soon as the pixel data
    # has been copied; without it every processed image stays open until it
    # happens to be garbage-collected.
    with Image.open(path) as img:
        pixels = np.asarray(img.convert("L"))
    return int(np.count_nonzero(pixels == 255))

# Per-file white-pixel count, computed from the image stored at local_path.
df["white_pixels"] = df["local_path"].apply(count_white)

# File names embed the acquisition date as _YYYYMMDD_; pull out the eight
# digits and parse them into timestamps.
date_stamps = df["filename"].str.extract(r"_(\d{8})_", expand=False)
df["date"] = pd.to_datetime(date_stamps, format="%Y%m%d")

print(df.head())

In [None]:
# PLOT DATA
import matplotlib.pyplot as plt

# Plot in chronological order: the rows follow the order in which the files
# were found on disk, and a line drawn through unsorted dates jumps back and
# forth across the time axis.
timeline = df.sort_values("date")

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(timeline["date"], timeline["white_pixels"], marker="o")
ax.set_xlabel("Date")
ax.set_ylabel("White Pixels")
ax.set_title("White Pixels vs Date")
ax.tick_params(axis="x", labelrotation=45)
fig.tight_layout()
plt.show()