# Pipeline:

In [None]:
import os
import pandas as pd
import numpy as np
import pyproj

# CHANGE THIS TO YOUR PATH
path_to_bedmap_data_folder = "/home/kim/data/bedmap_raw_data_test"

# paths to the per-version subfolders
path_to_bedmap1_data_folder = os.path.join(path_to_bedmap_data_folder, "bedmap1_raw_data")
path_to_bedmap2_data_folder = os.path.join(path_to_bedmap_data_folder, "bedmap2_raw_data")
path_to_bedmap3_data_folder = os.path.join(path_to_bedmap_data_folder, "bedmap3_raw_data")

# List all CSVs in each folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to make
# the processing order (and hence the row order of the combined dataset)
# reproducible across runs and machines.
list_of_bedmap1_csv_files = sorted(f for f in os.listdir(path_to_bedmap1_data_folder) if f.endswith(".csv"))
list_of_bedmap2_csv_files = sorted(f for f in os.listdir(path_to_bedmap2_data_folder) if f.endswith(".csv"))
list_of_bedmap3_csv_files = sorted(f for f in os.listdir(path_to_bedmap3_data_folder) if f.endswith(".csv"))

# Column names (and order) of the combined dataset, and an empty frame with that schema
column_list = ["lon", "lat", "x", "y", "s", "t", "b", "b_inferred", "source"]
bedmap123_data = pd.DataFrame(columns = column_list)

# Set up the coordinate transformer once so it can be reused for every file
lonlat_to_polarstereo = pyproj.Transformer.from_crs(
    crs_from = pyproj.CRS("epsg:4326"), # WGS84 (lon, lat)
    crs_to = pyproj.CRS("epsg:3031"), # Antarctic Polar Stereographic (x, y)
    always_xy = True # inputs are passed as (lon, lat), outputs come back as (x, y)
)

# Parallel lists: entry k holds the folder / CSV file names of Bedmap version k+1
paths_to_data_folders_all_versions = [path_to_bedmap1_data_folder, path_to_bedmap2_data_folder, path_to_bedmap3_data_folder]
list_of_all_versions = [list_of_bedmap1_csv_files, list_of_bedmap2_csv_files, list_of_bedmap3_csv_files]

# Number of leading lines skipped in each CSV before the column header row
n_skipped_header_lines = 18
# Sentinel value that marks a missing measurement in the raw files
missing_value = -9999

# Raw column names to extract, and the short names they are renamed to (same order)
raw_column_names = [
    "longitude (degree_east)",
    "latitude (degree_north)",
    "surface_altitude (m)",
    "land_ice_thickness (m)",
    "bedrock_altitude (m)"
]
short_column_names = ["lon", "lat", "s", "t", "b"]

# Per-file frames are collected here and concatenated once at the end.
# Concatenating inside the loop re-copies the accumulated data for every file
# and relies on concatenating onto an empty frame, which pandas has deprecated.
per_file_frames = []
n_rows_combined = 0

# loop over bedmap versions
for v, (csv_list, folder_path) in enumerate(zip(list_of_all_versions, paths_to_data_folders_all_versions), start = 1):
    print(f"Processing Bedmap{v}...")
    print(f"Number of bedmap{v} csv files:", len(csv_list))

    # loop over csv files
    for i in csv_list:

        print("Processing:", i)
        # construct full file path
        file_path = os.path.join(folder_path, i)

        # Load CSV, skipping metadata header lines
        pd_data = pd.read_csv(file_path, skiprows = n_skipped_header_lines, low_memory = False)

        # Extract required columns (copy so the edits below do not touch pd_data)
        df = pd_data[raw_column_names].copy()

        # Rename columns to short names
        df.columns = short_column_names

        # Mark where bedrock elevation is inferred (as this is approximately true)
        df["b_inferred"] = False
        # Rows where bed elevation is missing but surface elevation and ice thickness are provided
        infer_b_mask = (df['s'] != missing_value) & (df['t'] != missing_value) & (df['b'] == missing_value)
        df.loc[infer_b_mask, 'b_inferred'] = True
        # Infer bed elevation as surface elevation minus ice thickness
        df.loc[infer_b_mask, 'b'] = df.loc[infer_b_mask, 's'] - df.loc[infer_b_mask, 't']

        # Drop rows still missing bed elevation
        # TODO: Change this if we are focussing on ice thickness and not bed elevation
        dropped_rows = (df['b'] == missing_value).sum()
        print(f"#rows dropped: {dropped_rows}")
        # Copy the filtered rows so adding columns below does not raise SettingWithCopyWarning
        df = df[df['b'] != missing_value].copy()

        # Project coordinates
        df["x"], df["y"] = lonlat_to_polarstereo.transform(df["lon"].values, df["lat"].values)

        # Add filename source
        df["source"] = i

        # Ensure column order and collect
        df = df[column_list]
        per_file_frames.append(df)
        n_rows_combined += len(df)

    # Running shape of the combined dataset after this Bedmap version
    print("Combined dataset shape:", (n_rows_combined, len(column_list)))

# Build the combined dataset in a single concatenation
if per_file_frames:
    bedmap123_data = pd.concat(per_file_frames, ignore_index = True)
else:
    # No CSV files found: keep an empty frame with the expected columns
    bedmap123_data = pd.DataFrame(columns = column_list)

Number of bedmap1 csv files: 1
Number of bedmap2 csv files: 66
Number of bedmap3 csv files: 84
Processing Bedmap1...
Number of bedmap1 csv files: 1
Processing Bedmap2...
Number of bedmap2 csv files: 66
Processing Bedmap3...
Number of bedmap3 csv files: 84


In [None]:
import os
import pandas as pd
import numpy as np
import pyproj

# CHANGE THIS TO YOUR PATH
path_to_bedmap_data_folder = "/home/kim/data/bedmap_raw_data_test"

# paths to the per-version subfolders
path_to_bedmap1_data_folder = os.path.join(path_to_bedmap_data_folder, "bedmap1_raw_data")
path_to_bedmap2_data_folder = os.path.join(path_to_bedmap_data_folder, "bedmap2_raw_data")
path_to_bedmap3_data_folder = os.path.join(path_to_bedmap_data_folder, "bedmap3_raw_data")

# List all CSVs in each folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to make
# the processing order (and hence the row order of the combined dataset)
# reproducible across runs and machines.
list_of_bedmap1_csv_files = sorted(f for f in os.listdir(path_to_bedmap1_data_folder) if f.endswith(".csv"))
list_of_bedmap2_csv_files = sorted(f for f in os.listdir(path_to_bedmap2_data_folder) if f.endswith(".csv"))
list_of_bedmap3_csv_files = sorted(f for f in os.listdir(path_to_bedmap3_data_folder) if f.endswith(".csv"))

print("Number of bedmap1 csv files:", len(list_of_bedmap1_csv_files))
print("Number of bedmap2 csv files:", len(list_of_bedmap2_csv_files))
print("Number of bedmap3 csv files:", len(list_of_bedmap3_csv_files))

# Column names (and order) of the combined dataset, and an empty frame with that schema
column_list = ["lon", "lat", "x", "y", "s", "t", "b", "b_inferred", "source"]
all_data = pd.DataFrame(columns = column_list)

# Set up the coordinate transformer once so it can be reused for every file
lonlat_to_polarstereo = pyproj.Transformer.from_crs(
    crs_from = pyproj.CRS("epsg:4326"), # WGS84 (lon, lat)
    crs_to = pyproj.CRS("epsg:3031"), # Antarctic Polar Stereographic (x, y)
    always_xy = True # inputs are passed as (lon, lat), outputs come back as (x, y)
)

# Parallel lists: entry k holds the folder / CSV file names of Bedmap version k+1
paths_to_data_folders_all_versions = [path_to_bedmap1_data_folder, path_to_bedmap2_data_folder, path_to_bedmap3_data_folder]
list_of_all_versions = [list_of_bedmap1_csv_files, list_of_bedmap2_csv_files, list_of_bedmap3_csv_files]

# Report the number of CSV files per Bedmap version (no files are read here)
for v, (csv_list, folder_path) in enumerate(zip(list_of_all_versions, paths_to_data_folders_all_versions), start = 1):
    print(f"Processing Bedmap{v}...")
    print(f"Number of bedmap{v} csv files:", len(csv_list))



# Number of leading lines skipped in each CSV before the column header row
n_skipped_header_lines = 18
# Sentinel value that marks a missing measurement in the raw files
missing_value = -9999

# Per-file frames are collected here and concatenated once after the loop.
# Concatenating inside the loop re-copies the accumulated data for every file
# and relies on concatenating onto an empty frame, which pandas has deprecated.
per_file_frames = []

# Process the Bedmap1 CSV files.
# NOTE: the loop previously iterated over `list_of_csvs_bedmap1`, a name that is
# never defined in this notebook (NameError on a fresh kernel); the list built
# from the Bedmap1 folder is `list_of_bedmap1_csv_files`.
for i in list_of_bedmap1_csv_files:
    print("Processing:", i)
    file_path = os.path.join(path_to_bedmap1_data_folder, i)

    # Load CSV, skipping metadata header lines
    pd_data = pd.read_csv(file_path, skiprows = n_skipped_header_lines, low_memory = False)

    # Extract required columns (copy so the edits below do not touch pd_data)
    df = pd_data[[
        "longitude (degree_east)",
        "latitude (degree_north)",
        "surface_altitude (m)",
        "land_ice_thickness (m)",
        "bedrock_altitude (m)"
    ]].copy()

    # Rename columns to short names
    df.columns = ["lon", "lat", "s", "t", "b"]

    # Mark where bedrock elevation is inferred (as this is approximately true)
    df["b_inferred"] = False
    # Rows where bed elevation is missing but surface elevation and ice thickness are provided
    infer_b_mask = (df['s'] != missing_value) & (df['t'] != missing_value) & (df['b'] == missing_value)
    df.loc[infer_b_mask, 'b_inferred'] = True
    # Infer bed elevation as surface elevation minus ice thickness
    df.loc[infer_b_mask, 'b'] = df.loc[infer_b_mask, 's'] - df.loc[infer_b_mask, 't']

    # Drop rows still missing bed elevation
    # TODO: Change this if we are focussing on ice thickness and not bed elevation
    dropped_rows = (df['b'] == missing_value).sum()
    print(f"#rows dropped: {dropped_rows}")
    # Copy the filtered rows so adding columns below does not raise SettingWithCopyWarning
    df = df[df['b'] != missing_value].copy()

    # Project coordinates
    df["x"], df["y"] = lonlat_to_polarstereo.transform(df["lon"].values, df["lat"].values)

    # Add filename source
    df["source"] = i

    # Ensure column order and collect
    df = df[column_list]
    per_file_frames.append(df)

# Build the combined dataset in a single concatenation
if per_file_frames:
    all_data = pd.concat(per_file_frames, ignore_index=True)
else:
    # No CSV files found: keep an empty frame with the expected columns
    all_data = pd.DataFrame(columns = column_list)

# Final check
print("Combined dataset shape:", all_data.shape)
# print(all_data.head())

Number of bedmap1 csvs: 1
Processing: BEDMAP1_1966-2000_AIR_BM1.csv
#rows dropped: 945249
Combined dataset shape: (959801, 9)
