In [29]:
import geopandas as gpd
import pandas as pd
import os
import fiona
from matplotlib_scalebar.scalebar import ScaleBar
import matplotlib.pyplot as plt

# Define the paths
input_file_path = "./antarctic_index.gpkg"
# NOTE(review): absolute, machine-specific path; it is not used by any code
# visible in this notebook section — consider making it configurable.
map_path = "/Users/nathanbekele/Downloads/Quantarctica3/Miscellaneous/SimpleBasemap/ADD_DerivedLowresBasemap.shp"
output_folder = "./data3"
statistics_folder = "./data_statistics"

# Ensure the output folders exist
os.makedirs(output_folder, exist_ok=True)
os.makedirs(statistics_folder, exist_ok=True)

# Read the GeoPackage without a `layer=` argument, so a single layer is loaded
# (the captured output below shows one row, AWI_1994_DML1).
fp = gpd.read_file(input_file_path)
print(fp.head())
print(fp.tail())

print(fp['geometry'].tolist())
# Report the CRS of the frame that was just loaded. The original referenced
# `g.crs`, but `g` is never defined in this notebook, so the cell only worked
# with leftover kernel state and fails with NameError on a fresh kernel.
print("CRS of the GeoPackage:", fp.crs)

            name   uri institution     region granule segment   campaign  \
0  AWI_1994_DML1  None         AWI  antarctic    None    None  1994_DML1   

  availability                                           geometry  
0            u  MULTIPOINT (-285560.755 1557735.604, -285182.1...  
            name   uri institution     region granule segment   campaign  \
0  AWI_1994_DML1  None         AWI  antarctic    None    None  1994_DML1   

  availability                                           geometry  
0            u  MULTIPOINT (-285560.755 1557735.604, -285182.1...  
[<MULTIPOINT (-285560.755 1557735.604, -285182.186 1557356.256, -284979.372 1...>]
CRS of the GeoPackage: EPSG:3031


In [41]:
import geopandas as gpd
import pandas as pd
import os
from shapely.geometry import MultiPoint, LineString, Point
from shapely.ops import transform
from functools import partial
import pyproj

# Locations of the input GeoPackage, the basemap shapefile, and the output directories
statistics_folder = "./data_statistics"
output_folder = "./data3"
map_path = "/Users/nathanbekele/Downloads/Quantarctica3/Miscellaneous/SimpleBasemap/ADD_DerivedLowresBasemap.shp"
input_file_path = "./antarctic_index.gpkg"

# Create each output directory, in the same order as before; existing ones are left alone
for _folder in (output_folder, statistics_folder):
    os.makedirs(_folder, exist_ok=True)

# Function to transform geometry to a specific CRS
def transform_to_meters(geometry, src_crs):
    """Reproject a shapely geometry from ``src_crs`` into EPSG:3031.

    Parameters
    ----------
    geometry : shapely geometry
        Geometry whose coordinates are expressed in ``src_crs``.
    src_crs : str or pyproj CRS (anything ``pyproj.Transformer.from_crs`` accepts)
        Source coordinate reference system, e.g. ``"EPSG:4326"`` or the
        ``GeoDataFrame.crs`` object the caller in this notebook passes.

    Returns
    -------
    shapely geometry
        The result of ``shapely.ops.transform`` with coordinates in EPSG:3031.
    """
    # pyproj.transform and Proj(init=...) are deprecated, and `init=` expects a
    # string such as 'epsg:4326' rather than the CRS object the caller passes.
    # always_xy=True keeps the (x, y) / (lon, lat) axis order the old `init=`
    # style implied.
    transformer = pyproj.Transformer.from_crs(src_crs, "EPSG:3031", always_xy=True)
    return transform(transformer.transform, geometry)

# Function to calculate distance from geometries
def calculate_distance(geometry):
    """Return the length of the path described by ``geometry``, in CRS units.

    * ``LineString``: its own length.
    * ``MultiPoint``: the points are joined, in stored order, into a line and
      that line's length is returned. Fewer than two points gives 0, because
      a line cannot be built from them.
    * Anything else (``Point``, ``None``, other geometry types): 0.

    Parameters
    ----------
    geometry : shapely geometry or None

    Returns
    -------
    float or int
        Path length, or 0 when no length can be derived.
    """
    if isinstance(geometry, LineString):
        # No need to rebuild the line from its own coordinates.
        return geometry.length

    if isinstance(geometry, MultiPoint):
        points = list(geometry.geoms)
        # LineString() rejects a single coordinate, so guard the degenerate
        # case instead of letting one odd row abort the whole layer.
        if len(points) < 2:
            return 0
        return LineString(points).length

    # A single point has no length; other types are not measured here.
    return 0

# List all layers in the GeoPackage
# NOTE: `fiona` is imported in the earlier cell, not in this cell's import block.
layers = fiona.listlayers(input_file_path)
all_statistics = []

# Define the last relevant layer
last_relevant_layer = 'RNRF_2019_RAE'

# Columns the groupby below needs, with the value used when a layer lacks them.
# 'institution' is a grouping key just like 'campaign', so it gets the same
# fallback instead of raising KeyError.
column_defaults = {
    'line_km_with_radargrams': 0,
    'institution': 'Unknown',
    'campaign': 'Unknown',
}

# Process each layer
for layer in layers:
    # NOTE(review): the loop stops *before* reading this layer, so
    # 'RNRF_2019_RAE' itself is excluded from the statistics — confirm intended.
    if layer == last_relevant_layer:
        print(f"Reached the last relevant layer: {layer}. Stopping further processing.")
        break

    print(f"Processing layer: {layer}")
    gdf = gpd.read_file(input_file_path, layer=layer)

    # Transform geometries to the CRS in meters if needed
    if gdf.crs is None:
        # Without a source CRS there is nothing to reproject from; the old code
        # would have passed None into the reprojection helper and crashed.
        print(f"Layer {layer} has no CRS; leaving coordinates untouched.")
    elif gdf.crs != 'EPSG:3031':
        gdf['geometry'] = gdf['geometry'].apply(transform_to_meters, src_crs=gdf.crs)

    # Calculate distances for each row
    # NOTE(review): lengths are in CRS units (metres for EPSG:3031), while the
    # radargram column name suggests kilometres — confirm the units.
    gdf['distance'] = gdf['geometry'].apply(calculate_distance)

    # If necessary columns are missing, add them with default values
    for column, default in column_defaults.items():
        if column not in gdf.columns:
            gdf[column] = default

    # Group by institution and campaign to calculate statistics
    grouped = gdf.groupby(['institution', 'campaign']).agg({
        'distance': 'sum',
        'line_km_with_radargrams': 'sum'
    }).reset_index()

    # Append the statistics to the all_statistics list
    all_statistics.append(grouped)

# Combine all statistics into a single DataFrame
if all_statistics:
    all_statistics_df = pd.concat(all_statistics, ignore_index=True)
else:
    # pd.concat raises on an empty list (no layers, or the first layer is the
    # stop marker); fall back to an empty table with the expected columns.
    all_statistics_df = pd.DataFrame(
        columns=['institution', 'campaign', 'distance', 'line_km_with_radargrams']
    )

# Save statistics to CSV
statistics_output_path = os.path.join(statistics_folder, 'institution_statistics.csv')
all_statistics_df.to_csv(statistics_output_path, index=False)
print(f"Saved institution statistics to {statistics_output_path}")

# Display the statistics
for institution, stats in all_statistics_df.groupby('institution'):
    print(f"\nStatistics for institution: {institution}")
    print(stats)

# Display the complete statistics table
print("\nComplete statistics table:")
print(all_statistics_df)


Processing layer: AWI_1994_DML1
Processing layer: AWI_1995_DML2
Processing layer: AWI_1996_DML3
Processing layer: AWI_1997_DML4
Processing layer: AWI_1998_DML5
Processing layer: AWI_2000_DML6
Processing layer: AWI_2001_DML7
Processing layer: AWI_2002_DML8
Processing layer: AWI_2003_DML9
Processing layer: AWI_2004_DML10
Processing layer: AWI_2005_ANTSYSO
Processing layer: AWI_2007_ANTR
Processing layer: AWI_2013_GEA-IV
Processing layer: AWI_2014_Recovery-Glacier
Processing layer: AWI_2015_GEA-DML
Processing layer: AWI_2016_OIR
Processing layer: AWI_2018_ANIRES
Processing layer: AWI_2018_DML-Coast
Processing layer: AWI_2018_JURAS
Processing layer: AWI_2019_JURAS
Processing layer: BAS_1994_Evans
Processing layer: BAS_1998_Dufek
Processing layer: BAS_2001_Bailey-Slessor
Processing layer: BAS_2001_MAMOG
Processing layer: BAS_2002_TORUS
Processing layer: BAS_2007_Lake-Ellsworth
Processing layer: BAS_2007_Rutford
Processing layer: BAS_2007_TIGRIS
Processing layer: BAS_2008_Lake-Ellsworth
Proc