In [1]:
# standard stack
import pandas as pd
import numpy as np

# for projection
import pyproj
import cartopy.crs as ccrs  # Projections list

# vis
import matplotlib.pyplot as plt
%matplotlib inline

import os

# Bedmap1

- dropping 1M of 2M data (could use REMA surface data to keep more)

In [23]:
path_to_bedmap1_csvs = "/home/kim/data/bedmap/bedmap1-csv"
# list of names of files
list_of_csvs_bm1 = os.listdir(path_to_bedmap1_csvs)
print("Number of bedmap1 csv's:", len(list_of_csvs_bm1))

Number of bedmap1 csv's: 1


In [43]:
# Initialise df
column_list = ["lon", "lat", "x", "y", "s", "t", "b", "b_inferred", "source"]
all_data = pd.DataFrame(columns = column_list)

for i in list_of_csvs_bm1:
    ### LOAD ###
    print(i)
    # concat file path
    file_path = path_to_bedmap1_csvs + str("/") + i
    # load in file
    pd_data = pd.read_csv(file_path, skiprows = 18, low_memory = False)

    ### PREPROCESS ###
    # subset columns
    df = pd_data[["longitude (degree_east)", "latitude (degree_north)", "surface_altitude (m)", "land_ice_thickness (m)", "bedrock_altitude (m)"]]
    # rename columns
    df.columns = ["lon", "lat", "s", "t", "b"]
    # set default to false
    df["b_inferred"] = False

    # If s & t are given, and b is empty, fill b
    # Indicat that b will be inferred
    df.loc[(df['s'] != -9999) & (df['t'] != -9999) & (df['b'] == -9999), 'b_inferred'] = True
    # Needs to be & not and
    df.loc[(df['s'] != -9999) & (df['t'] != -9999) & (df['b'] == -9999), 'b'] = df['s'] - df['t']

    # Drop rows with missing bed elevation value
    print("#rows dropped:", np.where(df.b == -9999)[0].shape[0])
    df = df.drop(np.where(df.b == -9999)[0])

    # optional: remove s & t columns

    # Project
    lonlat_to_polarstereo = pyproj.Transformer.from_crs(crs_from = pyproj.CRS("epsg:4326"), crs_to = pyproj.CRS("epsg:3031"), always_xy = True) # lon, lat
    df["x"], df["y"] = lonlat_to_polarstereo.transform(df["lon"], df["lat"])

    # Add file name to source column for tracibility
    df["source"] = i

    # Reorder columns
    df = df[column_list]

    all_data = pd.concat([all_data, df])

BEDMAP1_1966-2000_AIR_BM1.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 945249


In [44]:
all_data.shape
all_data["b_inferred"].value_counts()

False    617469
True     342332
Name: b_inferred, dtype: int64

# Bedmap2

In [45]:
path_to_bedmap2_csvs = "/home/kim/data/bedmap/bedmap2-csv"
# list of names of files
list_of_csvs_bm2 = os.listdir(path_to_bedmap2_csvs)
print("Number of bedmap1 csv's:", len(list_of_csvs_bm2))

Number of bedmap1 csv's: 66


In [46]:
for i in list_of_csvs_bm2:
    ### LOAD ###
    print(i)
    # concat file path
    file_path = path_to_bedmap2_csvs + str("/") + i
    # load in file
    pd_data = pd.read_csv(file_path, skiprows = 18, low_memory = False)

    ### PREPROCESS ###
    # subset columns
    df = pd_data[["longitude (degree_east)", "latitude (degree_north)", "surface_altitude (m)", "land_ice_thickness (m)", "bedrock_altitude (m)"]]
    # rename columns
    df.columns = ["lon", "lat", "s", "t", "b"]
    # set default to false
    df["b_inferred"] = False

    # If s & t are given, and b is empty, fill b
    # Indicat that b will be inferred
    df.loc[(df['s'] != -9999) & (df['t'] != -9999) & (df['b'] == -9999), 'b_inferred'] = True
    # Needs to be & not and
    df.loc[(df['s'] != -9999) & (df['t'] != -9999) & (df['b'] == -9999), 'b'] = df['s'] - df['t']

    # Drop rows with missing bed elevation value
    print("#rows dropped:", np.where(df.b == -9999)[0].shape[0])
    df = df.drop(np.where(df.b == -9999)[0])

    # optional: remove s & t columns

    # Project
    lonlat_to_polarstereo = pyproj.Transformer.from_crs(crs_from = pyproj.CRS("epsg:4326"), crs_to = pyproj.CRS("epsg:3031"), always_xy = True) # lon, lat
    df["x"], df["y"] = lonlat_to_polarstereo.transform(df["lon"], df["lat"])

    # Add file name to source column for tracibility
    df["source"] = i

    # Reorder columns
    df = df[column_list]

    all_data = pd.concat([all_data, df])

STOLAF_2002_ITASE-Hercules-Dome_GRN_BM2.csv
#rows dropped: 9252
BGR_2002_PCMEGA_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 501919


  all_data = pd.concat([all_data, df])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


UTIG_2000_Robb-Glacier_AIR_BM2.csv
#rows dropped: 13457


  all_data = pd.concat([all_data, df])


NASA_2012_ICEBRIDGE_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 80743


  all_data = pd.concat([all_data, df])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


RNRF_2008_Vostok-Subglacial-Lake_AIR_BM2.csv
#rows dropped: 0


  all_data = pd.concat([all_data, df])


UTIG_1999_SOAR-LVS-WLK_AIR_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2010_PIG_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


UTIG_1998_West-Marie-Byrd-Land_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 666215


  all_data = pd.concat([all_data, df])


RNRF_2007_52RAEap5_AIR_BM2.csv
#rows dropped: 95874


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


RNRF_2006_51RAEap5_AIR_BM2.csv
#rows dropped: 112241


  all_data = pd.concat([all_data, df])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


NPI_2008_BELISSIMA_GRN_BM2.csv
#rows dropped: 4271


  all_data = pd.concat([all_data, df])


NASA_2004_ICEBRIDGE_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


AWI_1998_DML5_AIR_BM2.csv
#rows dropped: 51454


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2007_AGAP_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 469536


  all_data = pd.concat([all_data, df])


NIPR_1999_JARE40_GRN_BM2.csv
#rows dropped: 285


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2001_TORUS_AIR_BM2.csv
#rows dropped: 4487


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2004_BBAS_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 533872


  all_data = pd.concat([all_data, df])


UTIG_2004_AGASEA_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


INGV_1997_ITASE_AIR_BM2.csv
#rows dropped: 116280


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


UTIG_2008_ICECAP_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


RNRF_2005_50RAEap5_AIR_BM2.csv
#rows dropped: 70450


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_1994_DML1_AIR_BM2.csv
#rows dropped: 25257


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


STOLAF_1994_Siple-Dome_GRN_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NASA_2011_ICEBRIDGE_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 188694


  all_data = pd.concat([all_data, df])


PRIC_2004_CHINARE-21_GRN_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


LDEO_2007_AGAP-GAMBIT_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 2351633


  all_data = pd.concat([all_data, df])


RNRF_2008_53RAEap5_AIR_BM2.csv
#rows dropped: 114178


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_1996_DML3_AIR_BM2.csv
#rows dropped: 7486


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2001_MAMOG_AIR_BM2.csv
#rows dropped: 1965


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2010_IMAFI_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 170248


  all_data = pd.concat([all_data, df])


AWI_1995_DML2_AIR_BM2.csv
#rows dropped: 30600


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


UTIG_1991_CASERTZ_AIR_BM2.csv
#rows dropped: 32812


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2004_49RAEap5_AIR_BM2.csv
#rows dropped: 125228


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2007_ANTR_AIR_BM2.csv
#rows dropped: 80153


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NIPR_2007_JASE_GRN_BM2.csv
#rows dropped: 21327


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


STOLAF_2001_ITASE-Ellsworth_GRN_BM2.csv
#rows dropped: 9848


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2005_ANTSYSO_AIR_BM2.csv
#rows dropped: 47242


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


LDEO_2007_Recovery-Lakes_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


RNRF_2007_Mirny-Vostok_AIR_BM2.csv
#rows dropped: 108898


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


STOLAF_2001_ITASE-Byrd-Ellsworth_GRN_BM2.csv
#rows dropped: 51019


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


PRIC_2007_CHINARE-24_GRN_BM2.csv
#rows dropped: 5134


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NPI_2010_SRM_AIR_BM2.csv
#rows dropped: 43039


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2003_48RAEap5_AIR_BM2.csv
#rows dropped: 12568


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2004_DML10_AIR_BM2.csv
#rows dropped: 49792


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NASA_2002_ICEBRIDGE_AIR_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2009_FERRIGNO_GRN_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


AWI_2001_DML7_AIR_BM2.csv
#rows dropped: 165104


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BGR_1999_GANOVEX-VIII-Mertz_AIR_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_1997_DML4_AIR_BM2.csv
#rows dropped: 71905


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2013_Vostok-Progress_AIR_BM2.csv
#rows dropped: 138584


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2000_DML6_AIR_BM2.csv
#rows dropped: 38221


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NASA_2009_ICEBRIDGE_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 638928


  all_data = pd.concat([all_data, df])


BAS_2001_Bailey-Slessor_AIR_BM2.csv
#rows dropped: 79022


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2006_KV1-area_AIR_BM2.csv
#rows dropped: 47606


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


STOLAF_2002_ITASE-Byrd-South-Pole_GRN_BM2.csv
#rows dropped: 70932


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NASA_2010_ICEBRIDGE_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


AWI_2002_DML8_AIR_BM2.csv
#rows dropped: 100005


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NIPR_2007_JARE49_GRN_BM2.csv
#rows dropped: 6745


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BGR_1999_GANOVEX-VIII-Matusevich_AIR_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_1994_Evans_AIR_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_1998_Dufek_AIR_BM2.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2006_GRADES-IMAGE_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 61054


  all_data = pd.concat([all_data, df])


BAS_2007_TIGRIS_GRN_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


BAS_2005_WISE-ISODYN_AIR_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 369758


  all_data = pd.concat([all_data, df])


UCANTERBURY_2008_Darwin-Hatherton_GRN_BM2.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 132161


  all_data = pd.concat([all_data, df])


AWI_2003_DML9_AIR_BM2.csv
#rows dropped: 187728


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


In [47]:
all_data.shape
all_data["b_inferred"].value_counts()

False    23271144
True       342350
Name: b_inferred, dtype: int64

### runtime note

2 min

# Bedmap3

In [48]:
path_to_bedmap3_csvs = "/home/kim/data/bedmap/bedmap3-csv"
# list of names of files
list_of_csvs_bm3 = os.listdir(path_to_bedmap3_csvs)
print("Number of bedmap3 csv's:", len(list_of_csvs_bm3))

# Size of file
# sum(os.path.getsize(f) for f in os.listdir(".") if os.path.isfile(f))

Number of bedmap3 csv's: 84


In [49]:
for i in list_of_csvs_bm3:
    ### LOAD ###
    print(i)
    # concat file path
    file_path = path_to_bedmap3_csvs + str("/") + i
    # load in file
    pd_data = pd.read_csv(file_path, skiprows = 18, low_memory = False)

    ### PREPROCESS ###
    # subset columns
    df = pd_data[["longitude (degree_east)", "latitude (degree_north)", "surface_altitude (m)", "land_ice_thickness (m)", "bedrock_altitude (m)"]]
    # rename columns
    df.columns = ["lon", "lat", "s", "t", "b"]
    # set default to false for this dataset
    df["b_inferred"] = False

    # If s & t are given, and b is empty, fill b
    # Indicat that b will be inferred
    df.loc[(df['s'] != -9999) & (df['t'] != -9999) & (df['b'] == -9999), 'b_inferred'] = True
    # Needs to be & not and
    df.loc[(df['s'] != -9999) & (df['t'] != -9999) & (df['b'] == -9999), 'b'] = df['s'] - df['t']

    # Drop rows with missing bed elevation value
    print("#rows dropped:", np.where(df.b == -9999)[0].shape[0])
    df = df.drop(np.where(df.b == -9999)[0])

    # optional: remove s & t columns

    # Project
    lonlat_to_polarstereo = pyproj.Transformer.from_crs(crs_from = pyproj.CRS("epsg:4326"), crs_to = pyproj.CRS("epsg:3031"), always_xy = True) # lon, lat
    df["x"], df["y"] = lonlat_to_polarstereo.transform(df["lon"], df["lat"])

    # Add file name to source column for tracibility
    df["source"] = i

    # Reorder columns
    df = df[column_list]

    all_data = pd.concat([all_data, df])

NPI_2012_ICERISES_GRN_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


  all_data = pd.concat([all_data, df])


RNRF_1971_Lambert-Amery_SEI_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


UTIG_2013_GIMBLE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 31043


  all_data = pd.concat([all_data, df])


BAS_2011_Adelaide_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 362816


  all_data = pd.concat([all_data, df])


KOPRI_2018_KRT2_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 102761


  all_data = pd.concat([all_data, df])


AWI_2018_JURAS_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 189971


  all_data = pd.concat([all_data, df])


NIPR_2007_JARE49_GRN_BM3.csv
#rows dropped: 16037


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2019_Thwaites_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 23445


  all_data = pd.concat([all_data, df])


RNRF_2010_RAE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 284963


  all_data = pd.concat([all_data, df])


RNRF_2004_AMSap5_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 203887


  all_data = pd.concat([all_data, df])


LDEO_2015_ROSETTA_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 22


  all_data = pd.concat([all_data, df])


BAS_2012_Castle_GRN_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2015_RAE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 128672


  all_data = pd.concat([all_data, df])


BAS_2007_Lake-Ellsworth_GRN_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


UTIG_2016_OLDICE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 156180


  all_data = pd.concat([all_data, df])


UTIG_2009_Darwin-Hatherton_AIR_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2013_GEA-IV_AIR_BM3.csv
#rows dropped: 124251


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


KOPRI_2017_KRT1_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 124086


  all_data = pd.concat([all_data, df])


NPI_2015_POLARGAP_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 57387


  all_data = pd.concat([all_data, df])


NIPR_1992_JARE33_GRN_BM3.csv
#rows dropped: 1082


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NASA_2016_ICEBRIDGE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 460846


  all_data = pd.concat([all_data, df])


RNRF_2019_RAE_AIR_BM3.csv
#rows dropped: 109642


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2015_FISS_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 143194


  all_data = pd.concat([all_data, df])


PRIC_2017_CHA3_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 312352


  all_data = pd.concat([all_data, df])


NIPR_2012_JARE54_GRN_BM3.csv
#rows dropped: 1179


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2003_AMSap5_AIR_BM3.csv
#rows dropped: 20609


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


CRESIS_2013_Siple-Coast_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 48486


  all_data = pd.concat([all_data, df])


INGV_1997_Talos-Dome_AIR_BM3.csv
#rows dropped: 48194


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NIPR_1996_JARE37_GRN_BM3.csv
#rows dropped: 6480


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


UTIG_2015_EAGLE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 607482


  all_data = pd.concat([all_data, df])


NASA_2013_ICEBRIDGE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 5766


  all_data = pd.concat([all_data, df])


AWI_2018_DML-Coast_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 6753


  all_data = pd.concat([all_data, df])


PRIC_2016_CHA2_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 153161


  all_data = pd.concat([all_data, df])


CRESIS_2009_AntarcticaTO_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 14356


  all_data = pd.concat([all_data, df])


BAS_2013_ISTAR_GRN_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


NIPR_1999_JARE40_GRN_BM3.csv
#rows dropped: 285


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


PRIC_2015_CHA1_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 744428


  all_data = pd.concat([all_data, df])


RNRF_2011_RAE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 254999


  all_data = pd.concat([all_data, df])


BAS_2007_Rutford_GRN_BM3.csv
#rows dropped: 159027


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


INGV_2003_Talos-Dome_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 2659478


  all_data = pd.concat([all_data, df])


INGV_2001_Talos-Dome_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 481886


  all_data = pd.concat([all_data, df])


NASA_2018_ICEBRIDGE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 221687


  all_data = pd.concat([all_data, df])


PRIC_2018_CHA4_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 74132


  all_data = pd.concat([all_data, df])


RNRF_2006_RAEap5_AIR_BM3.csv
#rows dropped: 114789


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2014_RAE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 133707


  all_data = pd.concat([all_data, df])


RNRF_2018_RAE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 120332


  all_data = pd.concat([all_data, df])


RNRF_2006_Komsom-Vostok_AIR_BM3.csv
#rows dropped: 50335


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


STANFORD_1971_SPRI-NSF-TUD_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


NASA_2019_ICEBRIDGE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 52887


  all_data = pd.concat([all_data, df])


BAS_2010_IMAFI_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


NASA_2017_ICEBRIDGE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 287513


  all_data = pd.concat([all_data, df])


NIPR_2007_JASE_GRN_BM3.csv
#rows dropped: 39944


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


ULB_2012_ICECON_GRN_BM3.csv
#rows dropped: 28


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NASA_2014_ICEBRIDGE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 378054


  all_data = pd.concat([all_data, df])


BAS_2008_Lake-Ellsworth_GRN_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2008_AMSap5_AIR_BM3.csv
#rows dropped: 124204


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2017_RAE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 141819


  all_data = pd.concat([all_data, df])


UWASHINGTON_2018_South-Pole-Lake_GRN_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2007_AMSap5_AIR_BM3.csv
#rows dropped: 113105


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2013_RAE_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 1648138


  all_data = pd.concat([all_data, df])


NIPR_2017_JARE59_GRN_BM3.csv
#rows dropped: 39677


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NPI_2016_MADICE_GRN_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_1975_Filchner-Ronne_SEI_BM3.csv
#rows dropped: 94


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2015_GEA-DML_AIR_BM3.csv
#rows dropped: 23382


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


NIPR_2018_JARE60_GRN_BM3.csv
#rows dropped: 57904


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2016_FISS_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 27997


  all_data = pd.concat([all_data, df])


CECS_2006_Subglacial-Lake-CECs_GRN_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 58900


  all_data = pd.concat([all_data, df])


CRESIS_2009_Thwaites_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


BAS_2012_ICEGRAV_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 212993


  all_data = pd.concat([all_data, df])


RNRF_2009_RAEap5_AIR_BM3.csv
#rows dropped: 18829


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2018_Thwaites_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


BAS_2017_English-Coast_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 2398


  all_data = pd.concat([all_data, df])


RNRF_2016_RAE_AIR_BM3.csv
#rows dropped: 133473


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


BAS_2015_POLARGAP_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 41700


  all_data = pd.concat([all_data, df])


RNRF_2004_Mirny-Vostok_AIR_BM3.csv
#rows dropped: 113574


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2016_OIR_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 563667


  all_data = pd.concat([all_data, df])


AWI_2019_JURAS_AIR_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


RNRF_2005_AMSap5_AIR_BM3.csv
#rows dropped: 87185


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


INGV_1999_Talos-Dome_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 1470980


  all_data = pd.concat([all_data, df])


RNRF_1975_Lazarev_SEI_BM3.csv
#rows dropped: 49


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2014_Recovery-Glacier_AIR_BM3.csv
#rows dropped: 104080


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


ULB_2012_BEWISE_GRN_BM3.csv
#rows dropped: 0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False
  all_data = pd.concat([all_data, df])


AWI_2018_ANIRES_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 0


  all_data = pd.concat([all_data, df])


UTIG_2010_ICECAP_AIR_BM3.csv


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["b_inferred"] = False


#rows dropped: 4388774


  all_data = pd.concat([all_data, df])


# Runtime note:

- 84 files
- 44 M/53 M data points
- 3 min to run (now > 5)

# Cumulative:
- 67 M

# Checks

In [53]:
# write to csv
# It took 9 min to export!
# all_data.to_csv("/home/kim/data/bedmap/bedmap123.csv")

In [57]:
file_stats = os.stat("/home/kim/data/bedmap/bedmap123.csv")
file_stats.st_size / (1024 * 1024 * 1024) # 8 GB

8.319295328110456

In [51]:
all_data["b_inferred"].value_counts()
# 3M/65M inferred

False    64739723
True      3000399
Name: b_inferred, dtype: int64

In [58]:
print("Shape: ", all_data.shape)

all_data.head(5)

Shape:  (67740122, 9)


Unnamed: 0,lon,lat,x,y,s,t,b,b_inferred,source
246,-162.2167,-78.175,-393747.895145,-1227610.0,43.0,257.0,-607.0,False,BEDMAP1_1966-2000_AIR_BM1.csv
247,-163.0,-78.78833,-357254.891928,-1168528.0,197.0,415.0,-218.0,True,BEDMAP1_1966-2000_AIR_BM1.csv
248,-164.8333,-78.91666,-316007.544874,-1165778.0,48.0,369.0,-425.0,False,BEDMAP1_1966-2000_AIR_BM1.csv
249,-168.65,-79.15334,-232601.272638,-1158792.0,59.0,384.0,-539.0,False,BEDMAP1_1966-2000_AIR_BM1.csv
250,-171.5167,-79.11667,-174949.0027,-1172950.0,49.0,328.0,-695.0,False,BEDMAP1_1966-2000_AIR_BM1.csv


In [59]:
print("Minimum x values: ", int(np.min(all_data['x']))) # - 2.5 M
print("Minimum y values: ", int(np.min(all_data['y']))) # - 2.2 M
print()
print("Maximum x values: ", int(np.max(all_data['x']))) # 2.6 M
print("Maximum y values: ", int(np.max(all_data['y']))) # 2.2 M
# very plausible

Minimum x values:  -3397514
Minimum y values:  -2214258

Maximum x values:  2681772
Maximum y values:  2199674


In [60]:
# Check missing data

# None for b because we removed those
np.where(all_data['b'] == -9999)

print("Number of rows missing t: ", len(all_data[all_data['t'] == -9999]))
print("Proportion of rows missing t: ", np.round(len(all_data[all_data['t'] == -9999])/len(all_data)*100, 2), "%")
print()
print("Number of rows missing s: ", len(all_data[all_data['s'] == -9999]))
print("Percentage of rows missing s: ", np.round(len(all_data[all_data['s'] == -9999])/len(all_data)*100, 2), "%")
print()
print("Number of rows missing s & t: ", len(all_data[(all_data['s'] == -9999) & (all_data['t'] == -9999)]))
print("Percentage of rows missing s & t: ", np.round(len(all_data[(all_data['s'] == -9999) & (all_data['t'] == -9999)])/len(all_data)*100, 4), "%")

Number of rows missing t:  112821
Proportion of rows missing t:  0.17 %

Number of rows missing s:  1470780
Percentage of rows missing s:  2.17 %

Number of rows missing s & t:  20030
Percentage of rows missing s & t:  0.0296 %


In [61]:
complete_entries = all_data[(all_data['s'] != -9999) & (all_data['t'] != -9999)]

In [62]:
# Where it was imputed it should be all good
# 28 M don't add up
np.where((complete_entries['b'] + complete_entries['t'] - complete_entries['s']) != 0.0)[0].shape
# 100k don't add up with a 1 m margin
np.where((complete_entries['b'] + complete_entries['t'] - complete_entries['s']) > 1.0)[0].shape
# 61k don't add up with a 5 m margin
np.where((complete_entries['b'] + complete_entries['t'] - complete_entries['s']) > 5.0)[0].shape
# 58k don't add up with a 100m margin
np.where((complete_entries['b'] + complete_entries['t'] - complete_entries['s']) > 100.0)[0].shape

(2301473,)

In [15]:
np.where((complete_entries['b'] + complete_entries['t'] - complete_entries['s']) > 100.0)[0].shape

(58007,)

In [63]:
complete_entries.iloc[np.where((complete_entries['b'] + complete_entries['t'] - complete_entries['s']) > 200.0)[0]]

Unnamed: 0,lon,lat,x,y,s,t,b,b_inferred,source
829,-61.395870,-71.956620,-1.734973e+06,9.461000e+05,0.00,597.00,0.00,False,BEDMAP1_1966-2000_AIR_BM1.csv
830,-61.360750,-71.959870,-1.734075e+06,9.469899e+05,0.00,615.00,0.00,False,BEDMAP1_1966-2000_AIR_BM1.csv
831,-61.328930,-71.963130,-1.733230e+06,9.477787e+05,0.00,589.00,0.00,False,BEDMAP1_1966-2000_AIR_BM1.csv
832,-61.285460,-71.966450,-1.732187e+06,9.489160e+05,0.00,589.00,0.00,False,BEDMAP1_1966-2000_AIR_BM1.csv
833,-61.228500,-71.971340,-1.730766e+06,9.503756e+05,0.00,550.00,0.00,False,BEDMAP1_1966-2000_AIR_BM1.csv
...,...,...,...,...,...,...,...,...,...
2694865,139.745673,-66.690847,1.658556e+06,-1.958865e+06,-535.57,289.17,-100.06,False,UTIG_2010_ICECAP_AIR_BM3.csv
3184846,133.514082,-66.753937,1.856212e+06,-1.762347e+06,-1053.82,939.36,-19.88,False,UTIG_2010_ICECAP_AIR_BM3.csv
4078613,162.809760,-77.505508,4.027529e+05,-1.301870e+06,-1027.52,683.19,347.30,False,UTIG_2010_ICECAP_AIR_BM3.csv
4909918,127.188149,-67.771540,1.947589e+06,-1.477666e+06,-1602.63,1733.16,-343.64,False,UTIG_2010_ICECAP_AIR_BM3.csv


In [64]:
complete_entries.iloc[np.where((complete_entries['b'] + complete_entries['t'] - complete_entries['s']) > 200.0)[0]]["source"].value_counts()
# subglacial lake: discrepencies make sense
# PRIC_2016_CHA2_AIR_BM3.csv: very large data set

NASA_2010_ICEBRIDGE_AIR_BM2.csv               1214031
NASA_2004_ICEBRIDGE_AIR_BM2.csv                955039
PRIC_2016_CHA2_AIR_BM3.csv                      47700
NASA_2002_ICEBRIDGE_AIR_BM2.csv                 47666
BEDMAP1_1966-2000_AIR_BM1.csv                   12197
CECS_2006_Subglacial-Lake-CECs_GRN_BM3.csv       8201
UTIG_2015_EAGLE_AIR_BM3.csv                      1251
BAS_2005_WISE-ISODYN_AIR_BM2.csv                  234
UTIG_2010_ICECAP_AIR_BM3.csv                       17
BGR_2002_PCMEGA_AIR_BM2.csv                         1
PRIC_2015_CHA1_AIR_BM3.csv                          1
Name: source, dtype: int64

In [65]:
complete_entries.iloc[np.where((complete_entries['b'] > complete_entries['s']))[0]]["source"].value_counts()

NASA_2010_ICEBRIDGE_AIR_BM2.csv    1220333
NASA_2004_ICEBRIDGE_AIR_BM2.csv     965478
NASA_2002_ICEBRIDGE_AIR_BM2.csv      47730
PRIC_2016_CHA2_AIR_BM3.csv           26195
BAS_2018_Thwaites_AIR_BM3.csv         3436
UTIG_2015_EAGLE_AIR_BM3.csv           1252
BEDMAP1_1966-2000_AIR_BM1.csv          333
UTIG_2010_ICECAP_AIR_BM3.csv           277
NASA_2017_ICEBRIDGE_AIR_BM3.csv        255
BAS_1998_Dufek_AIR_BM2.csv              31
PRIC_2015_CHA1_AIR_BM3.csv               4
NASA_2014_ICEBRIDGE_AIR_BM3.csv          3
BGR_2002_PCMEGA_AIR_BM2.csv              1
NASA_2013_ICEBRIDGE_AIR_BM3.csv          1
Name: source, dtype: int64

In [66]:
np.where((all_data['b'] + all_data['t'] - all_data['s']) > 10000)[0].shape

(1449614,)

In [21]:
all_data[all_data["source"] == "PRIC_2016_CHA2_AIR_BM3.csv"]

Unnamed: 0,lon,lat,x,y,s,t,b,source
0,110.099040,-67.199222,2.356445e+06,-862291.627518,1354.1826,1573.91,-219.7274,PRIC_2016_CHA2_AIR_BM3.csv
1,110.098860,-67.199369,2.356432e+06,-862278.521535,1354.7116,1573.91,-219.1984,PRIC_2016_CHA2_AIR_BM3.csv
2,110.098681,-67.199515,2.356419e+06,-862265.495567,1355.2405,1573.91,-218.6695,PRIC_2016_CHA2_AIR_BM3.csv
3,110.098501,-67.199661,2.356407e+06,-862252.428564,1355.7692,1573.91,-218.1408,PRIC_2016_CHA2_AIR_BM3.csv
4,110.098321,-67.199808,2.356394e+06,-862239.322857,1356.2983,1572.23,-215.9317,PRIC_2016_CHA2_AIR_BM3.csv
...,...,...,...,...,...,...,...,...
1003466,177.127525,-89.741743,1.406190e+03,-28025.042913,2809.3734,2800.43,8.9434,PRIC_2016_CHA2_AIR_BM3.csv
1003467,177.098587,-89.741882,1.419580e+03,-28009.245720,2809.5645,2802.20,7.3645,PRIC_2016_CHA2_AIR_BM3.csv
1003468,177.069618,-89.742021,1.432969e+03,-27993.441380,2809.4943,2798.76,10.7343,PRIC_2016_CHA2_AIR_BM3.csv
1003469,177.040638,-89.742159,1.446354e+03,-27977.738895,2809.6542,2791.99,17.6642,PRIC_2016_CHA2_AIR_BM3.csv


In [None]:
all = pd.read_csv("/home/kim/data/bedmap/bedmap123.csv")