In [20]:
import pandas as pd
import numpy as np
import h5py

In [21]:
# load clean gdp data
gdp = pd.read_csv("data/clean_gdp.csv")
nearnad_snow_cov_max, nearnad_snow_cov_min = 0, 1000000
nearnad_snow_free_max, nearnad_snow_free_min = 0, 1000000
offnad_snow_cov_max, offnad_snow_cov_min = 0, 1000000
offnad_snow_free_max, offnad_snow_free_min = 0, 1000000
allangle_snow_cov_max, allangle_snow_cov_min = 0, 1000000
allangle_snow_free_max, allangle_snow_free_min = 0, 1000000

for i in range(len(gdp)):

    # get year, region, and gdp
    year = gdp["year"][i]
    region = gdp["region"][i]
    gdp_value = gdp["real_gdp"][i]

    # get the file name
    file_name = f"{year}_{region}.h5"

    # load the image
    file_path = f"data/annual_region_images/{file_name}"
    
    with h5py.File(file_path, 'r') as annual_region:
        nearnad_snow_cov = annual_region["NearNadir_Composite_Snow_Covered"][:]
        nearnad_snow_free = annual_region["NearNadir_Composite_Snow_Free"][:]
        offnad_snow_cov = annual_region["OffNadir_Composite_Snow_Covered"][:]
        offnad_snow_free = annual_region["OffNadir_Composite_Snow_Free"][:]
        allangle_snow_cov = annual_region["AllAngle_Composite_Snow_Covered"][:]
        allangle_snow_free = annual_region["AllAngle_Composite_Snow_Free"][:]

        # check what the max and min values are within each category
        if nearnad_snow_cov.max() > nearnad_snow_cov_max:
            nearnad_snow_cov_max = nearnad_snow_cov.max()
        if nearnad_snow_cov.min() < nearnad_snow_cov_min:
            nearnad_snow_cov_min = nearnad_snow_cov.min()
        if nearnad_snow_free.max() > nearnad_snow_free_max:
            nearnad_snow_free_max = nearnad_snow_free.max()
        if nearnad_snow_free.min() < nearnad_snow_free_min:
            nearnad_snow_free_min = nearnad_snow_free.min()
        if offnad_snow_cov.max() > offnad_snow_cov_max:
            offnad_snow_cov_max = offnad_snow_cov.max()
        if offnad_snow_cov.min() < offnad_snow_cov_min:
            offnad_snow_cov_min = offnad_snow_cov.min()
        if offnad_snow_free.max() > offnad_snow_free_max:
            offnad_snow_free_max = offnad_snow_free.max()
        if offnad_snow_free.min() < offnad_snow_free_min:
            offnad_snow_free_min = offnad_snow_free.min()
        if allangle_snow_cov.max() > allangle_snow_cov_max:
            allangle_snow_cov_max = allangle_snow_cov.max()
        if allangle_snow_cov.min() < allangle_snow_cov_min: 
            allangle_snow_cov_min = allangle_snow_cov.min()
        if allangle_snow_free.max() > allangle_snow_free_max:
            allangle_snow_free_max = allangle_snow_free.max()
        if allangle_snow_free.min() < allangle_snow_free_min:
            allangle_snow_free_min = allangle_snow_free.min()

print(f"NearNadir Composite Snow Covered Max: {nearnad_snow_cov_max}")
print(f"NearNadir Composite Snow Covered Min: {nearnad_snow_cov_min}")
print(f"NearNadir Composite Snow Free Max: {nearnad_snow_free_max}")
print(f"NearNadir Composite Snow Free Min: {nearnad_snow_free_min}")
print(f"OffNadir Composite Snow Covered Max: {offnad_snow_cov_max}")
print(f"OffNadir Composite Snow Covered Min: {offnad_snow_cov_min}")
print(f"OffNadir Composite Snow Free Max: {offnad_snow_free_max}")
print(f"OffNadir Composite Snow Free Min: {offnad_snow_free_min}")
print(f"AllAngle Composite Snow Covered Max: {allangle_snow_cov_max}")
print(f"AllAngle Composite Snow Covered Min: {allangle_snow_cov_min}")
print(f"AllAngle Composite Snow Free Max: {allangle_snow_free_max}")
print(f"AllAngle Composite Snow Free Min: {allangle_snow_free_min}")

# take the log of all the max values
nearnad_snow_cov_max = np.log(nearnad_snow_cov_max)
nearnad_snow_free_max = np.log(nearnad_snow_free_max)
offnad_snow_cov_max = np.log(offnad_snow_cov_max)
offnad_snow_free_max = np.log(offnad_snow_free_max)
allangle_snow_cov_max = np.log(allangle_snow_cov_max)
allangle_snow_free_max = np.log(allangle_snow_free_max)


NearNadir Composite Snow Covered Max: 30891.0
NearNadir Composite Snow Covered Min: 0.0
NearNadir Composite Snow Free Max: 30000.0
NearNadir Composite Snow Free Min: 0.0
OffNadir Composite Snow Covered Max: 30000.0
OffNadir Composite Snow Covered Min: 0.0
OffNadir Composite Snow Free Max: 26834.0
OffNadir Composite Snow Free Min: 0.0
AllAngle Composite Snow Covered Max: 30000.0
AllAngle Composite Snow Covered Min: 0.0
AllAngle Composite Snow Free Max: 23922.0
AllAngle Composite Snow Free Min: 0.0


In [41]:
def get_tabular_data(composite_name, log_max, prefix, annual_region, data_dict):

    image = annual_region[composite_name][:].flatten()

    # exclude the 0 values, take the log
    image = np.log(image[image != 0])

    # count the proportion of pixels in each of 10 bins
    bins = np.linspace(0, log_max, 11)
    digitized = np.digitize(image, bins) - 1
    bin_counts = np.bincount(digitized, minlength=10)
    bin_proportions = bin_counts / len(image)

    # create a dictionary to store the data
    data = data_dict.copy()
    for i in range(1, 11):
        data[f"{prefix}_{i}"] = bin_proportions[i-1]

    return data
    



In [45]:
# initialise a pandas dataframe to store the data
df = pd.DataFrame(columns=["year", "region", "gdp", 
                           "nearnad_snow_cov_1", "nearnad_snow_cov_2", "nearnad_snow_cov_3", "nearnad_snow_cov_4", "nearnad_snow_cov_5", "nearnad_snow_cov_6", "nearnad_snow_cov_7", "nearnad_snow_cov_8", "nearnad_snow_cov_9", "nearnad_snow_cov_10",
                           "nearnad_snow_free_1", "nearnad_snow_free_2", "nearnad_snow_free_3", "nearnad_snow_free_4", "nearnad_snow_free_5", "nearnad_snow_free_6", "nearnad_snow_free_7", "nearnad_snow_free_8", "nearnad_snow_free_9", "nearnad_snow_free_10",
                           "offnad_snow_cov_1", "offnad_snow_cov_2", "offnad_snow_cov_3", "offnad_snow_cov_4", "offnad_snow_cov_5", "offnad_snow_cov_6", "offnad_snow_cov_7", "offnad_snow_cov_8", "offnad_snow_cov_9", "offnad_snow_cov_10",
                           "offnad_snow_free_1", "offnad_snow_free_2", "offnad_snow_free_3", "offnad_snow_free_4", "offnad_snow_free_5", "offnad_snow_free_6", "offnad_snow_free_7", "offnad_snow_free_8", "offnad_snow_free_9", "offnad_snow_free_10", 
                           "allangle_snow_cov_1", "allangle_snow_cov_2", "allangle_snow_cov_3", "allangle_snow_cov_4", "allangle_snow_cov_5", "allangle_snow_cov_6", "allangle_snow_cov_7", "allangle_snow_cov_8", "allangle_snow_cov_9", "allangle_snow_cov_10", 
                           "allangle_snow_free_1", "allangle_snow_free_2", "allangle_snow_free_3", "allangle_snow_free_4", "allangle_snow_free_5", "allangle_snow_free_6", "allangle_snow_free_7", "allangle_snow_free_8", "allangle_snow_free_9", "allangle_snow_free_10"])

for i in range(len(gdp)):

    # get year, region, and gdp
    year = gdp["year"][i]
    region = gdp["region"][i]
    gdp_value = gdp["real_gdp"][i]
    data_dict = {"year": year, "region": region, "gdp": gdp_value}

    # get the file name
    file_name = f"{year}_{region}.h5"

    # load the image
    file_path = f"data/annual_region_images/{file_name}"
    
    with h5py.File(file_path, 'r') as annual_region:

        data_dict = get_tabular_data("NearNadir_Composite_Snow_Covered", nearnad_snow_cov_max, "nearnad_snow_cov", annual_region, data_dict)
        data_dict = get_tabular_data("NearNadir_Composite_Snow_Free", nearnad_snow_free_max, "nearnad_snow_free", annual_region, data_dict)
        data_dict = get_tabular_data("OffNadir_Composite_Snow_Covered", offnad_snow_cov_max, "offnad_snow_cov", annual_region, data_dict)
        data_dict = get_tabular_data("OffNadir_Composite_Snow_Free", offnad_snow_free_max, "offnad_snow_free", annual_region, data_dict)
        data_dict = get_tabular_data("AllAngle_Composite_Snow_Covered", allangle_snow_cov_max, "allangle_snow_cov", annual_region, data_dict)
        data_dict = get_tabular_data("AllAngle_Composite_Snow_Free", allangle_snow_free_max, "allangle_snow_free", annual_region, data_dict)

        # add the data to the dataframe
        new_row = pd.DataFrame([data_dict])
        df = pd.concat([df, new_row], ignore_index=True)

# save the dataframe

df.to_csv("data/tabular_data_ukraine.csv", index=False)

  df = pd.concat([df, new_row], ignore_index=True)
