# Arctic ice application

**Goal:** "promote" the persistence of an $H_1$ feature by pixel weighting to find patterns in the ice data.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from ripser import ripser
from H1_optimizer import one_step_reweight_enhance_H1
from loadsir import loadsir
from loadsir import loadsir
from tqdm import tqdm
import os
from sklearn.decomposition import PCA

### Plot all persistence diagrams

In [None]:
# Point this to the full ice data if you have it. The loading loop below reads
# one sub-folder per year (1999 through 2009) directly under this path.
# Otherwise, skip this and proceed below with 2006 data.
folder_prefix = '.../daacdata.apps.nsidc.org/pub/DATASETS/nsidc0265_QuikSCAT_ice_extent/data/north/binary/eggs'

def reduce_image(img, scale=30):
    """Downsample an image by summing non-overlapping ``scale`` x ``scale`` tiles.

    Parameters
    ----------
    img : array_like
        Image whose first two axes are rows and columns.
    scale : int, optional
        Side length of each square tile, in pixels.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(img.shape[0] // scale, img.shape[1] // scale)``
        whose ``[i, j]`` entry is the sum of the pixels in tile ``(i, j)``.
        Rows and columns past the last complete tile are dropped.
    """
    img = np.asarray(img)
    rows, cols = img.shape[0] // scale, img.shape[1] // scale
    # Axes beyond the first two (if any) are folded into each tile's total.
    trailing = int(np.prod(img.shape[2:], dtype=int))
    # Crop to whole tiles, then expose each tile on its own axes so a single
    # vectorized sum replaces the per-tile Python loop.
    tiles = img[:rows * scale, :cols * scale].reshape(rows, scale, cols, scale, trailing)
    return tiles.sum(axis=(1, 3, 4), dtype=float)

# Build one normalised data matrix per year: each row is one flattened,
# downsampled binary image.
all_eggs = []
for year in range(1999, 2010):
    year_dir = '{}/{}'.format(folder_prefix, year)
    eggs, days = [], []
    for fname in tqdm(sorted(os.listdir(year_dir))):
        sir = loadsir('{}/{}/'.format(folder_prefix, year) + fname)
        # 1 where the first item returned by loadsir is within 1e-2 of 1, else 0
        near_one = np.abs(sir[0] - 1) < 1e-2
        binary_img = np.array(near_one, dtype=int)
        eggs.append(reduce_image(binary_img))
        # Last three characters of the file name up to its first '.'
        days.append(fname.partition('.')[0][-3:])

    X = np.array([egg.ravel() for egg in eggs])
    # Scale so the largest entry of the year's matrix is 1
    X = X / X.max()
    all_eggs.append(X)

In [None]:
# Save the dimension-1 persistence diagram (Manhattan metric) of every year.
for year, X in enumerate(all_eggs):
    label = year + 1999  # all_eggs was filled starting with 1999
    h1_diagram = ripser(X, maxdim=1, metric='manhattan')['dgms'][1]
    births, deaths = h1_diagram[:, 0], h1_diagram[:, 1]

    plt.scatter(births, deaths)
    plt.plot([0, 110], [0, 110], 'k--')  # diagonal: birth == death
    plt.xlim([0, 110])
    plt.ylim([0, 110])
    plt.gca().set_aspect('equal', 'box')
    plt.xlabel('birth')
    plt.ylabel('death')
    plt.title('{}'.format(label))
    plt.savefig('ice_eggs_{}_PD.png'.format(label), bbox_inches='tight', dpi=300)
    # Close so the next year starts from an empty figure
    plt.close()

### Pixel weighting on the 2006 data

This dataset is provided in this repo.

In [None]:
# Root folder of the bundled data; the loader below reads from '<folder_prefix>/<year>/'.
folder_prefix = 'ice/'

def reduce_image(img, scale=30):
    """Downsample an image by summing non-overlapping ``scale`` x ``scale`` tiles.

    Parameters
    ----------
    img : array_like
        Image whose first two axes are rows and columns.
    scale : int, optional
        Side length of each square tile, in pixels.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(img.shape[0] // scale, img.shape[1] // scale)``
        whose ``[i, j]`` entry is the sum of the pixels in tile ``(i, j)``.
        Rows and columns past the last complete tile are dropped.
    """
    img = np.asarray(img)
    rows, cols = img.shape[0] // scale, img.shape[1] // scale
    # Axes beyond the first two (if any) are folded into each tile's total.
    trailing = int(np.prod(img.shape[2:], dtype=int))
    # Crop to whole tiles, then expose each tile on its own axes so a single
    # vectorized sum replaces the per-tile Python loop.
    tiles = img[:rows * scale, :cols * scale].reshape(rows, scale, cols, scale, trailing)
    return tiles.sum(axis=(1, 3, 4), dtype=float)

# Load every 2006 file, turn it into a downsampled binary image, and stack the
# flattened images into the data matrix X (one row per file).
year = 2006
year_dir = '{}/{}'.format(folder_prefix, year)
eggs, days = [], []
for fname in tqdm(sorted(os.listdir(year_dir))):
    sir = loadsir('{}/{}/'.format(folder_prefix, year) + fname)
    # 1 where the first item returned by loadsir is within 1e-2 of 1, else 0
    near_one = np.abs(sir[0] - 1) < 1e-2
    binary_img = np.array(near_one, dtype=int)
    eggs.append(reduce_image(binary_img))
    # Last three characters of the file name up to its first '.'
    days.append(fname.partition('.')[0][-3:])

X = np.array([egg.ravel() for egg in eggs])
# Scale so the largest entry of the matrix is 1
X = X / X.max()

Plot snapshots of the ice extent for 2006.

In [None]:
# One row of ten evenly spaced snapshots taken from `eggs`.
fig, ax = plt.subplots(1, 10, figsize=(10, 10))
# NOTE(review): the upper bound is len(eggs)-2, so the final image is never
# selected — confirm this is intended.
idxs = np.linspace(0, len(eggs)-2, 10, dtype=int)
for panel, k in zip(ax, idxs):
    panel.axis('off')
    panel.imshow(eggs[k], cmap='gray')
    # `days` entries are already strings; drop leading zeros for the title
    panel.set_title('Day ' + days[k].lstrip('0'), fontsize=10)

plt.savefig('ice_eggs_2006.png', bbox_inches='tight', dpi=300)

Do the analysis for 2006 data only. Make sure you have run the blocks above to populate `eggs` and `X`.

In [None]:
year = 2006
print(year)

# plt.savefig does not create missing folders; make sure the output folder exists.
os.makedirs('arctic', exist_ok=True)

# Compute the weights T (presumably one per pixel: T is reshaped to the image
# shape below), then binarise: keep the entries above the 90th percentile of
# the strictly positive weights.
T = one_step_reweight_enhance_H1(X, method='cochains', epsilon=[0.01, 0.05, 0.1])
Q = np.quantile(T[np.where(T > 0)[0]], 0.9)
T = np.array(T > Q, dtype=int)
plt.imshow(1-T.reshape(eggs[0].shape), cmap='binary')
plt.xticks([])
plt.yticks([])
plt.savefig(f'arctic/ice_eggs_{year}_weights.png', bbox_inches='tight', dpi=300)
plt.show()

# Produce the same three PCA figures twice: on the raw data ("uniform") and on
# the data masked by the binarised weights ("learned").
for tag, data in (('uniform', X), ('learned', T*X)):
    pca = PCA(n_components=2)
    A = pca.fit_transform(data)
    loadings = pca.components_.T

    # Scatter of the two PCA scores, coloured by row index (labelled 'Day')
    plt.scatter(A[:,0], A[:,1], s=5, c=range(A.shape[0]))
    cbar = plt.colorbar()
    cbar.ax.tick_params(labelsize=12)
    cbar.ax.set_title('Day', fontsize=15)
    plt.axis('equal')
    plt.xlabel('PC1', fontsize=15)
    plt.ylabel('PC2', fontsize=15)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.savefig(f'arctic/ice_eggs_{year}_PCA_{tag}.png', bbox_inches='tight', dpi=300)
    plt.show()

    # Shared symmetric colour range so PC1 and PC2 loadings are comparable
    limit = np.max(np.abs(loadings))

    plt.imshow(loadings[:,0].reshape(eggs[0].shape), cmap='PiYG', vmin=-limit, vmax=limit)
    plt.xticks([])
    plt.yticks([])
    plt.title('PC1')
    plt.savefig(f'arctic/ice_eggs_{year}_PC1_{tag}.png', bbox_inches='tight', dpi=300)
    plt.show()

    plt.imshow(loadings[:,1].reshape(eggs[0].shape), cmap='PiYG', vmin=-limit, vmax=limit)
    plt.xticks([])
    plt.yticks([])
    plt.title('PC2')
    cbar = plt.colorbar()
    cbar.ax.tick_params(labelsize=15)
    plt.savefig(f'arctic/ice_eggs_{year}_PC2_{tag}.png', bbox_inches='tight', dpi=300)
    plt.show()

## Do other years if available

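If data for other years are available, repeat the analysis for each of them. The loop below reads each year from `folder_prefix/<year>/`, so first point `folder_prefix` at the folder that contains those per-year subfolders.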

In [None]:
# Cache of the raw (not yet thresholded) weights per year, so re-running the
# loop below can reuse them. Re-running this cell empties the cache.
T_store = {}

In [None]:
# plt.savefig does not create missing folders; make sure the output folder exists.
os.makedirs('arctic', exist_ok=True)

# NOTE(review): the persistence-diagram loading loop earlier in this notebook
# runs through 2009; confirm that 2009 is omitted here on purpose.
for year in [1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008]:
    print(year)

    # Load the year's files as downsampled binary images.
    year_dir = '{}/{}'.format(folder_prefix, year)
    eggs = []
    days = []
    for file in tqdm(sorted(os.listdir(year_dir))):
        sir = loadsir('{}/{}/'.format(folder_prefix, year) + file)
        # 1 where the first item returned by loadsir is within 1e-2 of 1, else 0
        binary_img = np.array(np.abs(sir[0] - 1) < 1e-2, dtype=int)
        eggs.append(reduce_image(binary_img))
        days.append(file.split('.')[0][-3:])

    X = np.array([e.ravel() for e in eggs])
    X = X/X.max()

    # Reuse the cached raw weights when this year has already been processed.
    if year not in T_store:
        T = one_step_reweight_enhance_H1(X, method='cochains', epsilon=[0.01, 0.05, 0.1])
        T_store[year] = T
    else:
        T = T_store[year]
    # Binarise: keep entries above the 90th percentile of the strictly positive weights.
    Q = np.quantile(T[np.where(T > 0)[0]], 0.9)
    T = np.array(T > Q, dtype=int)
    plt.imshow(1-T.reshape(eggs[0].shape), cmap='binary')
    plt.xticks([])
    plt.yticks([])
    plt.savefig(f'arctic/ice_eggs_{year}_weights.png', bbox_inches='tight', dpi=300)
    plt.show()

    # Produce the same three PCA figures twice: on the raw data ("uniform")
    # and on the data masked by the binarised weights ("learned").
    for tag, data in (('uniform', X), ('learned', T*X)):
        pca = PCA(n_components=2)
        A = pca.fit_transform(data)
        loadings = pca.components_.T

        # Scatter of the two PCA scores, coloured by row index (labelled 'Day')
        plt.scatter(A[:,0], A[:,1], s=5, c=range(A.shape[0]))
        cbar = plt.colorbar()
        cbar.ax.tick_params(labelsize=12)
        cbar.ax.set_title('Day', fontsize=15)
        plt.axis('equal')
        plt.xlabel('PC1', fontsize=15)
        plt.ylabel('PC2', fontsize=15)
        plt.xticks(fontsize=12)
        plt.yticks(fontsize=12)
        plt.savefig(f'arctic/ice_eggs_{year}_PCA_{tag}.png', bbox_inches='tight', dpi=300)
        plt.show()

        # Shared symmetric colour range so PC1 and PC2 loadings are comparable
        limit = np.max(np.abs(loadings))

        plt.imshow(loadings[:,0].reshape(eggs[0].shape), cmap='PiYG', vmin=-limit, vmax=limit)
        plt.xticks([])
        plt.yticks([])
        plt.title('PC1')
        plt.savefig(f'arctic/ice_eggs_{year}_PC1_{tag}.png', bbox_inches='tight', dpi=300)
        plt.show()

        plt.imshow(loadings[:,1].reshape(eggs[0].shape), cmap='PiYG', vmin=-limit, vmax=limit)
        plt.xticks([])
        plt.yticks([])
        plt.title('PC2')
        cbar = plt.colorbar()
        cbar.ax.tick_params(labelsize=15)
        plt.savefig(f'arctic/ice_eggs_{year}_PC2_{tag}.png', bbox_inches='tight', dpi=300)
        plt.show()