# Correlation analysis of the metrics time series

In [1]:
import pandas as pd
import pylandstats as pls
import seaborn as sns
import swisslandstats as sls

from swiss_urbanization.data import settings

# switch on seaborn's default theme for every figure drawn later in the
# notebook (no arguments are passed, so the library defaults are used)
sns.set()

In [2]:
# raster codes taken from the project settings: the value that flags pixels
# without data and the value that identifies the urban class
nodata = settings.EXTRACTS_NODATA
urban_class = settings.EXTRACTS_URBAN

# agglomerations to analyse; each slug is also the stem of its extract file
agglomeration_slugs = [
    'bern',
    'lausanne',
    'zurich',
]

# class-level metrics (read below for the urban class only)
class_metrics = [
    'proportion_of_landscape',
    'area_mn',
    'largest_patch_index',
    'edge_density',
    'fractal_dimension_am',
    'euclidean_nearest_neighbor_mn',
    'landscape_shape_index',
    'patch_density',
]

# landscape-level metrics
landscape_metrics = [
    'contagion',
    'shannon_diversity_index',
]

In [3]:
# Map every agglomeration slug to a `SpatioTemporalAnalysis` built from the
# landscape snapshots stored in its extract file.
agglomeration_dict = {}
for agglomeration_slug in agglomeration_slugs:
    ldf = sls.read_csv(
        f'../data/processed/agglomeration_extracts/{agglomeration_slug}.csv')

    # one landscape snapshot per column whose name starts with 'AS'
    is_snapshot_column = ldf.columns.str.startswith('AS')

    landscapes = []
    dates = []
    for urban_nonurban_column in ldf.columns[is_snapshot_column]:
        snapshot_arr = ldf.to_ndarray(urban_nonurban_column, nodata=nodata)
        landscapes.append(
            pls.Landscape(snapshot_arr, res=ldf.res, nodata=nodata))

        # characters 2-3 of the snapshot column name select the matching
        # 'FJ..' column of per-pixel timestamps; the snapshot is dated with
        # the most frequent value found there
        timestamp_column = 'FJ' + urban_nonurban_column[2:4]
        dates.append(ldf[timestamp_column].value_counts().index[0])

    agglomeration_dict[agglomeration_slug] = pls.SpatioTemporalAnalysis(
        landscapes, dates=dates)

In [4]:
# Pool the metric time series of all agglomerations into a single frame
# (`metrics_df`) with one column per metric.
#
# The per-agglomeration frames are collected in a list and concatenated once
# at the end: growing a frame with `pd.concat` inside the loop is quadratic
# and, because it starts from an empty object-dtype frame, relies on the
# empty-entry dtype handling that pandas >= 2.1 deprecates.
scaled_metrics_dfs = []
for agglomeration_slug in agglomeration_slugs:
    class_metrics_df = agglomeration_dict[agglomeration_slug].class_metrics_df
    landscape_metrics_df = agglomeration_dict[
        agglomeration_slug].landscape_metrics_df
    # urban-class rows of the class metrics next to the landscape metrics,
    # coerced to float so that the arithmetic below is numeric
    agglomeration_metrics_df = pd.concat([
        class_metrics_df.loc[urban_class][class_metrics],
        landscape_metrics_df[landscape_metrics]
    ], axis=1).apply(pd.to_numeric).astype(float)
    # min-max scale every metric within the agglomeration before pooling
    # (original note: "need minmax scaling for correlations to work").
    # `DataFrame.min`/`max` skip NaN, unlike the builtin `min`/`max`, so one
    # missing value cannot corrupt the bounds; a metric that is constant in
    # time has a zero range and becomes NaN, as it did before.
    metric_min = agglomeration_metrics_df.min()
    metric_range = agglomeration_metrics_df.max() - metric_min
    agglomeration_metrics_df = (
        agglomeration_metrics_df - metric_min) / metric_range
    scaled_metrics_dfs.append(agglomeration_metrics_df)

# `pd.concat` raises on an empty list, so keep the empty-frame result the
# original produced when there is nothing to pool
if scaled_metrics_dfs:
    metrics_df = pd.concat(scaled_metrics_dfs, ignore_index=True)
else:
    metrics_df = pd.DataFrame(columns=class_metrics + landscape_metrics)

In [None]:
# NOTE(review): this cell recomputes `metrics_df` exactly like the cell right
# before it (same inputs, same steps) and has no execution count; it looks
# like a leftover duplicate -- consider deleting one of the two.
#
# Pool the metric time series of all agglomerations into a single frame with
# one column per metric. Frames are collected in a list and concatenated
# once: concatenating onto an empty object-dtype frame inside the loop is
# quadratic and relies on the empty-entry dtype handling that pandas >= 2.1
# deprecates.
scaled_metrics_dfs = []
for agglomeration_slug in agglomeration_slugs:
    class_metrics_df = agglomeration_dict[agglomeration_slug].class_metrics_df
    landscape_metrics_df = agglomeration_dict[
        agglomeration_slug].landscape_metrics_df
    # urban-class rows of the class metrics next to the landscape metrics,
    # coerced to float so that the arithmetic below is numeric
    agglomeration_metrics_df = pd.concat([
        class_metrics_df.loc[urban_class][class_metrics],
        landscape_metrics_df[landscape_metrics]
    ], axis=1).apply(pd.to_numeric).astype(float)
    # min-max scale every metric within the agglomeration before pooling
    # (original note: "need minmax scaling for correlations to work").
    # `DataFrame.min`/`max` skip NaN, unlike the builtin `min`/`max`; a
    # metric with zero range becomes NaN, as it did before.
    metric_min = agglomeration_metrics_df.min()
    metric_range = agglomeration_metrics_df.max() - metric_min
    agglomeration_metrics_df = (
        agglomeration_metrics_df - metric_min) / metric_range
    scaled_metrics_dfs.append(agglomeration_metrics_df)

# `pd.concat` raises on an empty list, so keep the empty-frame result the
# original produced when there is nothing to pool
if scaled_metrics_dfs:
    metrics_df = pd.concat(scaled_metrics_dfs, ignore_index=True)
else:
    metrics_df = pd.DataFrame(columns=class_metrics + landscape_metrics)

In [3]:
# Map every city slug to a `SpatioTemporalAnalysis` that is given one raster
# path per year code (`.../<city_slug>/<year_code>/<year_code>.tif`), the
# combined class + landscape metric names as `metrics`, `[1]` as `classes`
# and the year codes themselves as `dates`.
# NOTE(review): `city_slugs` and `year_codes` are not defined in any cell
# visible above (only `agglomeration_slugs` is) -- confirm they exist, or
# this cell fails on a fresh kernel.
# NOTE(review): presumably class value 1 is the urban class here; verify
# against `settings.EXTRACTS_URBAN`.
city_sta_dict = {
    city_slug : pls.SpatioTemporalAnalysis(
        ['../data/processed/agglomeration_extracts/'
        f'{city_slug}/{year_code}/{year_code}.tif'
         for year_code in year_codes],
        metrics=class_metrics+landscape_metrics, classes=[1],
        dates=year_codes
    )
    for city_slug in city_slugs
}

In [9]:
# Pool the metric time series of all cities into a single frame
# (`metrics_df`) with one column per metric.
#
# The per-city frames are collected in a list and concatenated once at the
# end: growing a frame with `pd.concat` inside the loop is quadratic and,
# because it starts from an empty object-dtype frame, relies on the
# empty-entry dtype handling that pandas >= 2.1 deprecates.
scaled_metrics_dfs = []
for city_slug in city_slugs:
    class_metrics_df = city_sta_dict[city_slug].class_metrics_df
    landscape_metrics_df = city_sta_dict[city_slug].landscape_metrics_df
    # class-1 rows of the class metrics next to the landscape metrics,
    # coerced to float so that the arithmetic below is numeric
    city_metrics_df = pd.concat([
        class_metrics_df.loc[1][class_metrics],
        landscape_metrics_df[landscape_metrics]
    ], axis=1).apply(pd.to_numeric).astype(float)
    # min-max scale every metric within the city before pooling (original
    # note: "need minmax scaling for correlations to work").
    # `DataFrame.min`/`max` skip NaN, unlike the builtin `min`/`max`, so one
    # missing value cannot corrupt the bounds; a metric that is constant in
    # time has a zero range and becomes NaN, as it did before.
    metric_min = city_metrics_df.min()
    metric_range = city_metrics_df.max() - metric_min
    city_metrics_df = (city_metrics_df - metric_min) / metric_range
    scaled_metrics_dfs.append(city_metrics_df)

# `pd.concat` raises on an empty list, so keep the empty-frame result the
# original produced when there is nothing to pool
if scaled_metrics_dfs:
    metrics_df = pd.concat(scaled_metrics_dfs, ignore_index=True)
else:
    metrics_df = pd.DataFrame(columns=class_metrics + landscape_metrics)

In [10]:
# pairwise Pearson correlation between the pooled metric columns
# (the method is spelled out; it is the pandas default)
metrics_df.corr(method='pearson')

metric                          area_mn  proportion_of_landscape  \
metric                                                             
area_mn                        1.000000                -0.080844   
proportion_of_landscape       -0.080844                 1.000000   
largest_patch_index            0.219710                 0.781128   
edge_density                  -0.271450                 0.906382   
fractal_dimension_am          -0.075264                 0.463403   
euclidean_nearest_neighbor_mn  0.274681                -0.842056   
landscape_shape_index         -0.463490                 0.764900   
number_of_patches             -0.631163                 0.703030   
patch_density                 -0.635539                 0.699083   
contagion                      0.151913                -0.988008   
shannon_diversity_index       -0.075947                 0.999966   

metric                         largest_patch_index  edge_density  \
metric                                         