# Demo of Categorical Variable Change using Pontius Method
**See [the original GitHub repo](https://github.com/verma-priyanka/pontiPy) by the developers.**

## Preparation
We first import libraries.

In [1]:
# !pip3 install pontiPy

In [2]:
# IMPORT LIBRARIES####
# Handle Raster====
import rasterio as rio 
import numpy as np 

# Calculate Metrics====
import pandas as pd
from pontiPy import *
from scipy.stats.contingency import crosstab

# Visualize Result====
import matplotlib as mpl 
import matplotlib.pyplot as plt 
import seaborn as sb

In [3]:
# Disable Warnings====
# The 'ignore' action is installed without any message/category/module restriction,
# so every warning raised after this cell is hidden. Remove this filter while
# debugging, otherwise deprecation and runtime warnings from the libraries are lost too.
import warnings
warnings.filterwarnings('ignore')

## Create a Crosstabulation Data Frame
We will first create a cross-tabulation that counts changes or errors. The expected layout of this table is not explained in the developers' GitHub repository, so the description below is what I worked out myself.

If we handle changes, then the data frame contains the counts from time 1 in the row direction (X) and those from time 2 in the column direction (Y). If we handle errors, then the data frame contains the counts from the map in the row direction and those from the reference in the column direction.

In [4]:
# CREATE CROSSTABULATION####
def makeCrossTab(xPath:str,
                 yPath:str,
                 analysisType:str = "error",
                 # Change 1 to 1000 to show crosstab with unit of 1000 pixels----
                 factor = 1,
                 csvPath:str = None,
                 noValuesPosition = None,
                 categoryNames = []):
    """Cross-tabulate two single-band rasters and wrap the table for pontiPy.

    Band 1 of ``xPath`` supplies the rows and band 1 of ``yPath`` supplies the
    columns. The table is always square: a value that occurs in only one of
    the two rasters still gets its own (zero-filled) row and column, so row
    ``k`` and column ``k`` always describe the same raster value.

    Parameters
    ----------
    xPath : str
        Path of the raster counted in the row direction.
    yPath : str
        Path of the raster counted in the column direction.
    analysisType : str
        ``"error"`` returns a ``pontiPy_Error`` object, ``"change"`` returns a
        ``pontiPy_Change`` object.
    factor : number
        Every count is divided by this value (e.g. 1000 for thousands of pixels).
    csvPath : str or None
        When given, the final table is also written to this CSV file.
    noValuesPosition : int, sequence of int or None
        Position(s) of the no-data category; the matching row(s) and
        column(s) are dropped from the table.
    categoryNames : sequence of str
        Row/column labels, one per distinct raster value in sorted value
        order. Positional integer labels are used when empty.

    Returns
    -------
    tuple
        ``(pontiPy object, pandas.DataFrame)`` built from the same table.

    Raises
    ------
    ValueError
        If ``analysisType`` is neither ``"error"`` nor ``"change"``, if the two
        rasters differ in shape, or if the number of ``categoryNames`` does
        not match the number of distinct raster values.
    """
    # Validate the Analysis Type First====
    # Failing here avoids reading large rasters and writing a CSV for a call
    # that cannot succeed. (Raising a plain string is itself a TypeError.)
    if analysisType not in ("error", "change"):
        raise ValueError("Error: Choose from either error or change")

    # Open Raster as Numpy Array====
    # The context managers close the file handles as soon as the band is read.
    with rio.open(xPath) as xSource:
        xarr = xSource.read(1)
    with rio.open(yPath) as ySource:
        yarr = ySource.read(1)
    if xarr.shape != yarr.shape:
        raise ValueError("Error: The two rasters must have the same shape, got "
                         + str(xarr.shape) + " and " + str(yarr.shape))

    # Create a Crosstabulation====
    # Flatten so that each pixel is one paired observation----
    result = crosstab(xarr.ravel(), yarr.ravel())
    xLevels, yLevels = result.elements
    # Place the counts on the union of both value sets to keep the table square----
    allLevels = np.union1d(xLevels, yLevels)
    squareCounts = np.zeros((len(allLevels), len(allLevels)),
                            dtype = result.count.dtype)
    squareCounts[np.ix_(np.searchsorted(allLevels, xLevels),
                        np.searchsorted(allLevels, yLevels))] = result.count
    ct = squareCounts / factor

    # Convert Matrix to DataFrame====
    if categoryNames is None or len(categoryNames) == 0:
        df = pd.DataFrame(ct)
    else:
        if len(categoryNames) != len(allLevels):
            raise ValueError("Error: " + str(len(categoryNames))
                             + " category names given for "
                             + str(len(allLevels)) + " raster values")
        # Add column and index names if specified----
        df = pd.DataFrame(ct,
                          columns = categoryNames,
                          index = categoryNames)

    # Remove No Values====
    if noValuesPosition is not None:
        # Drop rows and columns of no data----
        df = df.drop(index = df.index[noValuesPosition],
                     columns = df.columns[noValuesPosition])

    # Save the CSV File====
    if csvPath is not None:
        df.to_csv(csvPath)

    # Return DataFrame as a PontiPy Object====
    if analysisType == "error":
        return pontiPy_Error(df), df
    return pontiPy_Change(df), df

In [5]:
# Build the change cross-tabulation from the 2005 raster (rows) and the 2016 raster (columns)====
changeDF, df = makeCrossTab(
    xPath = "/Volumes/volume 1/GIS Projects/Research/240105/whole_state/fad2005_4conn_all.tif",
    yPath = "/Volumes/volume 1/GIS Projects/Research/240105/whole_state/fad2016_4conn_all.tif",
    analysisType = "change",
    # Keep the counts in single pixels----
    factor = 1,
    # Do not export a CSV file----
    csvPath = None,
    # The first category ("NoData") is dropped from the rows and columns----
    noValuesPosition = 0,
    categoryNames = ["NoData", "FF", "NFF", "NF"],
)

In [6]:
# Display the cross-tabulation returned by makeCrossTab (position 0, "NoData", was dropped in the call above)====
df

Unnamed: 0,FF,NFF,NF
FF,2933538.0,96832.0,1475258.0
NFF,1189334.0,7205069.0,1425948.0
NF,1577438.0,562225.0,7102584.0


## Demonstration when We Want to Analyze Change
See the [developers' GitHub](https://github.com/verma-priyanka/pontiPy) for error metrics.

### Calculate Change Quantity

In [7]:
# CALCULATE CHANGE QUANTITY for EACH CATEGORY####
def calculateCategoryQuantity(changeDF, df):
    """Gather the labelled quantity of every category into a single dictionary.

    Parameters
    ----------
    changeDF : object
        Object exposing ``quantity(category=..., label=True)``; only the first
        entry of each returned dictionary is used.
    df : pandas.DataFrame
        Cross-tabulation; only the number of columns is read.

    Returns
    -------
    dict
        Keys are ``"<position>_<label>"`` and values are the quantity sizes,
        in category order.
    """
    quantityByCategory = {}
    # Walk through the Categories by Position====
    for position in range(len(df.columns)):
        # Ask for the Quantity with Its Gain/ Loss Label and Keep the First Entry====
        label, size = list(changeDF.quantity(category = position, label = True).items())[0]
        # Store the Size under the Position-Prefixed Label====
        quantityByCategory[str(position) + "_" + label] = size
    return quantityByCategory

In [8]:
# Quantity per category, keyed as "<position>_<Gain or Loss>"====
qd1 = calculateCategoryQuantity(changeDF, df)
qd1

{'0_Gain': 1194682.0, '1_Loss': 1956225.0, '2_Gain': 761543.0}

### Calculate Change Exchange

In [9]:
# CALCULATE CHANGE EXCHANGE for EACH CATEGORY####
def calculateCategoryExchange(change_df, df):
    """Collect the exchange result of every category into a single dictionary.

    Parameters
    ----------
    change_df : object
        Object exposing ``exchange(category1=...)``.
    df : pandas.DataFrame
        Cross-tabulation; only the number of columns is read.

    Returns
    -------
    dict
        Maps each category position (as a string) to whatever
        ``change_df.exchange`` returns for that category.
    """
    # Query the Exchange Once per Category Position====
    positions = range(len(df.columns))
    return dict(
        (str(position), change_df.exchange(category1 = position))
        for position in positions
    )

In [10]:
# Exchange result per category, keyed by the category position as a string====
ed1 = calculateCategoryExchange(changeDF, df)
ed1

{'0': {'Category 1': 96832.0, 'Category 2': 1475258.0},
 '1': {'Category 0': 96832.0, 'Category 2': 562225.0},
 '2': {'Category 0': 1475258.0, 'Category 1': 562225.0}}

### Calculate Shift

In [11]:
# CALCULATE CHANGE SHIFT for EACH CATEGORY####
def calculateCategoryShift(change_df, df):
    """Collect the shift of every category into a single dictionary.

    Parameters
    ----------
    change_df : object
        Object exposing ``shift(category=...)``.
    df : pandas.DataFrame
        Cross-tabulation; only the number of columns is read.

    Returns
    -------
    dict
        Maps each category position (as a string) to the value returned by
        ``change_df.shift`` for that category.
    """
    # Query the Shift Once per Category Position====
    return {
        str(position): change_df.shift(category = position)
        for position in range(len(df.columns))
    }

In [12]:
# Shift per category, keyed by the category position as a string====
sd1 = calculateCategoryShift(changeDF, df)
sd1

{'0': 0.0, '1': 0.0, '2': 102180.0}

## Summarize the Category Size

In [13]:
def summarizeCategoryDiffSize(change_df, df, quantity_dictionary, exchange_dictionary, shift_dictionary):
    """Print persistence, gain, loss, quantity, exchange and shift for each category.

    Parameters
    ----------
    change_df : object
        Object exposing ``persistence``, ``gain`` and ``loss``, each called
        with ``category=<position>``.
    df : pandas.DataFrame
        Cross-tabulation; only the number of columns is read.
    quantity_dictionary : dict
        Labelled quantities; read by position, so entry ``i`` must belong to
        category ``i``.
    exchange_dictionary : dict
        Maps ``str(position)`` to a dictionary of exchange sizes with the
        other categories.
    shift_dictionary : dict
        Shift sizes; read by position, so entry ``i`` must belong to category ``i``.

    Returns
    -------
    None
        The summary is written to standard output only.
    """
    print("====Summary of Category & Difference Size====")
    for position in range(len(df.columns)):
        # Name of the Current Category====
        print(f"Category: {position!s}")
        # Persistence, Gain and Loss Sizes====
        persistenceSize = change_df.persistence(category = position)
        print(f"Persistence: {persistenceSize!s}")
        gainSize = change_df.gain(category = position)
        print(f"Gain: {gainSize!s}")
        lossSize = change_df.loss(category = position)
        print(f"Loss: {lossSize!s}")
        # Quantity Label (Gain/ Loss) and Size Taken by Position====
        quantityLabel = list(quantity_dictionary.keys())[position]
        quantitySize = list(quantity_dictionary.values())[position]
        print(f"Quantity: {quantityLabel!s}->{quantitySize!s}")
        print("Note that Gain Quantity is 0, if loss quantity is more than 0, and vice versa")
        # Exchanges between the Current Category and Every Other Category====
        pairwiseExchanges = exchange_dictionary[str(position)].values()
        # The Category Exchange Counts Both Directions, Hence Twice the Sum====
        print(f"Category Exchange: {2*sum(list(pairwiseExchanges))!s}")
        # Shift Taken by Position=====
        shiftSize = list(shift_dictionary.values())[position]
        print(f"Category Shift: {shiftSize!s}")
        # Exchange and Shift Split Equally between Gain and Loss====
        print(f"Gain Exchange = Loss Exchange: {sum(list(pairwiseExchanges))!s}")
        print(f"Gain Shift = Loss Shift: {shiftSize/ 2!s}")
        # Separator before the Next Category=====
        print("====")

In [14]:
# Print the per-category summary from the quantity, exchange and shift dictionaries built above====
summarizeCategoryDiffSize(changeDF, df, qd1, ed1, sd1)

====Summary of Category & Difference Size====
Category: 0
Persistence: 2933538.0
Gain: 2766772.0
Loss: 1572090.0
Quantity: 0_Gain->1194682.0
Note that Gain Quantity is 0, if loss quantity is more than 0, and vice versa
Category Exchange: 3144180.0
Category Shift: 0.0
Gain Exchange = Loss Exchange: 1572090.0
Gain Shift = Loss Shift: 0.0
====
Category: 1
Persistence: 7205069.0
Gain: 659057.0
Loss: 2615282.0
Quantity: 1_Loss->1956225.0
Note that Gain Quantity is 0, if loss quantity is more than 0, and vice versa
Category Exchange: 1318114.0
Category Shift: 0.0
Gain Exchange = Loss Exchange: 659057.0
Gain Shift = Loss Shift: 0.0
====
Category: 2
Persistence: 7102584.0
Gain: 2901206.0
Loss: 2139663.0
Quantity: 2_Gain->761543.0
Note that Gain Quantity is 0, if loss quantity is more than 0, and vice versa
Category Exchange: 4074966.0
Category Shift: 102180.0
Gain Exchange = Loss Exchange: 2037483.0
Gain Shift = Loss Shift: 51090.0
====


## Calculate Difference Size for the Extent

## Calculate Change Intensities

## Next Steps
1. Double check the values.
    - Use Pontius Excel sheet.
2. Calculate difference size for the extent.
3. Calculate change intensities.
4. Visualize the sizes and intensities.