In [13]:
# This script is used for testing all adaptations and changes made

In [1]:
import pandas as pd
import numpy as np

from scipy import linalg
from scipy import stats

In [2]:
# Load data for change detection as a pandas data frame.
infys_df =  pd.read_csv("../data/iMAD/iMAD_Data_23.csv") #need to change the dataset 
infys_df.head()

Unnamed: 0.1,Unnamed: 0,Cycle,Cluster_ID,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,SpeciesCount,AvgTreeHeight,TreeCount,J,X,Y
0,1,2,77652,12.425352,2.443564,5.727228,5.241798,15,7.169014,142,0.795764,-92.433389,15.588935
1,2,2,28451,11.009524,2.766667,0.0,6.45711,2,2.819048,21,0.428027,-99.546778,26.072916
2,3,2,55242,17.239344,3.632759,3.487931,12.143966,6,6.285246,61,0.667639,-104.218713,20.403546
3,4,2,20795,10.0875,3.759524,4.54878,11.793714,8,4.585714,56,0.721202,-108.957083,27.390907
4,5,2,53236,12.101266,3.202817,3.877465,8.296011,4,6.801266,79,0.624422,-98.775463,20.791278


In [3]:
# Create new ids by concatenating 'Conglomerado' and 'Sitio'.
    ## don't think I need to do that
    ## Use existing ID of Metadataset called Cluster_ID -> should be able to leave that one out


# As we want to compare cycle 1 vs cycle 2 lets separate them.   
## Stays the same for long dataformat
## might figure out how to 
infys_df_c1 = infys_df[infys_df['Cycle'] == 2]
infys_df_c2 = infys_df[infys_df['Cycle'] == 3]

In [4]:
# Find intersection of new ID variable: 'cng_sit', so obs. that are present in both cycles.
s1 = infys_df_c1['Cluster_ID']
s2 = infys_df_c2['Cluster_ID']
cluster_id_inter = pd.Series(list(set(s1).intersection(set(s2))))
## might have already done this in R by using my filter(muestreado1 == 1 & muestreado2 == 1)



In [5]:
# Filter out obs. that dont appear in cng_sit_inter.
infys_df_c1 = infys_df_c1[infys_df_c1['Cluster_ID'].isin(cluster_id_inter)]
infys_df_c2 = infys_df_c2[infys_df_c2['Cluster_ID'].isin(cluster_id_inter)]

In [6]:
# Select only variables to be utilized in change detection.
vars = ["SpeciesCount", "TreeCount", "J", "AvgTreeHeight", "AvgDbh", "AvgCrownDiameter", "AvgCrownHeight", "AvgCrownArea"]
nvars = len(vars)
infys_df_c1_c = infys_df_c1[infys_df_c1.columns.intersection(vars)]
infys_df_c2_c = infys_df_c2[infys_df_c2.columns.intersection(vars)]

c1_c_nans = infys_df_c1_c.isna().any(axis=1).to_numpy()
c2_c_nans = infys_df_c2_c.isna().any(axis=1).to_numpy()

In [7]:
# Create change data matrix, missing values mask and initial change weights.
dm = np.zeros((2 * nvars, infys_df_c1_c.shape[0]))
dm[0:nvars] = np.transpose(infys_df_c1_c.to_numpy())
dm[nvars:] = np.transpose(infys_df_c2_c.to_numpy())

nodataidx = c1_c_nans | c2_c_nans
gooddataidx = nodataidx == False
dm = dm[:, gooddataidx]
ngood = np.sum(gooddataidx)

In [8]:
# Change detection iterations.

# iteration of MAD   
wt = np.ones(int(ngood))
delta = 1.0
oldrho = np.zeros(nvars)
iter = 0
 
while (delta > 0.01) and (iter < 35): 
    print(iter)
    # Weighted covariance matrices and means.
    sumw = np.sum(wt)
    means = np.average(dm,axis=1, weights=wt)
    dmc = dm - means[:,np.newaxis]
    dmc = np.multiply(dmc,np.sqrt(wt))
    sigma = np.dot(dmc,dmc.T)/sumw
   
    s11 = sigma[0:nvars, 0:nvars]
    s22 = sigma[nvars:, nvars:]
    s12 = sigma[0:nvars, nvars:]
    s21 = sigma[nvars:, 0:nvars]
    
    # Solution of generalized eigenproblems.
    aux_1 = linalg.solve(s22, s21)
    lama, a = linalg.eig(np.dot(s12, aux_1), s11)
    aux_2 = linalg.solve(s11, s12)
    lamb, b = linalg.eig(np.dot(s21, aux_2), s22)
    
    
    # Sort a.  
    idx = np.argsort(lama)
    a = a[:, idx]
    
    # Sort b.        
    idx = np.argsort(lamb)
    b = b[:, idx]
    
    # Canonical correlations.        
    rho = np.sqrt(np.real(lamb[idx]))
    
    
    # Normalize dispersions.  
    tmp1 = np.dot(np.dot(a.T,s11), a)
    tmp2 = 1. / np.sqrt(np.diag(tmp1))
    tmp3 = np.tile(tmp2, (nvars, 1))
    a = np.multiply(a, tmp3)
    b = np.mat(b)
    tmp1 = np.dot(np.dot(b.T,s22), b)
    tmp2 = 1. / np.sqrt(np.diag(tmp1))
    tmp3 = np.tile(tmp2, (nvars, 1))
    b = np.multiply(b, tmp3)
        
    # Assure positive correlation
    tmp = np.diag(np.dot(np.dot(a.T,s12), b))
    b = np.dot(b,np.diag(tmp / np.abs(tmp)))

    # Canonical and MAD variates
    U = np.dot(a.T , (dm[0:nvars, :] - means[0:nvars, np.newaxis]))    
    V = np.dot(b.T , (dm[nvars:, :] - means[nvars:, np.newaxis]))          
    MAD = U - V  
    
            
    # New weights.        
    var_mad = np.tile(np.mat(2 * (1 - rho)).T, (1, ngood))    
    chisqr = np.sum(np.multiply(MAD, MAD) / var_mad, 0)
    wt = np.squeeze(1 - np.array(stats.chi2._cdf(chisqr, nvars)))
    
        
    # Continue iteration.        
    delta = np.sum(np.abs(rho - oldrho))
    oldrho = rho
    iter += 1
    
# reshape to original image size, by including nodata pixels   
# actually reshaping to image size plus one to add chi-squared values
MADout = np.zeros((int(nvars+1), infys_df_c1_c.shape[0]))
MADout[0:nvars, gooddataidx] = MAD
MADout[nvars, gooddataidx] = chisqr

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34


In [9]:
# check whether adding chi-squared worked -> should be 9 rows (or in pyhton count: 0-8)
MADout_df = pd.DataFrame(MADout)
MADout_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10460,10461,10462,10463,10464,10465,10466,10467,10468,10469
0,-2.979377,-0.760287,0.274654,0.580254,-0.31051,0.88447,0.0,0.827502,0.094795,-3.06741,...,-1.808731,-0.200626,-1.863631,3.177221,2.435652,0.0,0.074303,4.001923,0.510203,-1.72611
1,-0.733703,-0.668849,0.100925,-3.364945,0.331551,-0.311293,0.0,-0.082064,-0.447627,-3.955985,...,0.2810006,-0.520749,-0.5582053,0.7916,-0.073975,0.0,-1.240826,-0.568888,-1.048859,1.426915
2,0.140976,0.388292,1.373508,-2.478867,0.160625,-0.806809,0.0,0.053288,1.278292,-0.736592,...,3.776395,-0.252245,2.019629,-0.618109,1.100127,0.0,0.618212,1.827284,1.687017,1.065813
3,-0.320487,-0.085443,0.176631,-1.414721,1.100589,-0.139251,0.0,-0.136239,-0.213269,-1.53739,...,0.8864404,0.191501,0.9575275,-0.004676,-3.193256,0.0,-0.352652,-1.482337,0.308376,1.395952
4,0.798025,-0.780646,-0.588718,1.470308,-0.072566,0.391418,0.0,0.394732,-1.542221,0.731848,...,-1.151472,-0.942741,-3.331951,1.472214,-1.431753,0.0,-1.188247,-0.144178,-0.075881,-0.447491
5,-0.506594,0.286801,0.080879,0.20909,-0.025374,0.015308,0.0,-0.050598,0.539301,0.344953,...,0.8369315,0.208276,1.137828,-0.370548,-0.698467,0.0,0.203438,-0.112812,0.998332,0.188591
6,1.365201,0.002136,-0.936382,1.148204,-0.987395,-0.110515,0.0,0.072044,0.331887,0.807016,...,-0.8320245,-0.022831,-0.9506642,-0.171737,1.377543,0.0,0.232114,0.300799,-0.202833,-0.962024
7,0.531895,-0.56433,-0.561399,1.183564,0.153372,0.253004,0.0,0.395763,-0.781845,0.990721,...,-1.817961,-0.316228,-2.522543,0.384332,0.045005,0.0,-0.556418,-0.091263,-0.916914,-0.71836
8,172115.218424,144479.467897,159942.271586,663272.041831,33863.086129,29560.710815,0.0,69754.587076,289436.090022,455620.430416,...,1487431.0,49973.565901,2895770.0,81014.013934,82613.989997,0.0,146508.336337,12192.489345,384451.020282,252371.120968


In [10]:
# increase dimension of transposed dataframe by adding on column called chi_squared
vars_with_chi_squared = vars + ["chi_squared"]

# transpose matrix 
MADout = np.transpose(MADout).copy()
MADout_df = pd.DataFrame(MADout, columns=vars_with_chi_squared)
# should now have 9 columns and the last one should be chi_squared
MADout_df.head(10)

Unnamed: 0,SpeciesCount,TreeCount,J,AvgTreeHeight,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,chi_squared
0,-2.979377,-0.733703,0.140976,-0.320487,0.798025,-0.506594,1.365201,0.531895,172115.218424
1,-0.760287,-0.668849,0.388292,-0.085443,-0.780646,0.286801,0.002136,-0.56433,144479.467897
2,0.274654,0.100925,1.373508,0.176631,-0.588718,0.080879,-0.936382,-0.561399,159942.271586
3,0.580254,-3.364945,-2.478867,-1.414721,1.470308,0.20909,1.148204,1.183564,663272.041831
4,-0.31051,0.331551,0.160625,1.100589,-0.072566,-0.025374,-0.987395,0.153372,33863.086129
5,0.88447,-0.311293,-0.806809,-0.139251,0.391418,0.015308,-0.110515,0.253004,29560.710815
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.827502,-0.082064,0.053288,-0.136239,0.394732,-0.050598,0.072044,0.395763,69754.587076
8,0.094795,-0.447627,1.278292,-0.213269,-1.542221,0.539301,0.331887,-0.781845,289436.090022
9,-3.06741,-3.955985,-0.736592,-1.53739,0.731848,0.344953,0.807016,0.990721,455620.430416


In [11]:
# add coordinates
MADout_df["X"] = infys_df_c1["X"]
MADout_df["Y"] = infys_df_c1["Y"]
MADout_df.head(10)

Unnamed: 0,SpeciesCount,TreeCount,J,AvgTreeHeight,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,chi_squared,X,Y
0,-2.979377,-0.733703,0.140976,-0.320487,0.798025,-0.506594,1.365201,0.531895,172115.218424,-92.433389,15.588935
1,-0.760287,-0.668849,0.388292,-0.085443,-0.780646,0.286801,0.002136,-0.56433,144479.467897,-99.546778,26.072916
2,0.274654,0.100925,1.373508,0.176631,-0.588718,0.080879,-0.936382,-0.561399,159942.271586,-104.218713,20.403546
3,0.580254,-3.364945,-2.478867,-1.414721,1.470308,0.20909,1.148204,1.183564,663272.041831,-108.957083,27.390907
4,-0.31051,0.331551,0.160625,1.100589,-0.072566,-0.025374,-0.987395,0.153372,33863.086129,-98.775463,20.791278
5,0.88447,-0.311293,-0.806809,-0.139251,0.391418,0.015308,-0.110515,0.253004,29560.710815,-103.579314,23.316
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-101.453185,26.362889
7,0.827502,-0.082064,0.053288,-0.136239,0.394732,-0.050598,0.072044,0.395763,69754.587076,-108.212148,28.779231
8,0.094795,-0.447627,1.278292,-0.213269,-1.542221,0.539301,0.331887,-0.781845,289436.090022,-103.285564,23.454722
9,-3.06741,-3.955985,-0.736592,-1.53739,0.731848,0.344953,0.807016,0.990721,455620.430416,-98.962972,16.72387


In [13]:
# create .csv
MADout_df.to_csv('iMAD_results_23.csv', index=False)

In [None]:
#### ENDE ####

In [54]:
## Beta

MADout_df["Y"] = infys_df_c1["std_chg"]
np.std(madout(lstone))
madout(lastone <2*np.std)=2


# Add chi squared variable to table

CHIsqr_df = pd.DataFrame(chisqr)


chisqr = np.transpose(chisqr).copy()
CHI_df = pd.DataFrame(chisqr)

MADout_df['CHI'] = CHI_df



# Assuming chisqr is a 1D numpy array with length equal to the number of rows in MADout
# We ensure chisqr is reshaped as a 2D column vector if it isn't already
chisqr_column = chisqr.reshape(-1, 1)

# Append chisqr as a new column to MADout
MADout = np.concatenate((MADout, chisqr_column), axis=1)




MADout_df.head()



SyntaxError: cannot assign to function call here. Maybe you meant '==' instead of '='? (54363561.py, line 5)