In [13]:
# This script is used for testing all adaptations and changes made

In [28]:
import pandas as pd
import numpy as np

from scipy import linalg
from scipy import stats

In [29]:
# Load data for change detection as a pandas data frame.
infys_df =  pd.read_csv("../data/iMAD/iMAD_Data_13_TreesOnly.csv") #need to change the dataset 
infys_df.head()

Unnamed: 0.1,Unnamed: 0,Cycle,Cluster_ID,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,SpeciesCount,AvgTreeHeight,TreeCount,J,X,Y
0,1,1,77652,16.103571,3.937736,3.367925,15.409993,8,8.555357,56,0.440017,-92.433389,15.588935
1,2,1,28451,10.380952,2.209524,0.0,3.95692,2,2.909524,21,0.334649,-99.546778,26.072916
2,3,1,55242,14.996429,4.439683,4.557937,17.199388,5,5.458333,84,0.50543,-104.218713,20.403546
3,4,1,20795,11.6,3.169565,4.286957,8.243627,5,4.493548,31,0.770242,-108.957083,27.390907
4,5,1,53236,12.194444,4.886047,5.753488,20.022403,6,7.605556,54,0.841231,-98.775463,20.791278


In [30]:
# Create new ids by concatenating 'Conglomerado' and 'Sitio'.
    ## don't think I need to do that
    ## Use existing ID of Metadataset called Cluster_ID -> should be able to leave that one out


# As we want to compare cycle 1 vs cycle 2 lets separate them.   
## Stays the same for long dataformat
## might figure out how to 
infys_df_c1 = infys_df[infys_df['Cycle'] == 1]
infys_df_c2 = infys_df[infys_df['Cycle'] == 3]

In [31]:
# Find intersection of new ID variable: 'cng_sit', so obs. that are present in both cycles.
s1 = infys_df_c1['Cluster_ID']
s2 = infys_df_c2['Cluster_ID']
cluster_id_inter = pd.Series(list(set(s1).intersection(set(s2))))
## might have already done this in R by using my filter(muestreado1 == 1 & muestreado2 == 1)



In [32]:
# Filter out obs. that dont appear in cng_sit_inter.
infys_df_c1 = infys_df_c1[infys_df_c1['Cluster_ID'].isin(cluster_id_inter)]
infys_df_c2 = infys_df_c2[infys_df_c2['Cluster_ID'].isin(cluster_id_inter)]

In [33]:
# Select only variables to be utilized in change detection.
vars = ["SpeciesCount", "TreeCount", "J", "AvgTreeHeight", "AvgDbh", "AvgCrownDiameter", "AvgCrownHeight", "AvgCrownArea"]
nvars = len(vars)
infys_df_c1_c = infys_df_c1[infys_df_c1.columns.intersection(vars)]
infys_df_c2_c = infys_df_c2[infys_df_c2.columns.intersection(vars)]

c1_c_nans = infys_df_c1_c.isna().any(axis=1).to_numpy()
c2_c_nans = infys_df_c2_c.isna().any(axis=1).to_numpy()

In [34]:
# Create change data matrix, missing values mask and initial change weights.
dm = np.zeros((2 * nvars, infys_df_c1_c.shape[0]))
dm[0:nvars] = np.transpose(infys_df_c1_c.to_numpy())
dm[nvars:] = np.transpose(infys_df_c2_c.to_numpy())

nodataidx = c1_c_nans | c2_c_nans
gooddataidx = nodataidx == False
dm = dm[:, gooddataidx]
ngood = np.sum(gooddataidx)

In [36]:
# Change detection iterations.

# iteration of MAD   
wt = np.ones(int(ngood))
delta = 1.0
oldrho = np.zeros(nvars)
iter = 0
 
while (delta > 0.01) and (iter < 33): 
    print(iter)
    # Weighted covariance matrices and means.
    sumw = np.sum(wt)
    means = np.average(dm,axis=1, weights=wt)
    dmc = dm - means[:,np.newaxis]
    dmc = np.multiply(dmc,np.sqrt(wt))
    sigma = np.dot(dmc,dmc.T)/sumw
   
    s11 = sigma[0:nvars, 0:nvars]
    s22 = sigma[nvars:, nvars:]
    s12 = sigma[0:nvars, nvars:]
    s21 = sigma[nvars:, 0:nvars]
    
    # Solution of generalized eigenproblems.
    aux_1 = linalg.solve(s22, s21)
    lama, a = linalg.eig(np.dot(s12, aux_1), s11)
    aux_2 = linalg.solve(s11, s12)
    lamb, b = linalg.eig(np.dot(s21, aux_2), s22)
    
    
    # Sort a.  
    idx = np.argsort(lama)
    a = a[:, idx]
    
    # Sort b.        
    idx = np.argsort(lamb)
    b = b[:, idx]
    
    # Canonical correlations.        
    rho = np.sqrt(np.real(lamb[idx]))
    
    
    # Normalize dispersions.  
    tmp1 = np.dot(np.dot(a.T,s11), a)
    tmp2 = 1. / np.sqrt(np.diag(tmp1))
    tmp3 = np.tile(tmp2, (nvars, 1))
    a = np.multiply(a, tmp3)
    b = np.mat(b)
    tmp1 = np.dot(np.dot(b.T,s22), b)
    tmp2 = 1. / np.sqrt(np.diag(tmp1))
    tmp3 = np.tile(tmp2, (nvars, 1))
    b = np.multiply(b, tmp3)
        
    # Assure positive correlation
    tmp = np.diag(np.dot(np.dot(a.T,s12), b))
    b = np.dot(b,np.diag(tmp / np.abs(tmp)))

    # Canonical and MAD variates
    U = np.dot(a.T , (dm[0:nvars, :] - means[0:nvars, np.newaxis]))    
    V = np.dot(b.T , (dm[nvars:, :] - means[nvars:, np.newaxis]))          
    MAD = U - V  
    
            
    # New weights.        
    var_mad = np.tile(np.mat(2 * (1 - rho)).T, (1, ngood))    
    chisqr = np.sum(np.multiply(MAD, MAD) / var_mad, 0)
    wt = np.squeeze(1 - np.array(stats.chi2._cdf(chisqr, nvars)))
    
        
    # Continue iteration.        
    delta = np.sum(np.abs(rho - oldrho))
    oldrho = rho
    iter += 1
    
# reshape to original image size, by including nodata pixels   
# actually reshaping to image size plus one to add chi-squared values
MADout = np.zeros((int(nvars+1), infys_df_c1_c.shape[0]))
MADout[0:nvars, gooddataidx] = MAD
MADout[nvars, gooddataidx] = chisqr

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32


In [37]:
# check whether adding chi-squared worked -> should be 9 rows (or in pyhton count: 0-8)
MADout_df = pd.DataFrame(MADout)
MADout_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531
0,-6.853842,-3.519086,-0.09705645,4.895934,-2.041761,-2.196718,0.0,1.757639,-2.28939,0.6792918,...,-10.39411,5.831103,-0.8874683,-6.773673,2.149456,-1.180171,-3.729438,-0.3459147,-4.858157,-7.629861
1,4.820803,0.7813935,1.331455,-0.3128524,0.8361135,-0.02719518,0.0,0.07951475,-0.561916,-0.3694988,...,2.351077,-1.765769,2.293238,1.696391,-1.012566,0.9890892,-2.893168,0.2636397,2.143147,0.625496
2,0.1881412,0.2217609,0.3621881,-0.7976074,1.48839,2.148146,0.0,-0.05129704,0.08477846,0.119862,...,1.168121,0.07925038,1.369314,0.6594701,0.04205511,2.126176,3.129789,-2.411566,0.9859682,1.693667
3,1.824207,-0.07666559,-0.7255799,4.432983,0.2145143,-2.138818,0.0,0.8184765,-1.870737,0.7210314,...,-1.760172,-0.0746934,0.6552507,-1.012079,-0.8616765,2.326294,-4.080121,-1.180767,1.181224,-3.749293
4,0.3169813,-1.313266,-0.4128861,0.1716095,-0.607213,-2.546069,0.0,0.5302199,-1.429823,-0.8225616,...,-2.872359,0.00841154,0.9824917,-2.717338,-0.2328813,-1.803518,-2.647364,1.4447,-0.1618167,-0.5702842
5,0.9075044,2.463664,-1.981128,2.963786,-0.5532205,-1.190521,0.0,-1.223466,-1.035882,0.769599,...,0.1695506,0.4205396,-3.166186,-0.934977,-2.065366,0.6930676,-3.183889,-0.1625003,-1.008531,-3.20643
6,-0.199527,-1.019715,-0.5705727,0.8968777,-1.585134,-2.831503,0.0,0.2413656,-1.430415,-0.07258405,...,-2.852549,0.4327424,-0.5594806,-2.300282,-0.1326396,-2.508202,-2.28252,1.19825,-0.9743297,-1.67178
7,1.300037,0.9583952,-0.5578135,-0.2878669,-0.3008908,0.3408851,0.0,-0.8016873,0.05078475,-0.0864321,...,1.634693,-1.230617,-0.4377003,0.7890085,-1.015036,0.462372,0.569093,0.08327873,0.6160823,0.06939777
8,456804500000000.0,4421063000000000.0,2299124000000000.0,5126988000000000.0,2513066000000000.0,9646776000000000.0,0.0,869220800000000.0,2921477000000000.0,480290300000000.0,...,9501145000000000.0,241255100000000.0,5552026000000000.0,7152998000000000.0,2153946000000000.0,6766306000000000.0,1.161856e+16,1859322000000000.0,1366987000000000.0,7748191000000000.0


In [38]:
# increase dimension of transposed dataframe by adding on column called chi_squared
vars_with_chi_squared = vars + ["chi_squared"]

# transpose matrix 
MADout = np.transpose(MADout).copy()
MADout_df = pd.DataFrame(MADout, columns=vars_with_chi_squared)
# should now have 9 columns and the last one should be chi_squared
MADout_df.head(10)

Unnamed: 0,SpeciesCount,TreeCount,J,AvgTreeHeight,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,chi_squared
0,-6.853842,4.820803,0.188141,1.824207,0.316981,0.907504,-0.199527,1.300037,456804500000000.0
1,-3.519086,0.781394,0.221761,-0.076666,-1.313266,2.463664,-1.019715,0.958395,4421063000000000.0
2,-0.097056,1.331455,0.362188,-0.72558,-0.412886,-1.981128,-0.570573,-0.557814,2299124000000000.0
3,4.895934,-0.312852,-0.797607,4.432983,0.17161,2.963786,0.896878,-0.287867,5126988000000000.0
4,-2.041761,0.836114,1.48839,0.214514,-0.607213,-0.55322,-1.585134,-0.300891,2513066000000000.0
5,-2.196718,-0.027195,2.148146,-2.138818,-2.546069,-1.190521,-2.831503,0.340885,9646776000000000.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,1.757639,0.079515,-0.051297,0.818477,0.53022,-1.223466,0.241366,-0.801687,869220800000000.0
8,-2.28939,-0.561916,0.084778,-1.870737,-1.429823,-1.035882,-1.430415,0.050785,2921477000000000.0
9,0.679292,-0.369499,0.119862,0.721031,-0.822562,0.769599,-0.072584,-0.086432,480290300000000.0


In [39]:
# add coordinates
MADout_df["X"] = infys_df_c1["X"]
MADout_df["Y"] = infys_df_c1["Y"]
MADout_df.head(10)

Unnamed: 0,SpeciesCount,TreeCount,J,AvgTreeHeight,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,chi_squared,X,Y
0,-6.853842,4.820803,0.188141,1.824207,0.316981,0.907504,-0.199527,1.300037,456804500000000.0,-92.433389,15.588935
1,-3.519086,0.781394,0.221761,-0.076666,-1.313266,2.463664,-1.019715,0.958395,4421063000000000.0,-99.546778,26.072916
2,-0.097056,1.331455,0.362188,-0.72558,-0.412886,-1.981128,-0.570573,-0.557814,2299124000000000.0,-104.218713,20.403546
3,4.895934,-0.312852,-0.797607,4.432983,0.17161,2.963786,0.896878,-0.287867,5126988000000000.0,-108.957083,27.390907
4,-2.041761,0.836114,1.48839,0.214514,-0.607213,-0.55322,-1.585134,-0.300891,2513066000000000.0,-98.775463,20.791278
5,-2.196718,-0.027195,2.148146,-2.138818,-2.546069,-1.190521,-2.831503,0.340885,9646776000000000.0,-103.579314,23.316
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-101.453185,26.362889
7,1.757639,0.079515,-0.051297,0.818477,0.53022,-1.223466,0.241366,-0.801687,869220800000000.0,-108.212148,28.779231
8,-2.28939,-0.561916,0.084778,-1.870737,-1.429823,-1.035882,-1.430415,0.050785,2921477000000000.0,-103.285564,23.454722
9,0.679292,-0.369499,0.119862,0.721031,-0.822562,0.769599,-0.072584,-0.086432,480290300000000.0,-98.962972,16.72387


In [40]:
# create .csv
MADout_df.to_csv('iMAD_results_13_TreesOnly.csv', index=False)

In [None]:
#### ENDE ####

In [54]:
## Beta

MADout_df["Y"] = infys_df_c1["std_chg"]
np.std(madout(lstone))
madout(lastone <2*np.std)=2


# Add chi squared variable to table

CHIsqr_df = pd.DataFrame(chisqr)


chisqr = np.transpose(chisqr).copy()
CHI_df = pd.DataFrame(chisqr)

MADout_df['CHI'] = CHI_df



# Assuming chisqr is a 1D numpy array with length equal to the number of rows in MADout
# We ensure chisqr is reshaped as a 2D column vector if it isn't already
chisqr_column = chisqr.reshape(-1, 1)

# Append chisqr as a new column to MADout
MADout = np.concatenate((MADout, chisqr_column), axis=1)




MADout_df.head()



SyntaxError: cannot assign to function call here. Maybe you meant '==' instead of '='? (54363561.py, line 5)