In [13]:
# This script is used for testing all adaptations and changes made

In [37]:
import pandas as pd
import numpy as np

from scipy import linalg
from scipy import stats

In [38]:
# Load data for change detection as a pandas data frame.
infys_df =  pd.read_csv("../data/iMAD/iMAD_Data_13.csv") #need to change the dataset 
infys_df.head()

Unnamed: 0.1,Unnamed: 0,Cycle,Cluster_ID,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,SpeciesCount,AvgTreeHeight,TreeCount,J,X,Y
0,1,1,77652,16.103571,3.937736,3.367925,15.409993,8,8.555357,56,0.440017,-92.433389,15.588935
1,2,1,28451,10.380952,2.209524,0.0,3.95692,2,2.909524,21,0.334649,-99.546778,26.072916
2,3,1,55242,14.996429,4.439683,4.557937,17.199388,5,5.458333,84,0.50543,-104.218713,20.403546
3,4,1,20795,11.6,3.169565,4.286957,8.243627,5,4.493548,31,0.770242,-108.957083,27.390907
4,5,1,53236,12.194444,4.886047,5.753488,20.022403,6,7.605556,54,0.841231,-98.775463,20.791278


In [39]:
# Create new ids by concatenating 'Conglomerado' and 'Sitio'.
    ## don't think I need to do that
    ## Use existing ID of Metadataset called Cluster_ID -> should be able to leave that one out


# As we want to compare cycle 1 vs cycle 2 lets separate them.   
## Stays the same for long dataformat
## might figure out how to 
infys_df_c1 = infys_df[infys_df['Cycle'] == 1]
infys_df_c2 = infys_df[infys_df['Cycle'] == 3]

In [40]:
# Find intersection of new ID variable: 'cng_sit', so obs. that are present in both cycles.
s1 = infys_df_c1['Cluster_ID']
s2 = infys_df_c2['Cluster_ID']
cluster_id_inter = pd.Series(list(set(s1).intersection(set(s2))))
## might have already done this in R by using my filter(muestreado1 == 1 & muestreado2 == 1)



In [41]:
# Filter out obs. that dont appear in cng_sit_inter.
infys_df_c1 = infys_df_c1[infys_df_c1['Cluster_ID'].isin(cluster_id_inter)]
infys_df_c2 = infys_df_c2[infys_df_c2['Cluster_ID'].isin(cluster_id_inter)]

In [42]:
# Select only variables to be utilized in change detection.
vars = ["SpeciesCount", "TreeCount", "J", "AvgTreeHeight", "AvgDbh", "AvgCrownDiameter", "AvgCrownHeight", "AvgCrownArea"]
nvars = len(vars)
infys_df_c1_c = infys_df_c1[infys_df_c1.columns.intersection(vars)]
infys_df_c2_c = infys_df_c2[infys_df_c2.columns.intersection(vars)]

c1_c_nans = infys_df_c1_c.isna().any(axis=1).to_numpy()
c2_c_nans = infys_df_c2_c.isna().any(axis=1).to_numpy()

In [43]:
# Create change data matrix, missing values mask and initial change weights.
dm = np.zeros((2 * nvars, infys_df_c1_c.shape[0]))
dm[0:nvars] = np.transpose(infys_df_c1_c.to_numpy())
dm[nvars:] = np.transpose(infys_df_c2_c.to_numpy())

nodataidx = c1_c_nans | c2_c_nans
gooddataidx = nodataidx == False
dm = dm[:, gooddataidx]
ngood = np.sum(gooddataidx)

In [44]:
# Change detection iterations.

# iteration of MAD   
wt = np.ones(int(ngood))
delta = 1.0
oldrho = np.zeros(nvars)
iter = 0
 
while (delta > 0.01) and (iter < 35): 
    print(iter)
    # Weighted covariance matrices and means.
    sumw = np.sum(wt)
    means = np.average(dm,axis=1, weights=wt)
    dmc = dm - means[:,np.newaxis]
    dmc = np.multiply(dmc,np.sqrt(wt))
    sigma = np.dot(dmc,dmc.T)/sumw
   
    s11 = sigma[0:nvars, 0:nvars]
    s22 = sigma[nvars:, nvars:]
    s12 = sigma[0:nvars, nvars:]
    s21 = sigma[nvars:, 0:nvars]
    
    # Solution of generalized eigenproblems.
    aux_1 = linalg.solve(s22, s21)
    lama, a = linalg.eig(np.dot(s12, aux_1), s11)
    aux_2 = linalg.solve(s11, s12)
    lamb, b = linalg.eig(np.dot(s21, aux_2), s22)
    
    
    # Sort a.  
    idx = np.argsort(lama)
    a = a[:, idx]
    
    # Sort b.        
    idx = np.argsort(lamb)
    b = b[:, idx]
    
    # Canonical correlations.        
    rho = np.sqrt(np.real(lamb[idx]))
    
    
    # Normalize dispersions.  
    tmp1 = np.dot(np.dot(a.T,s11), a)
    tmp2 = 1. / np.sqrt(np.diag(tmp1))
    tmp3 = np.tile(tmp2, (nvars, 1))
    a = np.multiply(a, tmp3)
    b = np.mat(b)
    tmp1 = np.dot(np.dot(b.T,s22), b)
    tmp2 = 1. / np.sqrt(np.diag(tmp1))
    tmp3 = np.tile(tmp2, (nvars, 1))
    b = np.multiply(b, tmp3)
        
    # Assure positive correlation
    tmp = np.diag(np.dot(np.dot(a.T,s12), b))
    b = np.dot(b,np.diag(tmp / np.abs(tmp)))

    # Canonical and MAD variates
    U = np.dot(a.T , (dm[0:nvars, :] - means[0:nvars, np.newaxis]))    
    V = np.dot(b.T , (dm[nvars:, :] - means[nvars:, np.newaxis]))          
    MAD = U - V  
    
            
    # New weights.        
    var_mad = np.tile(np.mat(2 * (1 - rho)).T, (1, ngood))    
    chisqr = np.sum(np.multiply(MAD, MAD) / var_mad, 0)
    wt = np.squeeze(1 - np.array(stats.chi2._cdf(chisqr, nvars)))
    
        
    # Continue iteration.        
    delta = np.sum(np.abs(rho - oldrho))
    oldrho = rho
    iter += 1
    
# reshape to original image size, by including nodata pixels   
# actually reshaping to image size plus one to add chi-squared values
MADout = np.zeros((int(nvars+1), infys_df_c1_c.shape[0]))
MADout[0:nvars, gooddataidx] = MAD
MADout[nvars, gooddataidx] = chisqr

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34


In [45]:
# check whether adding chi-squared worked -> should be 9 rows (or in pyhton count: 0-8)
MADout_df = pd.DataFrame(MADout)
MADout_df.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,10019,10020,10021,10022,10023,10024,10025,10026,10027,10028
0,5.437405,2.405378,1.327816,-4.552112,0.1372696,-0.9466198,0.0,-1.066795,3.098533,-1.603075,...,9.357227,-5.301276,-0.4941792,7.528185,-2.32003,-4.775046,2.076104,2.302529,4.596856,5.927396
1,0.9626554,-0.7247054,1.033827,1.05021,0.8542376,-2.132211,0.0,0.6255356,-1.267391,-0.6126498,...,-1.771581,-0.4384359,1.77459,-1.71888,-1.124911,-1.424104,-1.955404,1.036951,1.229038,0.1560425
2,-10.32079,-2.361721,-1.695157,-0.9671645,-1.540354,2.305062,0.0,0.3474515,0.8033283,1.21131,...,-6.30681,5.993173,-2.992284,-4.210023,4.674634,-0.0886629,4.311729,1.835648,-6.922709,-0.9734255
3,3.134562,1.30367,0.4536299,-1.18718,1.648971,1.381702,0.0,-0.4029347,0.6639379,-0.3188096,...,5.571334,-2.379747,1.101107,2.562464,-1.321008,1.39124,2.216319,-2.809122,2.759581,2.778459
4,3.052105,1.054188,1.368149,-3.395432,0.733953,1.529025,0.0,-0.8899245,1.338909,-0.8785407,...,5.664258,-2.464416,1.281144,2.415249,-0.954223,-0.7450945,1.812253,0.4429755,2.246209,3.800367
5,-3.241667,-1.112736,-0.6567736,2.059342,-0.09990935,-0.301823,0.0,0.5076562,-0.7793796,0.3916627,...,-3.219931,1.536313,0.07027491,-2.876604,0.3983227,0.3212171,1.09507,-0.07435969,-1.042254,-1.799952
6,0.1523143,0.2914294,0.6170532,0.2882223,-0.3698032,0.8762953,0.0,-0.2858156,0.6303132,-0.03905091,...,0.7966533,-1.074671,-0.04955892,0.7794359,-0.655539,-0.9567884,0.7020531,-1.217793,0.01694075,-0.09412747
7,-1.728942,0.6137319,0.7569257,0.6885598,-0.6084994,0.2519553,0.0,-0.3380572,0.5193926,-0.582717,...,-1.750283,-0.8792048,0.01221502,-0.1208675,-0.7985596,-1.916598,-0.4250452,-1.032355,0.07323212,0.7538249
8,19017300000000.0,2791983000000.0,5451926000000.0,3419309000000.0,2993235000000.0,4111345000000.0,0.0,1116508000000.0,3623543000000.0,2147336000000.0,...,22440930000000.0,10467340000000.0,13002490000.0,3087954000000.0,6094487000000.0,27561890000000.0,3528647000000.0,13879360000000.0,43562310000.0,3646791000000.0


In [46]:
# increase dimension of transposed dataframe by adding on column called chi_squared
vars_with_chi_squared = vars + ["chi_squared"]

# transpose matrix 
MADout = np.transpose(MADout).copy()
MADout_df = pd.DataFrame(MADout, columns=vars_with_chi_squared)
# should now have 9 columns and the last one should be chi_squared
MADout_df.head(10)

Unnamed: 0,SpeciesCount,TreeCount,J,AvgTreeHeight,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,chi_squared
0,5.437405,0.962655,-10.320786,3.134562,3.052105,-3.241667,0.152314,-1.728942,19017300000000.0
1,2.405378,-0.724705,-2.361721,1.30367,1.054188,-1.112736,0.291429,0.613732,2791983000000.0
2,1.327816,1.033827,-1.695157,0.45363,1.368149,-0.656774,0.617053,0.756926,5451926000000.0
3,-4.552112,1.05021,-0.967165,-1.18718,-3.395432,2.059342,0.288222,0.68856,3419309000000.0
4,0.13727,0.854238,-1.540354,1.648971,0.733953,-0.099909,-0.369803,-0.608499,2993235000000.0
5,-0.94662,-2.132211,2.305062,1.381702,1.529025,-0.301823,0.876295,0.251955,4111345000000.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,-1.066795,0.625536,0.347451,-0.402935,-0.889924,0.507656,-0.285816,-0.338057,1116508000000.0
8,3.098533,-1.267391,0.803328,0.663938,1.338909,-0.77938,0.630313,0.519393,3623543000000.0
9,-1.603075,-0.61265,1.21131,-0.31881,-0.878541,0.391663,-0.039051,-0.582717,2147336000000.0


In [47]:
# add coordinates
MADout_df["X"] = infys_df_c1["X"]
MADout_df["Y"] = infys_df_c1["Y"]
MADout_df.head(10)

Unnamed: 0,SpeciesCount,TreeCount,J,AvgTreeHeight,AvgDbh,AvgCrownDiameter,AvgCrownHeight,AvgCrownArea,chi_squared,X,Y
0,5.437405,0.962655,-10.320786,3.134562,3.052105,-3.241667,0.152314,-1.728942,19017300000000.0,-92.433389,15.588935
1,2.405378,-0.724705,-2.361721,1.30367,1.054188,-1.112736,0.291429,0.613732,2791983000000.0,-99.546778,26.072916
2,1.327816,1.033827,-1.695157,0.45363,1.368149,-0.656774,0.617053,0.756926,5451926000000.0,-104.218713,20.403546
3,-4.552112,1.05021,-0.967165,-1.18718,-3.395432,2.059342,0.288222,0.68856,3419309000000.0,-108.957083,27.390907
4,0.13727,0.854238,-1.540354,1.648971,0.733953,-0.099909,-0.369803,-0.608499,2993235000000.0,-98.775463,20.791278
5,-0.94662,-2.132211,2.305062,1.381702,1.529025,-0.301823,0.876295,0.251955,4111345000000.0,-103.579314,23.316
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-101.453185,26.362889
7,-1.066795,0.625536,0.347451,-0.402935,-0.889924,0.507656,-0.285816,-0.338057,1116508000000.0,-108.212148,28.779231
8,3.098533,-1.267391,0.803328,0.663938,1.338909,-0.77938,0.630313,0.519393,3623543000000.0,-103.285564,23.454722
9,-1.603075,-0.61265,1.21131,-0.31881,-0.878541,0.391663,-0.039051,-0.582717,2147336000000.0,-98.962972,16.72387


In [48]:
# create .csv
MADout_df.to_csv('iMAD_results_13.csv', index=False)

In [None]:
#### ENDE ####

In [54]:
## Beta

MADout_df["Y"] = infys_df_c1["std_chg"]
np.std(madout(lstone))
madout(lastone <2*np.std)=2


# Add chi squared variable to table

CHIsqr_df = pd.DataFrame(chisqr)


chisqr = np.transpose(chisqr).copy()
CHI_df = pd.DataFrame(chisqr)

MADout_df['CHI'] = CHI_df



# Assuming chisqr is a 1D numpy array with length equal to the number of rows in MADout
# We ensure chisqr is reshaped as a 2D column vector if it isn't already
chisqr_column = chisqr.reshape(-1, 1)

# Append chisqr as a new column to MADout
MADout = np.concatenate((MADout, chisqr_column), axis=1)




MADout_df.head()



SyntaxError: cannot assign to function call here. Maybe you meant '==' instead of '='? (54363561.py, line 5)