In [2]:
import pandas as pd
import numpy as np

# Reading the heights file

In [3]:
def fix_format(df, NCOLS):
    '''
    Rebuilds the real rows of the heights table.
    The heights document is read with every real row split across two consecutive DataFrame rows
    (1000 and 950 columns in the original file). This function glues each pair back together.
    Args:
        - df: DataFrame with the wrong format ( 1 real_row = 2 df_rows ). Must contain a column labelled 0.
        - NCOLS: Number of columns of the real file.
        
    Returns:
        - DataFrame with one row per real row, keeping only the columns labelled below NCOLS.
    '''
    # Column 0 only carries null values in the raw file
    trimmed = df.drop(columns=0)

    # First and second half of every real row (even and odd positions respectively)
    first_halves = trimmed.iloc[0::2].reset_index(drop=True)
    second_halves = trimmed.iloc[1::2].reset_index(drop=True)

    # Side by side: [a,b],[c,d] = [a,b,c,d], with the columns relabelled 0..k-1
    merged = pd.concat([first_halves, second_halves], axis=1, ignore_index=True)
    n_rows, n_merged_cols = merged.shape

    # Every cell beyond the first NCOLS columns is expected to be null
    expected_nulls = (n_merged_cols - NCOLS) * n_rows
    found_nulls = merged.iloc[:, NCOLS:].isna().sum().sum()
    if found_nulls != expected_nulls:
        print( "Warning: dropping non-null values")

    # Dropping the padding columns
    return merged.drop(columns=range(NCOLS, n_merged_cols))

In [4]:
def index_matrix(df, XCENT, YCENT, CELLSIZE):
    '''
    Flattens the height matrix into x,y,z rows.
    The coordinates are assigned by position, so the index and column labels of df are irrelevant.
    Args:
        - df: Dataframe with the heights (one cell per height value).
        - XCENT: X coordinate given to the first column of df.
        - YCENT: Y coordinate given to the last row of df.
        - CELLSIZE: Distance between two consecutive cells.
    
    Returns:
        - A dataframe with one row per cell, in row-major order, and the columns 0 (x), 1 (y) and 2 (z).
    '''
    nrows, ncols = df.shape

    # x grows with the column position; y decreases with the row position (the last row sits at YCENT)
    x_coords = XCENT + np.arange(ncols) * CELLSIZE
    y_coords = YCENT + np.arange(nrows - 1, -1, -1) * CELLSIZE

    # Row-major flattening: the x pattern repeats once per row, each y value repeats once per column
    return pd.DataFrame({
        0: np.tile(x_coords, nrows),
        1: np.repeat(y_coords, ncols),
        2: df.to_numpy().ravel(),
    })

### Example

In [5]:
# The first six lines of the heights file are read as a small table: one header entry per row
metadata = pd.read_csv(
    '../toy dataset/met2v10as0f279110mr1r010.txt',
    sep=' ',
    skipinitialspace=True,
    header=None,
    skip_blank_lines=True,
    nrows=6,
)

# Column 1 holds the values; the order below is the one assumed for the file header (TODO confirm)
NCOLS, NROWS, XCENT, YCENT, CELLSIZE, NODATA_VALUE = (int(value) for value in metadata[1])

In [6]:
# Heights matrix: everything after the six header lines, still in the raw (split-row) format
test = pd.read_csv(
    '../toy dataset/met2v10as0f279110mr1r010.txt',
    sep=' ',
    header=None,
    skip_blank_lines=True,
    skiprows=6,
)
test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,1000
0,,311.51,312.50,313.50,314.48,315.04,315.03,315.04,315.11,315.18,...,283.17,283.26,283.31,283.30,283.31,283.67,283.50,283.59,283.65,283.63
1,,283.61,283.71,283.76,283.83,283.87,283.87,283.92,283.98,284.02,...,,,,,,,,,,
2,,311.35,312.35,313.35,314.37,314.99,315.15,315.16,315.22,315.31,...,283.17,283.27,283.38,283.38,283.39,283.47,283.44,283.54,283.61,283.64
3,,283.66,283.71,283.75,283.85,283.86,283.86,283.96,284.00,284.05,...,,,,,,,,,,
4,,311.20,312.20,313.38,314.86,315.12,315.25,315.21,315.29,315.40,...,283.16,283.25,283.45,283.40,283.44,283.50,283.44,283.45,283.53,283.59
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2759,,398.70,398.25,397.93,397.64,397.44,397.12,396.62,395.17,393.77,...,,,,,,,,,,
2760,,505.88,505.87,505.95,506.03,506.25,506.68,506.93,507.03,506.83,...,404.44,404.02,403.58,403.08,402.28,401.65,401.25,400.67,400.10,399.55
2761,,399.12,398.62,398.11,397.86,397.68,397.38,396.58,395.24,393.80,...,,,,,,,,,,
2762,,504.81,504.88,504.92,505.06,505.52,505.97,506.40,506.63,506.52,...,405.05,404.71,403.93,403.09,402.31,401.90,401.51,400.80,400.37,400.05


In [7]:
# Rebuild the real rows of the raw heights table, then flatten the matrix into x, y, z rows
A = fix_format(test, NCOLS)
index_matrix(A, XCENT, YCENT, CELLSIZE)

Unnamed: 0,0,1,2
0,394072,4620686,311.51
1,394074,4620686,312.50
2,394076,4620686,313.50
3,394078,4620686,314.48
4,394080,4620686,315.04
...,...,...,...
2696277,397964,4617924,325.12
2696278,397966,4617924,325.15
2696279,397968,4617924,325.06
2696280,397970,4617924,324.79


# Manual Prediction

In [None]:
def CC_manual_percentage_app2(my_block, threshold, outliers, vegetation):
    '''
    Canopy Cover percentage of a block, approach 2 (height threshold).
    The ground of the block is assumed to be at a constant height: the lowest z of the block once the
    outliers are removed. Points are snapped to integer x,y coordinates and every x,y cell is represented
    by its highest point, so the height of a cell is: max_cell(z) - min_block(z).
    %CC = % of cells whose highest point is vegetation and is higher than the threshold.

    Args:
        - my_block: Dataframe containing the block of data. Uses the columns 'x', 'y', 'z' and 'class'.
        - threshold: Threshold from which a point is considered to be from the canopy cover.
        - outliers: Point-class (or list of point-classes) to be considered as an outlier.
        - vegetation: Point-class (or list of point-classes) to be considered as vegetation.
        
    Returns:
        - Canopy Cover percentage over 100. It is 0 when the block has no points left without the outliers.
    '''
    # isin needs a list-like, so a single class number is wrapped
    if not pd.api.types.is_list_like(outliers):
        outliers = [outliers]
    if not pd.api.types.is_list_like(vegetation):
        vegetation = [vegetation]

    # Erasing outliers
    points = my_block[~my_block['class'].isin(outliers)]
    if points.shape[0] == 0:
        return 0

    # Rounding the coordinates to correct the measurement errors.
    # assign builds a new frame, so neither the filtered slice nor the caller's block is written to.
    points = points.assign(x=points['x'].round(), y=points['y'].round())

    # Grouping the points by x,y and keeping the highest z of every cell
    cells = points.groupby(['x', 'y'], as_index=False)['z'].max()
    ground = points['z'].min()

    # Adding the classes of the points. Several points may tie for the top of a cell,
    # so a cell can appear more than once after the merge.
    surface_points = cells.merge(points, on=['x', 'y', 'z'], how='left')
    height = surface_points['z'] - ground

    # Canopy Cover points: vegetation above the threshold
    is_canopy = surface_points['class'].isin(vegetation) & (height > threshold)

    # Counting cells, not merged rows, keeps the percentage within 0-100
    canopy_cells = surface_points.loc[is_canopy, ['x', 'y']].drop_duplicates().shape[0]

    CC_percentage = canopy_cells / cells.shape[0]

    return CC_percentage*100

In [None]:
def CC_manual_percentage_app3(my_block, outliers, vegetation):
    '''
    Canopy Cover percentage of a block, approach 3 (one representative per cell, no threshold).
    Points are snapped to integer x,y coordinates and every x,y cell is represented by its highest point.
    %CC = % of cells whose representative is a vegetation point, with respect to all the cells.

    Args:
        - my_block: Dataframe containing the block of data. Uses the columns 'x', 'y', 'z' and 'class'.
        - outliers: Point-class (or list of point-classes) to be considered as an outlier.
        - vegetation: Point-class (or list of point-classes) to be considered as vegetation.
        
    Returns:
        - Canopy Cover percentage over 100. It is 0 when the block has no points left without the outliers.
    '''
    # isin needs a list-like, so a single class number is wrapped
    if not pd.api.types.is_list_like(outliers):
        outliers = [outliers]
    if not pd.api.types.is_list_like(vegetation):
        vegetation = [vegetation]

    # Erasing outliers
    points = my_block[~my_block['class'].isin(outliers)]
    if points.shape[0] == 0:
        return 0

    # Rounding the coordinates to correct the measurement errors.
    # assign builds a new frame, so neither the filtered slice nor the caller's block is written to.
    points = points.assign(x=points['x'].round(), y=points['y'].round())

    # Grouping the points by x,y and keeping the highest z of every cell
    cells = points.groupby(['x', 'y'], as_index=False)['z'].max()

    # Adding the classes of the points. Several points may tie for the top of a cell,
    # so a cell can appear more than once after the merge.
    surface_points = cells.merge(points, on=['x', 'y', 'z'], how='left')

    # Choosing vegetation points
    is_vegetation = surface_points['class'].isin(vegetation)

    # Counting cells, not merged rows, keeps the percentage within 0-100
    canopy_cells = surface_points.loc[is_vegetation, ['x', 'y']].drop_duplicates().shape[0]

    CC_percentage = canopy_cells / cells.shape[0]

    return CC_percentage*100

In [None]:
def CC_manual_percentage_app4(my_block, threshold, outliers, vegetation):
    '''
    Canopy Cover percentage of a block, approach 4 (no representatives, no threshold).
    The x,y cells no longer have a representative: every point of the block counts.
    %CC = % of vegetation-class points with respect to all the points of the block (outliers excluded).

    Args:
        - my_block: Dataframe containing the block of data. Uses the column 'class'.
        - threshold: Not used by this approach.
        - outliers: Point-class (or list of point-classes) to be considered as an outlier.
        - vegetation: Point-class (or list of point-classes) to be considered as vegetation.
        
    Returns:
        - Canopy Cover percentage over 100. It is 0 when the block has no points left without the outliers.
    '''
    # isin needs a list-like, so a single class number is wrapped
    if not pd.api.types.is_list_like(outliers):
        outliers = [outliers]
    if not pd.api.types.is_list_like(vegetation):
        vegetation = [vegetation]

    # Erasing outliers
    points = my_block[~my_block['class'].isin(outliers)]

    # Checked after the filter: a block made only of outliers would otherwise divide by zero
    if points.shape[0] == 0:
        return 0

    # Choosing vegetation points
    CC_points = points[points['class'].isin(vegetation)]

    CC_percentage = CC_points.shape[0] / points.shape[0]

    return CC_percentage*100