In [122]:
# The helpers used below (track_pairs, pvpg_parallel) are not defined in this cell;
# presumably they come from this import when the kernel is fresh -- confirm.
# from scripts.parallel_phoreal import *

# Folder holding the ATL03/ATL08 granules for the site.
dirpath = '../data_store/data/sodankyla_full/'

# Matched lists of ATL03 and ATL08 file paths (same index = same overpass).
all_ATL03, all_ATL08 = track_pairs(dirpath)
N = len(all_ATL03)

# Centre of the study box, given as (longitude, latitude).
coords = (26.634154, 67.361833)

# Only file index 9 is processed here; widen the range to process more files.
for i in range(9, 10):
    data = pvpg_parallel(
        all_ATL03[i],
        all_ATL08[i],
        coords=coords,
        width=.1,
        height=.1,
        graph_detail=1,
        loss='arctan',
        file_index=i,
        keep_flagged=1,
        opsys='bad',
        f_scale=.1,
        altitude=185,
        small_box=0.01,
    )
    print(data)
    # print(flatten_structure(data))

          asr  atlas_pa  beam_azimuth  beam_coelev  brightness_flag  \
169  0.105831  0.007716     -2.607923     1.563080                0   
170  0.105831  0.007716     -2.608101     1.563080                0   
175  0.120534  0.007715     -2.607939     1.563081                0   
176  0.120534  0.007715     -2.608076     1.563081                0   
189  0.094151  0.007714     -2.608035     1.563083                0   
..        ...       ...           ...          ...              ...   
386  0.117708  0.007707     -2.609245     1.563089                0   
387  0.117708  0.007708     -2.609120     1.563089                0   
388  0.114122  0.007708     -2.609078     1.563089                0   
389  0.114122  0.007708     -2.609240     1.563088                0   
390  0.114122  0.007709     -2.609368     1.563088                0   

     can_noise  canopy_h_metrics_0  canopy_h_metrics_1  canopy_h_metrics_2  \
169   1.070877            1.391190            1.775497            1.9

In [121]:
def flatten_structure(structure):
    """
    Recursively flatten nested lists, tuples and NumPy arrays into one flat list.

    structure - Any object. Lists, tuples and np.ndarray instances are descended
                into (depth first, left to right); everything else, including
                strings, is treated as a single leaf.

    Returns a new list holding the leaves in traversal order. A non-container
    input yields a one-element list; an empty container yields an empty list.
    """
    # A 0-d array is an ndarray but cannot be iterated (raises TypeError), so
    # unwrap it to the scalar it holds and treat it as a leaf.
    if isinstance(structure, np.ndarray) and structure.ndim == 0:
        return [structure[()]]

    if not isinstance(structure, (list, tuple, np.ndarray)):
        return [structure]

    flat_list = []
    for item in structure:
        flat_list.extend(flatten_structure(item))
    return flat_list

def pvpg_parallel(atl03path, atl08path, coords, width=.1, height=.1, f_scale = .1, loss = 'arctan', init = -.6, lb = -np.inf, ub = 0,\
    file_index = None, model = parallel_model, res = parallel_residuals, odr = parallel_odr, zeros=None,\
    beam = None, y_init = np.max, graph_detail = 0, canopy_frac = None, terrain_frac = None, keep_flagged=True, opsys='bad', altitude=None,
                 alt_thresh=200, threshold = 2, small_box = 0.01):
    """
    Parallel regression of all tracks on a given overpass.

    The six beams of one ATL03/ATL08 file pair are clipped to a box around
    `coords`, filtered, clipped again to one small sub-box, and the remaining
    (photon_rate_te, photon_rate_can_nr) points of all beams are handed to `odr`.

    NOTE(review): a grid of sub-boxes is built from `small_box`, but only the
    LAST grid cell (largest latitude and longitude) is analysed; this matches the
    values the original code ended up using. Looping over every cell looks like
    the eventual goal -- confirm.

    atl03path - Path/to/ATL03/file
    atl08path - Path/to/matching/ATL08/file
    coords - (longitude, latitude) pair passed to make_box() as the box location.
    width, height - Box dimensions passed to make_box() for the outer box.
    f_scale - Parameter in least_squares() function when loss is nonlinear, indicating the value of the soft margin between inlier and outlier residuals.
    loss - string for loss parameter in least_squares().
    init - Currently unused; the starting slope is estimated from the data instead.
    lb - Lower bound of allowed value for the slope of the regression, default -np.inf
    ub - Upper bound of allowed value for the slope of the regression, default 0
    file_index - Index of file if cycling through an array of filenames, displayed in figure titles and messages for a given file. Allows us to easily pick out strange cases for investigation.
    model - model function to be used in least squares. Default is the parallel model function
    res - Default holds the ODR residuals function to be used in least_squares(). Can hold adjusted residual functions as well.
    odr - function that performs the orthogonal regression. Replace with great care if you do.
    zeros - Currently unused by this function.
    beam - Default is None. Passed through to the plotting functions. Put in input in the form of an array of integers. For example, if you only want to display pv/pg on the plot for Beams 3 and 4, the input is [3,4]
    y_init - Currently unused by this function.
    graph_detail - Default is 0 (no plot). 1 calls plot_graph(); 2 calls plot_parallel(); 3 calls plot_parallel() with three=True.
    canopy_frac - Default is None. If not None, the per-beam canopy fraction (mean of the non-negative subset_can_flag_* columns) is collected and passed to plot_parallel().
    terrain_frac - Default is None. Same as canopy_frac but for the subset_te_flag_* columns.
    keep_flagged - Default is True. If None, the whole file is skipped when any beam has a 0 in geolocation/ph_index_beg.
    opsys - Currently unused by this function.
    altitude - Default is None. If given, only segments with |h_te_best_fit - altitude| <= alt_thresh are kept.
    alt_thresh - Allowed deviation from `altitude`; only used when `altitude` is given.
    threshold - Minimum number of ATL08 segments a beam needs to take part in the regression.
    small_box - Latitude step of the sub-box grid; the longitude step is small_box / cos(latitude of coords).

    Returns
        Normal case: (coefs, [lon, lat], means, msw_flag, night_flag, asr, data_amount)
            coefs       - result of `odr`, with -1 inserted for every beam without data
            [lon, lat]  - grid coordinates of the analysed sub-box
            means       - [meanEgstrong, meanEgweak, meanEvstrong, meanEvweak]
            msw_flag, night_flag, asr - per-beam means of those ATL08 columns (-1 if the beam has no data)
            data_amount - total number of segments used in the regression
        No beam has data: a 7-tuple of -1 placeholders, with `coords` in second place and 0 as data_amount.
        Transition orientation, or file skipped via keep_flagged=None: six zeros.
        NOTE(review): this early return has 6 values while the others have 7; it is
        left unchanged so existing callers are not affected -- confirm which is expected.

    Raises
        ValueError if the outer box is empty, so that no sub-box grid can be built.
    """

    polygon = make_box(coords, width,height)
    min_lon, min_lat, max_lon, max_lat = polygon.total_bounds

    # Grid of sub-box coordinates; the longitude step is stretched by 1/cos(lat).
    lats = np.arange(min_lat, max_lat, small_box)
    lons = np.arange(min_lon, max_lon, small_box/np.cos(np.radians(coords[1])))

    # Per-beam mean photon rates, split by beam strength.
    meanEgstrong = []
    meanEgweak = []
    meanEvstrong = []
    meanEvweak = []

    # Per-beam means of the ATL08 quality columns (-1 marks a beam without data).
    msw_flag = []
    night_flag = []
    asr = []

    # This will hold all of the data in one place:
    # [[Eg, Ev, Beam 1],...[Eg,Ev,Beam 1],[Eg,Ev,Beam 2],...,[Eg,Ev,Beam6],[Eg,Ev,Beam 6]]
    # This will be made into a dataframe later.
    dataset = []

    # X and Y data to plot later; one entry per beam so positions match beam indices.
    plotX = []
    plotY = []

    # ATL03 objects of the beams that have data, to plot groundtracks later.
    atl03s = []

    # Per-beam starting slope guesses and their weights (number of segments).
    slope_init = []
    slope_weight = []

    data_amount = 0

    # Check the satellite orientation so we know which beams are strong and weak.
    # Listed from Beam 1 to Beam 6 in the tracks array.
    # The context manager guarantees the file is closed on every exit path.
    with h5py.File(atl03path, 'r') as A:
        sc_orient = list(A['orbit_info']['sc_orient'])[0]
        if sc_orient == 1:
            strong = ['gt1r', 'gt2r', 'gt3r']
            weak = ['gt1l', 'gt2l', 'gt3l']
        elif sc_orient == 0:
            strong = ['gt3l', 'gt2l', 'gt1l']
            weak = ['gt3r', 'gt2r', 'gt1r']
        else:
            print('Satellite in transition orientation.')
            return 0, 0, 0, 0, 0, 0
        tracks = [strong[0], weak[0], strong[1], weak[1], strong[2], weak[2]]

        # Very quick quality check; if any of the segments have zero return photons at all,
        # the file is just skipped on assumptions that the data quality isn't good
        if keep_flagged is None:
            for gt in tracks:
                try:
                    if 0 in A[gt]['geolocation']['ph_index_beg']:
                        print('File ' + str(file_index) + ' has been skipped because some segments contain zero photon returns.')
                        return 0, 0, 0, 0, 0, 0
                except (KeyError, FileNotFoundError):
                    # The beam is missing from the file; nothing to check.
                    continue

    # The only purpose of this is to keep the data organised later.
    beam_names = [f"Beam {i}" for i in range(1,7)]

    # Keep indices of the beams with data, to colour regression lines consistently later.
    colors = []

    # Extracting date and time from the filename
    title_date = datetime_to_title(parse_filename_datetime(atl03path))

    # Initial y-intercept guesses and the `maxes` values handed to `odr`.
    intercepts = []
    maxes = []

    # If the user wants the fraction of segments that have canopy/terrain photons,
    # the argument is replaced by a list that collects one value per beam.
    if canopy_frac is not None:
        canopy_frac = []
    if terrain_frac is not None:
        terrain_frac = []

    def _mark_beam_missing(beam_idx, pad_plots=False, pad_fracs=False):
        # Append -1 placeholders for a beam that cannot be used, so every
        # per-beam list keeps exactly one entry per beam.
        if pad_plots:
            plotX.append([])
            plotY.append([])
        msw_flag.append(-1)
        night_flag.append(-1)
        asr.append(-1)
        if pad_fracs:
            if canopy_frac is not None:
                canopy_frac.append(-1)
            if terrain_frac is not None:
                terrain_frac.append(-1)
        if beam_idx % 2 == 0:
            meanEgstrong.append(-1)
            meanEvstrong.append(-1)
        else:
            meanEgweak.append(-1)
            meanEvweak.append(-1)

    # The analysed sub-box is the same for every beam, so it is computed once.
    # The original code looped over the whole grid, discarded every result, and
    # only used the final (lon, lat) pair; that pair is taken directly here.
    if len(lats) == 0 or len(lons) == 0:
        raise ValueError('The box around coords is empty; cannot build the sub-box grid.')
    lat = lats[-1]
    lon = lons[-1]
    sub_box = make_box((lon,lat), small_box/2,small_box/2)
    # Four distinct names: the original unpacked min and max into the same two
    # variables, which reduced the filter to an exact-equality test.
    sub_min_lon, sub_min_lat, sub_max_lon, sub_max_lat = sub_box.total_bounds

    # Land cover classes that are kept (meaning of the codes not visible here).
    kept_landcover = [111, 112, 113, 114, 115, 116, 121, 122, 123, 124, 125, 126]

    # Now that we have assurances that the data is good quality,
    # we loop through the ground tracks
    for i, gt in enumerate(tracks):

        # If an object fails to be created, placeholders are stored for this
        # beam to save us looping effort later.
        try:
            atl03 = get_atl03_struct(atl03path, gt, atl08path)
        except (KeyError, ValueError, OSError):
            _mark_beam_missing(i, pad_plots=True, pad_fracs=True)
            print(f'Beam {i + 1} in file {file_index} has insufficient data.')
            continue

        try:
            atl08 = get_atl08_struct(atl08path, gt)
        except (KeyError, ValueError, OSError):
            # plotX/plotY are padded here too (the original did not), so their
            # positions stay aligned with the beam indices stored in `colors`.
            _mark_beam_missing(i, pad_plots=True, pad_fracs=True)
            print(f"Failed to open ATL08 file for file {file_index}'s beam {i+1}.")
            continue

        # Clip both products to the outer box.
        atl03.df = atl03.df[(atl03.df['lon_ph'] >= min_lon) & (atl03.df['lon_ph'] <= max_lon) &\
                                (atl03.df['lat_ph'] >= min_lat) & (atl03.df['lat_ph'] <= max_lat)]
        atl08.df = atl08.df[(atl08.df['longitude'] >= min_lon) & (atl08.df['longitude'] <= max_lon) &\
                                (atl08.df['latitude'] >= min_lat) & (atl08.df['latitude'] <= max_lat)]

        # Drop segments with implausibly large rates or canopy heights.
        atl08.df = atl08.df[(atl08.df.photon_rate_can_nr < 100) & (atl08.df.photon_rate_te < 100) & (atl08.df.h_canopy < 100)]

        # Land cover classification filter.
        atl08.df = atl08.df[atl08.df['segment_landcover'].isin(kept_landcover)]
        if altitude is not None:
            atl08.df = atl08.df[abs(atl08.df['h_te_best_fit'] - altitude) <= alt_thresh]

        # Restrict the ATL08 segments to the analysed sub-box. (The original also
        # built an ATL03 subset here but never used it, so it is not computed.)
        atl08_temp = atl08.df[(atl08.df['longitude'] >= sub_min_lon) & (atl08.df['longitude'] <= sub_max_lon) &\
                                (atl08.df['latitude'] >= sub_min_lat) & (atl08.df['latitude'] <= sub_max_lat)].copy()

        # Retrieve the canopy fraction (fraction of segments that contain any
        # canopy photons) if the user wants it.
        if canopy_frac is not None:
            canopy_frac.append(np.mean([
                atl08_temp[atl08_temp[f'subset_can_flag_{k}'] >= 0][f'subset_can_flag_{k}'].mean()
                for k in range(5)]))
        # Same for terrain. All five terms now use the sub-box data; the original
        # read the first one from the outer-box frame.
        if terrain_frac is not None:
            terrain_frac.append(np.mean([
                atl08_temp[atl08_temp[f'subset_te_flag_{k}'] >= 0][f'subset_te_flag_{k}'].mean()
                for k in range(5)]))

        # X and Y are data for the regression
        X = atl08_temp.photon_rate_te
        Y = atl08_temp.photon_rate_can_nr

        # Save it for plotting after the loop goes through all the groundtracks
        plotX.append(X)
        plotY.append(Y)

        if len(Y) < threshold:
            print(f'Beam {i + 1} in file {file_index} has insufficient data.')
            # plotX/plotY and the fractions already have this beam's entry.
            _mark_beam_missing(i)
            continue

        data_amount += len(Y)
        atl03s.append(atl03)
        colors.append(i)

        if i % 2 == 0:
            meanEgstrong.append(np.mean(X))
            meanEvstrong.append(np.mean(Y))
        else:
            meanEgweak.append(np.mean(X))
            meanEvweak.append(np.mean(Y))

        msw_flag.append(atl08_temp['msw_flag'].mean())
        night_flag.append(atl08_temp['night_flag'].mean())
        asr.append(atl08_temp['asr'].mean())

        # Save each individual data point from the ground track along with the Beam it belongs to.
        for x, y in zip(X,Y):
            dataset.append([x, y, beam_names[i]])

        # Starting parameters: a line through the means of the two groups
        # returned by divide_arrays_2().
        lower_X, lower_Y, upper_X, upper_Y = divide_arrays_2(X, Y)

        y1 = np.mean(lower_Y)
        y2 = np.mean(upper_Y)

        x1 = np.mean(lower_X)
        x2 = np.mean(upper_X)

        slope, intercept = find_slope_and_intercept(x1, y1, x2, y2)
        # Clamp the slope guess to [-1.5, -0.1] and recompute the intercept so the
        # line still passes through the midpoint of the two group means.
        if slope > -0.1:
            slope = -0.1
            intercept = intercept_from_slope_and_point(slope, (np.mean([x1,x2]),np.mean([y1,y2])))
        elif slope < -1.5:
            slope = -1.5
            intercept = intercept_from_slope_and_point(slope, (np.mean([x1,x2]),np.mean([y1,y2])))

        slope_init.append(slope)
        slope_weight.append(len(Y))
        # Save the initial y_intercept guess
        intercepts.append(intercept)
        # NOTE(review): fixed value handed to `odr` as `maxes`; its meaning is not visible here.
        maxes.append(16)

    # Starting slope = average of the per-beam guesses, weighted by segment count.
    slope_weight = np.asarray(slope_weight, dtype=float)
    slope_weight /= np.sum(slope_weight)
    slope_init = np.dot(slope_init,slope_weight)

    # Create DataFrame
    df = pd.DataFrame(dataset, columns=['Eg', 'Ev', 'gt'])

    # Dummy encode the categorical variable
    df_encoded = pd.get_dummies(df, columns=['gt'], prefix='', prefix_sep='')

    if df_encoded.shape[0] == 0:
        print(f'No beams have data in file {file_index}, cannot regress.')
        return [-1, -1, -1, -1, -1, -1, -1], [coords[0],coords[1]], [[-1,-1,-1],[-1,-1,-1],[-1,-1,-1],[-1,-1,-1]],\
                                [-1,-1,-1,-1,-1,-1], [-1,-1,-1,-1,-1,-1], [-1,-1,-1,-1,-1,-1], 0

    # Retrieve optimal coefficients [slope, y_intercept_dataset_1, y_intercept_dataset_2, etc.]
    coefs = odr(df_encoded, intercepts = intercepts, maxes = maxes, init = slope_init, lb=lb, ub=ub, model = model, res = res, loss=loss, f_scale=f_scale)

    # Nothing to draw if no beam contributed data.
    if len(colors) == 0:
        graph_detail = 0

    if graph_detail == 3:
        plot_parallel(atl03s = atl03s,
                      coefs = coefs,
                      colors = colors,
                      title_date = title_date,
                      X = plotX,
                      Y = plotY,
                      beam = beam,
                      canopy_frac = canopy_frac,
                      terrain_frac = terrain_frac,
                      file_index = file_index,
                      three = True)

    # Activate this if you want the whole shebang
    elif graph_detail == 2:
        plot_parallel(atl03s = atl03s,
                      coefs = coefs,
                      colors = colors,
                      title_date = title_date,
                      X = plotX,
                      Y = plotY,
                      beam = beam,
                      canopy_frac = canopy_frac,
                      terrain_frac = terrain_frac,
                      file_index = file_index)

    # Activate this if you don't want the groundtracks, just the plot
    elif graph_detail == 1:
        plot_graph(coefs = coefs,
                   colors = colors,
                   title_date = title_date,
                   X = plotX,
                   Y = plotY,
                   beam = beam,
                   file_index = file_index)
    # Any other value of graph_detail produces no plot.

    means = [meanEgstrong, meanEgweak, meanEvstrong, meanEvweak]

    # Insert a -1 into coefs for every beam without data. Position 0 is the
    # slope, so beam k (0-based) maps to index k + 1.
    indices_to_insert = [i + 1 for i, entry in enumerate(asr) if entry == -1]
    for index in indices_to_insert:
        coefs = np.insert(coefs, index, -1)

    return coefs, [lon,lat], means, msw_flag, night_flag, asr, data_amount

In [86]:
# Scratch check: display the values np.arange yields over [10, 20) with a 0.01 step.
np.arange(10,20,.01)

array([10.  , 10.01, 10.02, 10.03, 10.04, 10.05, 10.06, 10.07, 10.08,
       10.09, 10.1 , 10.11, 10.12, 10.13, 10.14, 10.15, 10.16, 10.17,
       10.18, 10.19, 10.2 , 10.21, 10.22, 10.23, 10.24, 10.25, 10.26,
       10.27, 10.28, 10.29, 10.3 , 10.31, 10.32, 10.33, 10.34, 10.35,
       10.36, 10.37, 10.38, 10.39, 10.4 , 10.41, 10.42, 10.43, 10.44,
       10.45, 10.46, 10.47, 10.48, 10.49, 10.5 , 10.51, 10.52, 10.53,
       10.54, 10.55, 10.56, 10.57, 10.58, 10.59, 10.6 , 10.61, 10.62,
       10.63, 10.64, 10.65, 10.66, 10.67, 10.68, 10.69, 10.7 , 10.71,
       10.72, 10.73, 10.74, 10.75, 10.76, 10.77, 10.78, 10.79, 10.8 ,
       10.81, 10.82, 10.83, 10.84, 10.85, 10.86, 10.87, 10.88, 10.89,
       10.9 , 10.91, 10.92, 10.93, 10.94, 10.95, 10.96, 10.97, 10.98,
       10.99, 11.  , 11.01, 11.02, 11.03, 11.04, 11.05, 11.06, 11.07,
       11.08, 11.09, 11.1 , 11.11, 11.12, 11.13, 11.14, 11.15, 11.16,
       11.17, 11.18, 11.19, 11.2 , 11.21, 11.22, 11.23, 11.24, 11.25,
       11.26, 11.27,