Determine whether to use sklearn's GradientBoostingRegressor or a neural network as the edge-weight model.
Evaluate which of the two models gives better results.

In [2]:
import csv
import numpy as np
import scipy
import networkx as nx
import math
import sys
import os
import subprocess
import scipy.stats
import scipy.optimize
import operator
from sklearn.manifold import TSNE
from sklearn.ensemble import GradientBoostingRegressor
import matplotlib.pyplot as plt

sys.path.append("../Python_code") # go to parent dir
from canvas_vis import * 
from analytics_combined import *
from generate_proj_to_remove import *
from project_data_analysis import *
from user_embedding import *
from segmentation import *
from evaluation import *
from nonlinear_regressor import *
import pickle
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

In [3]:
# ---- Run parameters ----

# Projects that must be left out of the analysis
projects_to_remove = get_list_of_removed_proj(
    output_filename="../data/proj_to_remove.txt"
)

# Input data file
input_file = "../data/sorted_tile_placements_proj.csv"

# Window of the canvas that is analysed
# (the full canvas is 0-1002 in both x and y)
min_x, max_x = 450, 550
min_y, max_y = 450, 550

# Distance threshold of the update graph: updates that lie within
# dist_threshold positions of each other and co-exist at any point
# in time are connected
dist_threshold = 1

# Number of threads used by the multithreaded steps
num_threads = 3

# Kappa for the region segmentation of the update graph
KAPPA_updates = 1.0

# Kappa for the segmentation of the region graph
KAPPA_region = 0.55

# Parameters of the signed user embedding
ndim = 80
feature_threshold = 5
total_samples = 300
n_negatives = 5
n_iterations = 10

In [4]:
def _remove_if_exists(*filenames):
    '''Delete every file in filenames that currently exists.'''
    for filename in filenames:
        if os.path.exists(filename):
            os.remove(filename)


def _save_pickle(obj, filename):
    '''Pickle obj into filename; the file is closed even if dumping fails.'''
    with open(filename, 'wb') as pfile:
        pickle.dump(obj, pfile)


def _load_pickle(filename):
    '''Return the object pickled in filename.'''
    with open(filename, 'rb') as pfile:
        return pickle.load(pfile)


def create_regions(iterations,
                    modeltype,
                    num_threads,
                    input_file,
                    projects_to_remove,
                    dist_threshold, 
                    ndim, 
                    feature_threshold, 
                    total_samples, 
                    n_negatives, 
                    n_iterations,
                    min_x = 0, 
                    max_x = 1002, 
                    min_y = 0, 
                    max_y = 1002,
                    excluded_folds = None,
                    use_scalar = True,
                    delete_pkl_files = False
                    ):
    '''
       Take all of the updates within input_file and cluster them into regions.

       The first stage builds the update graph, learns an edge-weight model
       and segments the updates into regions. Every further iteration
       (i = 1 .. iterations-1) builds a graph over the current regions, learns
       a GradientBoostingRegressor on it and merges regions into super regions.

       PARAMS:
            - iterations: total number of stages. iterations = 1 runs only
            the update-level segmentation.

            - modeltype indicates which type of model is used to weight the
            edges of the update graph.
            modeltype = 'gboost' will use a GradientBoostingRegressor
            modeltype = 'nn' will use a neural network (sklearn MLPRegressor)
            This only affects the first stage. All subsequent iterations use
            a GradientBoostingRegressor.

            - num_threads: passed to compute_edge_weights_multithread.

            - input_file, projects_to_remove, dist_threshold,
            min_x, max_x, min_y, max_y: passed to create_graph.

            - ndim, feature_threshold, total_samples, n_negatives,
            n_iterations: passed to create_features.

            - excluded_folds = list of values ranging between 0 to 9 inclusive.
            The folds indicated in this list will not be used to train models
            or generate regions. None (the default) excludes nothing.

            - use_scalar: if true, StandardScaler is used to scale the data
            that trains the first-stage model; the fitted scalers are pickled
            and their filenames are handed to the edge-weight computation.

            - delete_pkl_files: boolean
                If true, then all the saved pickle files will be deleted and regenerated
                else the pickles will be loaded whenever possible

        RETURNS:
            The regions produced by the last stage, or None when modeltype is
            not 'nn' or 'gboost'.

        NOTE: This function saves several pkl files (fixed names, in the
        current working directory) in order to avoid recomputation in case of
        failure. Stale files are therefore reused unless delete_pkl_files is
        True or they are removed by hand.
    '''
    # Imported locally so the function does not rely on `time` leaking in
    # through one of the notebook's star imports.
    import time

    # Verify that the user selected a valid modeltype
    if modeltype not in {'nn', 'gboost'}:
        return None

    # Fresh list per call instead of a shared mutable default argument.
    if excluded_folds is None:
        excluded_folds = []

    graph_filename = 'graph.pkl'
    ups_filename = 'ups.pkl'
    features_filename = 'features.pkl'
    model0_filename = 'model0.pkl'
    regions_filename = 'up_regions.pkl'
    scalar_filenameA = 'std_scaler_A.pkl'
    scalar_filenameb = 'std_scaler_b.pkl'

    if delete_pkl_files:
        _remove_if_exists(graph_filename, ups_filename, features_filename,
                          model0_filename, regions_filename,
                          scalar_filenameA, scalar_filenameb)

    # Create a graph of the updates where each update is one node.
    # Both pickles must exist to be reused; otherwise the graph is rebuilt.
    if not (os.path.exists(graph_filename) and os.path.exists(ups_filename)):
        t = time.time()
        G_ups, ups = create_graph(input_file, projects_to_remove, dist_threshold, min_x, max_x, min_y, max_y, excluded_folds = excluded_folds)

        print("num edges = ", G_ups.n_edges)

        _save_pickle(G_ups, graph_filename)
        _save_pickle(ups, ups_filename)

        print("time to create G_ups= ", time.time()-t, " seconds")
    else:
        G_ups = _load_pickle(graph_filename)
        ups = _load_pickle(ups_filename)

    updates_proj = compute_updates_per_project(ups, False)

    # Fold borders are only needed when some folds are held out.
    fold_boundaries = None
    if len(excluded_folds) > 0:
        folds = create_folds(num_folds = 10, min_x = min_x, min_y = min_y, max_x = max_x, max_y = max_y)

        # List of dictionaries containing min_x, max_x, min_y, max_y for each fold
        fold_boundaries = [get_fold_border(fold) for fold in folds]

    # Create features for the updates graph (or reuse the pickled ones).
    if not os.path.exists(features_filename):
        features = create_features(G_ups, ups, ndim, feature_threshold,
                                   total_samples, n_negatives, n_iterations, features_filename)
    else:
        features = _load_pickle(features_filename)

    if not os.path.exists(model0_filename):
        #Creating feature matrix A and vector of labels b
        #for learning an edge weight model
        t = time.time()

        A,b = build_feat_label_data(G_ups, ups, features, fold_boundaries = fold_boundaries, excluded_folds = excluded_folds)

        print("A before scaling")
        print(A)

        if use_scalar:
            scaler_A = StandardScaler()
            scaler_b = StandardScaler()

            A = scaler_A.fit_transform(A)

            # StandardScaler needs a 2-D column. A plain ndarray is used
            # because newer scikit-learn versions reject np.matrix input.
            b_column = np.asarray(b).reshape(-1, 1)
            b = scaler_b.fit_transform(b_column).ravel()

            # Opening with 'wb' truncates, so old scaler files are replaced.
            # The files are closed before the edge-weight step reads them
            # back by filename.
            _save_pickle(scaler_A, scalar_filenameA)
            _save_pickle(scaler_b, scalar_filenameb)

        print(A.shape)
        print("A after scaling")
        print(A)
        print(b.sum() / b.shape[0])

        print("time to build feat label data = ", time.time()-t, " seconds")

        t = time.time()
        if modeltype == 'gboost':
            model = GradientBoostingRegressor(random_state=1, n_estimators=25).fit(A, b)
        else:
            # model = createNonlinearRegressionNeuralNet(A, b, train_proportion = 0.8, first_nodes=64, second_nodes=32, dropout = 0)
            model = MLPRegressor(max_iter=100, verbose=True, early_stopping=True).fit(A, b)
        _save_pickle(model, model0_filename)

        print("time to fit model= ", time.time()-t, " seconds")
    else:
        model = _load_pickle(model0_filename)

    if not os.path.exists(regions_filename):
        #Computing edge weights using multithreading
        #and sorting the edges in increasing order.
        #Not every edge in the the unique_edges file
        #will be included in the new file.

        t = time.time()
        if use_scalar:
            compute_edge_weights_multithread(G_ups, ups, model, features, features_filename, num_threads, scalar_filenameA, scalar_filenameb)
        else:
            compute_edge_weights_multithread(G_ups, ups, model, features, features_filename, num_threads)
        G_ups.sort_edges()

        print("time to compute edge weights= ", time.time()-t, " seconds")

        #Performing region segmentation on the update graph

        t = time.time()

        # comp_assign, int_weights = region_segmentation(G_ups, ups, 0.8)
        # regions, sizes, int_weights = extract_regions(comp_assign, int_weights)
        # NOTE(review): `recall` is not defined in this function; presumably
        # it is a metric callable provided by a star import - confirm it is
        # not shadowed by a module-level variable of the same name.
        regions, sizes, int_weights = superv_reg_segm_ups(G_ups, ups, 0., 2., updates_proj, recall)

        _save_pickle([regions, sizes, int_weights], regions_filename)

        print("num regions = ", len(regions), " max size region = ", np.max(sizes))

        print("time to create regions= ", time.time()-t, " seconds")
    else:
        regions, sizes, int_weights = _load_pickle(regions_filename)[:3]

    durations = compute_update_durations(ups)
    for i in range(1, iterations):
        reg_graph_filename = "reg_graph" + str(i) + ".pkl"
        reg_features_filename = "features_regions" + str(i) + ".pkl"
        reg_model_filename = 'model_regions' + str(i) + '.pkl'
        super_regions_filename = 'super_regions' + str(i) + '.pkl'

        if delete_pkl_files:
            _remove_if_exists(reg_graph_filename, reg_features_filename,
                              reg_model_filename, super_regions_filename)

        # Graph whose nodes are the regions of the previous stage.
        if not os.path.exists(reg_graph_filename):
            t = time.time()

            G_reg = build_region_graph(G_ups, regions, ups, .5, projects_to_remove)

            _save_pickle(G_reg, reg_graph_filename)

            print("time to create reg_graph",i,"= ", time.time()-t, " seconds")
        else:
            G_reg = _load_pickle(reg_graph_filename)

        if not os.path.exists(reg_features_filename):
            region_features = create_superfeatures(
                regions, int_weights, ups, features, durations, reg_features_filename)
        else:
            #Reading existing feature data
            region_features = _load_pickle(reg_features_filename)

        if not os.path.exists(reg_model_filename):
            t = time.time()

            A, b = build_feat_label_regions(G_reg, ups, region_features)

            print(A.shape)
            print(b.sum() / b.shape[0])

            print("time to build feat label data",i,"= ", time.time()-t, " seconds")

            #Learning region edge weight model using gradient boosting regression
            #and saving model.

            t = time.time()

            model = GradientBoostingRegressor(random_state=1, n_estimators=25).fit(A, b)

            _save_pickle(model, reg_model_filename)

            print("time to fit model", i, "= ", time.time()-t, " seconds")
        else:
            #Reading an existing region edge weight model
            model = _load_pickle(reg_model_filename)

        #Computing region edge weights using multithreading
        #and sorting the edges in increasing order.
        if not os.path.exists(super_regions_filename):
            t = time.time()

            compute_edge_weights_multithread(G_reg, ups, model, region_features, reg_features_filename, num_threads)
            G_reg.sort_edges()

            print("time compute edge weights",i,"= ", time.time()-t, " seconds")

            #Performing segmentation on the region graph
            t = time.time()

            # comp_assign_reg, int_weights_reg = region_segmentation(G_reg, regions, 0.55)
            # reg_regions, reg_sizes, int_weights = extract_regions(comp_assign_reg, int_weights_reg)
            # regions, super_region_sizes, super_region_assign = extract_super_region_info(reg_regions, regions)
            regions, super_region_sizes, int_weights = superv_reg_segm_reg(G_reg, ups, regions, 0., 2., updates_proj, recall)

            _save_pickle([regions, super_region_sizes, int_weights], super_regions_filename)

            print("time to create super regions",i,"= ", time.time()-t, " seconds")
        else:
            regions, super_region_sizes, int_weights = _load_pickle(super_regions_filename)[:3]

        print("num regions = ", len(regions), " max size region = ", np.max(super_region_sizes))


    return regions


In [5]:
# Single-stage run with the neural-network edge model on the configured
# canvas window; every cached pickle is deleted and regenerated.
regions = create_regions(
    iterations=1,
    modeltype="nn",
    num_threads=num_threads,
    input_file=input_file,
    projects_to_remove=projects_to_remove,
    dist_threshold=dist_threshold,
    ndim=ndim,
    feature_threshold=feature_threshold,
    total_samples=total_samples,
    n_negatives=n_negatives,
    n_iterations=n_iterations,
    min_x=min_x,
    max_x=max_x,
    min_y=min_y,
    max_y=max_y,
    use_scalar=True,
    delete_pkl_files=True,
)

num edges =  1747975
time to create G_ups=  130.19095659255981  seconds
python ../../signet/signet.py -l signet_id.txt -i signet.txt -o signet -d 80 -t 5 -s 300
balanced:
+++  0.8509881685214751  rand =  0.34161980874689163
+--  0.08046849688306315  rand =  0.1899310287891106
unbalanced:
++-  0.06119599033495848  rand =  0.44119452080698934
---  0.007347344260503316  rand =  0.0272546416570083
avg pos =  0.8233456984688834 , n =  662873
avg neg =  1.7596048169773761 , n =  285362
time to create G_ups features=  900.0313775539398  seconds
A before scaling
[[0.         1.         0.03194444 0.         0.86627982 0.48888889]
 [0.         1.         0.0725     0.         0.70907629 0.14285714]
 [0.         1.41421356 0.09       0.         0.91005362 0.47191011]
 ...
 [0.         1.41421356 0.70583333 0.         1.2265822  0.42105263]
 [0.         1.         0.08277778 0.         0.63661309 0.48333333]
 [0.         1.41421356 0.71583333 0.         1.0707664  0.6       ]]
(626380, 6)
A after

In [None]:
# ONLY THE CODE ABOVE THIS IS USED FOR RUNNING MLP

In [2]:
# Bounds of the canvas window that is being looked at
min_x, min_y = 0, 0
max_x, max_y = 10, 10


In [3]:
# Parameters later handed to embed_users
ndim, threshold = 80, 5
total_samples = 300
n_negatives = 5
n_iterations = 10

In [4]:
# Input locations
input_file = "../data/sorted_tile_placements_proj.csv"
js_filename = "../data/atlas_complete.json"

# Projects that must be left out of the analysis
projects_to_remove = get_list_of_removed_proj(
    output_filename="../data/proj_to_remove.txt"
)

# Picture names and descriptions read from the atlas JSON
names, descriptions = read_picture_names_and_descriptions(js_filename)

In [5]:
# Prefix prepended to the pickle filenames written and read by the cells below
file_prefix = "10x10"

In [6]:

# TODO: How far apart may two vertices be and still get connected (1-4)?
# The distance threshold used here is 4.
G, ups = create_graph(
    input_file,
    projects_to_remove,
    4,
    min_x,
    max_x,
    min_y,
    max_y,
    file_prefix="comparison",
)

print("num edges = ", G.n_edges)

num edges =  189495


In [7]:
# Cache the update graph and the updates on disk. The `with` blocks close
# each file even if pickling raises.
with open(file_prefix + 'graph.pkl', 'wb') as pfile:
    pickle.dump(G, pfile)

with open(file_prefix + 'ups.pkl', 'wb') as pfile:
    pickle.dump(ups, pfile)

In [8]:
# Reload the cached update graph and updates. The `with` blocks close each
# file even if unpickling raises.
with open(file_prefix + 'graph.pkl', 'rb') as pfile:
    G = pickle.load(pfile)

with open(file_prefix + 'ups.pkl', 'rb') as pfile:
    ups = pickle.load(pfile)

In [9]:
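#Defining 7 edge feature functions and computing the information they require
#(only 6 of them are registered in the `features` list built further down;
#distance_duration is defined but not registered).
#Adding a new feature without changing the rest of the code should
#be easy.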
#TODO: Are there other features that would improve the segmentation?
#TODO: How many dimensions we need?

#Functions that compute edge features for the
#update graph. The functions receive indexes
#of a pair of updates and some necessary data
#and return a real value.

def different_color(i, j, ups, data=None):
    '''
        Indicator feature: 1 when updates i and j carry different
        colors (field 4 of an update), 0 when the colors are equal.
        The data argument is not used.
    '''
    return 0 if ups[i][4] == ups[j][4] else 1
        
def distance_space(i, j, ups, data=None):
    '''
        Euclidean distance between the positions of updates i and j
        (fields 2 and 3 of an update hold x and y).
        The data argument is not used.
    '''
    delta_x = ups[i][2] - ups[j][2]
    delta_y = ups[i][3] - ups[j][3]

    return np.sqrt(delta_x ** 2 + delta_y ** 2)

def distance_time(i, j, ups, data=None):
    '''
        Absolute difference between the timestamps of updates i and j
        (field 0 of an update), divided by 3600000 to express it
        in hours. The data argument is not used.
    '''
    elapsed = np.abs(ups[i][0] - ups[j][0])

    return elapsed / 3600000 #hours

def distance_duration(i, j, ups, durations):
    '''
        Distance between the durations of updates i and j, as
        computed by dist_duration (see that function for details).
        The ups argument is not used.
    '''
    duration_i, duration_j = durations[i], durations[j]
    return dist_duration(duration_i, duration_j)

def distance_color(i, j, ups, conflicts):
    '''
        Color distance between updates i and j based on their conflicts.

        Returns 0 when both updates have the same color. Otherwise the
        result is 0, 1 or 2: one point if the color of i appears among
        the colors of the updates listed in conflicts[j], plus one point
        if the color of j appears among those listed in conflicts[i].

        conflicts[k] must provide two update indexes (positions 0 and 1).
        An index that does not refer to an existing update (>= len(ups))
        is treated as "no conflict" and skipped.
        NOTE(review): presumably the two entries are the updates that
        precede/replace update k at the same (x,y) position - confirm
        against compute_update_conflicts.
    '''
    color_i = ups[i][4]
    color_j = ups[j][4]

    if color_i == color_j:
        return 0

    n_ups = len(ups)

    def conflict_colors(k):
        # Strictly less than len(ups): the previous `<=` check let index
        # len(ups) through and raised IndexError on ups[len(ups)].
        pair = (conflicts[k][0], conflicts[k][1])
        return [ups[c][4] for c in pair if c < n_ups]

    conf_i = conflict_colors(i)
    conf_j = conflict_colors(j)

    return int(color_i in conf_j) + int(color_j in conf_i)
    
def distance_user_embedding(i, j, ups, data):
    '''
        Euclidean distance between the embeddings of the users who
        made updates i and j. The user of an update is its field 1;
        data['index'] maps a user to its row in data['emb'].
    '''
    row_of = data['index']
    embedding = data['emb']

    vec_i = embedding[row_of[ups[i][1]]]
    vec_j = embedding[row_of[ups[j][1]]]

    return np.linalg.norm(vec_i - vec_j)

def distance_user_colors(i, j, ups, data):
    '''
        Distance between the color histograms of the users who made
        updates i and j: one minus the sum, over all colors, of the
        element-wise minimum of the two histograms.
        The user of an update is its field 1; data['index'] maps a
        user to its row in data['emb'], whose rows must offer a
        .minimum() method.
    '''
    row_of = data['index']
    histograms = data['emb']

    hist_i = histograms[row_of[ups[i][1]]]
    hist_j = histograms[row_of[ups[j][1]]]

    overlap = hist_i.minimum(hist_j).sum()
    return 1.-overlap

In [10]:
# Computes the auxiliary data the edge features need and assembles
# the feature table. This is slow.

t = time.time()

conflicts = compute_update_conflicts(ups)
durations = compute_update_durations(ups)
user_color, user_index_color = compute_user_color(ups)

user_index, emb = embed_users(
    G, ups, ndim, threshold, total_samples, n_negatives, n_iterations, True
)

# One entry per edge feature: its name, the function computing it and the
# extra data handed to that function.
features = [
    {'name': "different_color", 'func': different_color, 'data': None},
    {'name': "distance_space", 'func': distance_space, 'data': None},
    {'name': "distance_time", 'func': distance_time, 'data': None},
    {'name': "distance_color", 'func': distance_color, 'data': conflicts},
    {
        'name': "distance_user_embedding",
        'func': distance_user_embedding,
        'data': {'index': user_index, 'emb': emb},
    },
    {
        'name': "distance_user_colors",
        'func': distance_user_colors,
        'data': {'index': user_index_color, 'emb': user_color},
    },
]

print("time = ", time.time()-t, " seconds")

python ../../signet/signet.py -l signet_id.txt -i signet.txt -o signet -d 80 -t 5 -s 300
balanced:
+++  0.8138598595168751  rand =  0.33582912434093615
+--  0.14665067671749185  rand =  0.19387030070573913
unbalanced:
++-  0.028696248072640054  rand =  0.44195235033334423
---  0.010793215692992976  rand =  0.028348224619980388
avg pos =  0.4156856997368502 , n =  18677
avg neg =  1.8777767123211635 , n =  8193
time =  133.08341026306152  seconds


In [11]:
# Cache the feature table on disk. The `with` block closes the file even if
# pickling raises.
with open(file_prefix + 'features.pkl', 'wb') as pfile:
    pickle.dump(features, pfile)

In [12]:
# Reload the cached feature table. The `with` block closes the file even if
# unpickling raises.
with open(file_prefix + 'features.pkl', 'rb') as pfile:
    features = pickle.load(pfile)

In [13]:
locations = store_locations("../data/atlas_complete.json")

# Bounds are passed by keyword, like the other create_folds call in this
# notebook, so the call does not depend on the positional parameter order.
folds = create_folds(min_x=min_x, min_y=min_y, max_x=max_x, max_y=max_y)

# List of dictionaries containing min_x, max_x, min_y, max_y for each fold
fold_boundaries = [get_fold_border(fold) for fold in folds]

# All edges that belong to the validation fold (fold 0) need to be excluded
A_train, b_train = build_feat_label_data(
    G, ups, features, fold_boundaries=fold_boundaries, excluded_folds=[0]
)

In [14]:
# Trying standard scaler 
# Standardise the training features and labels with StandardScaler
scaler_A = StandardScaler()
scaler_b = StandardScaler()

scaled_A = scaler_A.fit_transform(A_train)

# StandardScaler needs a 2-D column. A plain ndarray is used because newer
# scikit-learn versions reject np.matrix input.
transformed_b = np.asarray(b_train).reshape(-1, 1)
scaled_b = scaler_b.fit_transform(transformed_b).ravel()

print(scaled_b.shape)
print(scaled_A.shape)
print(b_train.shape)
print(A_train.shape)

(40292,)
(40292, 6)
(40292,)
(40292, 6)


In [15]:
# # trying minmax scaler
# scaler_A = MinMaxScaler(feature_range=(0, 1))
# scaler_b = MinMaxScaler(feature_range=(0, 1))

# scaler_A.fit(A_train)
# scaled_A = scaler_A.transform(A_train)

# transformed_b = np.matrix(b_train).T
# scaler_b.fit(transformed_b)
# scaled_b = scaler_b.transform(transformed_b)
# scaled_b = scaled_b.T[0]

# print(scaled_b.shape)
# print(scaled_A.shape)
# print(b_train.shape)
# print(A_train.shape)

In [16]:
# Persist the fitted scalers; the edge-weight computation later receives
# these filenames. The `with` blocks guarantee the files are flushed and
# closed before anything reads them back.
scalerA_filename = "scaler_A.pkl"
scalerb_filename = "scaler_b.pkl"

with open(scalerA_filename, 'wb') as pfile:
    pickle.dump(scaler_A, pfile)

with open(scalerb_filename, 'wb') as pfile:
    pickle.dump(scaler_b, pfile)

In [17]:
# Gradient-boosting edge-weight model trained on the scaled data
model_gboost = GradientBoostingRegressor(
    random_state=1,
    n_estimators=25,
).fit(scaled_A, scaled_b)

In [18]:
# Neural-network edge-weight model trained on the same scaled data
model_nn = createNonlinearRegressionNeuralNet(
    scaled_A,
    scaled_b,
    train_proportion=0.8,
    first_nodes=64,
    second_nodes=32,
    dropout=0,
)

Train on 32233 samples, validate on 8059 samples
Epoch 1/256
 - 28s - loss: 6.7114e-04 - val_loss: 9.0966e-07
Epoch 2/256
 - 29s - loss: 6.1046e-08 - val_loss: 3.7310e-12
Epoch 3/256
 - 28s - loss: 9.6645e-10 - val_loss: 3.5261e-12
Epoch 4/256
 - 28s - loss: 7.7539e-10 - val_loss: 3.4856e-12
Epoch 5/256
 - 28s - loss: 8.0397e-10 - val_loss: 8.7282e-10
Epoch 6/256
 - 28s - loss: 7.9980e-10 - val_loss: 1.5783e-09
Epoch 00006: early stopping


In [19]:
# Display the last entry of the gradient-boosting model's train_score_
model_gboost.train_score_[-1]

0.0

In [20]:
# Predict on the (scaled) training features with the neural network and
# print the predictions for a visual sanity check
prediction = model_nn.predict(scaled_A)
print(prediction)

[[3.9663657e-05]
 [3.9663657e-05]
 [3.9663657e-05]
 ...
 [3.9663657e-05]
 [3.9663657e-05]
 [3.9663657e-05]]


In [21]:
# Number of neural-network predictions that are below zero
count = sum(1 for value in prediction if value < 0)

print(count)

0


In [22]:
# Number of scaled training labels that are below zero.
# scaled_b was mean-centred by StandardScaler, so a count of zero here would
# mean every scaled label is (numerically) zero, i.e. the labels are constant.
count = sum(1 for value in scaled_b if value < 0)

print(count)

0


In [23]:
# Ground truth for the analysed canvas window
ground_truth = create_ground_truth(
    input_file,
    min_x=min_x,
    max_x=max_x,
    min_y=min_y,
    max_y=max_y,
    projects_to_remove=projects_to_remove,
)

# Border of every fold, rebuilt from `folds`
fold_boundaries = [get_fold_border(fold) for fold in folds]


In [24]:
# Threshold handed to region_segmentation
kappa = 0.25

# Weight the edges with the gradient-boosting model (5 threads), passing the
# pickled scalers by filename
compute_edge_weights_multithread(
    G, ups, model_gboost, features, file_prefix + 'features.pkl', 5,
    scalerX=scalerA_filename, scalerY=scalerb_filename,
)
G.sort_edges()

comp_assign, int_weights = region_segmentation(G, ups, kappa)
regions, sizes, int_weights = extract_regions(comp_assign, int_weights)

# Evaluate only inside fold 0, the fold excluded from the training data
held_out = fold_boundaries[0]
num_correct_counter, num_assignments_made, precision, recall = evaluate(
    locations, regions, ups, ground_truth, threshold=0.3,
    min_x=held_out["min_x"], max_x=held_out["max_x"],
    min_y=held_out["min_y"], max_y=held_out["max_y"],
)

print("Recall:", recall)
print("Precision:", precision)

Feature shape: (36208, 6)
result shape: (36208,)
Feature shape: (36207, 6)
result shape: (36207,)
Feature shape: (36207, 6)
result shape: (36207,)
Feature shape: (36207, 6)
result shape: (36207,)
Feature shape: (36208, 6)
result shape: (36208,)
Recall: 0.0
Precision: 0.0


In [25]:
# Weight the edges with the neural-network model (single-threaded variant),
# passing the pickled scalers by filename
compute_edge_weights(
    G, ups, model_nn, features,
    scalerX=scalerA_filename, scalerY=scalerb_filename,
)
G.sort_edges()

comp_assign, int_weights = region_segmentation(G, ups, kappa)
regions, sizes, int_weights = extract_regions(comp_assign, int_weights)

# Evaluate only inside fold 0, the fold excluded from the training data
held_out = fold_boundaries[0]
num_correct_counter, num_assignments_made, precision, recall = evaluate(
    locations, regions, ups, ground_truth, threshold=0.3,
    min_x=held_out["min_x"], max_x=held_out["max_x"],
    min_y=held_out["min_y"], max_y=held_out["max_y"],
)

print("Recall:", recall)
print("Precision:", precision)

Feature shape: (181037, 6)
result shape: (181037, 1)
Recall: 0.0
Precision: 0.0
