# Unified link predicition
### for structural udirected, node2vec, structural directed and structural directed update

# Libraries

In [36]:
import pandas as pd # to create a dataframe out of nonexistent edges and create X and y for prediction
import networkx as nx # to create a graph out of data in .txt file
import random # for nonexistent edges generator

from node2vec import Node2Vec # for node2vec
import numpy as np 
from math import log # for log calculation in recalculated adamic adar index

from sklearn.model_selection import train_test_split # for train/test split
from sklearn.linear_model import LogisticRegression # to create a model to predict
from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score, precision_score,recall_score # param
from sklearn.preprocessing import StandardScaler # for coef standartisation

## Generating X, y out of Graph

Генерация несуществующих ребер

In [37]:
def nonexistent_edges_generator(graph,nodelist):
    i, j = random.sample(nodelist, 2)
    if graph.has_edge(i, j):
        nonexistent_edges_generator(graph,nodelist)
    else:
        return i, j
    return i, j

In [38]:
test_type = ['usual', 'weighted', 'bidirectional', 'puff']
graph_type = ['directed', 'undirected']

создание X,y в общем случае 

In [39]:
def create_GXy_usual(data, graphtype):
    if graphtype == 'undirected':
        G = nx.read_edgelist(data, delimiter='\t',create_using=nx.Graph())
    elif graphtype == 'directed':
        G = nx.read_edgelist(data, delimiter='\t',create_using=nx.DiGraph())
    else:
        raise ValueError('such test is impossible for this type of graph')
        
    # dataframe for existent edges
    existent_edges_df = pd.DataFrame(sorted([tuple(sorted(edge)) for edge in list(G.edges)]),columns=['source', 'target'])
    existent_edges_df['edge'] = [1] * len(existent_edges_df)
    # generating nonexistent edges
    nonexistent_edges = [nonexistent_edges_generator(G,list(G.nodes)) for _ in range(len(existent_edges_df))]
    # dataframe for nonexistent edges
    nonexistent_edges_df = pd.DataFrame(nonexistent_edges,columns=['source', 'target'])
    nonexistent_edges_df['edge'] = [0] * len(nonexistent_edges_df)
    # merge into one dataframe
    full_df = pd.concat((existent_edges_df, nonexistent_edges_df), axis=0, ignore_index=True)
    # deconstruct into X and y
    X = full_df.drop(['edge'], axis=1, inplace=False).copy()
    y = full_df['edge'].copy()
    return G,X,y, 0.1

создание X,y для проверки эффективности lp на тяжелых/легких ребрах

In [40]:
def create_GXy_weight(data, graphtype):
    df = pd.read_csv(data, sep='\t',header=None, names=['source', 'target','weight'])
    df['weight'] = df['weight'].str.replace(',', '.').astype(float)
    
    if graphtype == 'undirected':
        G = nx.to_networkx_graph(df, create_using=nx.Graph())
    elif graphtype == 'directed':
        G = nx.to_networkx_graph(df, create_using=nx.DiGraph())
    else:
        raise ValueError('such test is impossible for this type of graph')
    
    # flag = 1, the lightest first
    # flog = 0 , the heaviest first
    flag = 0 
    # dataframe for existent edges
    existent_edges_df = df.sort_values(by='weight', ascending= flag)
    existent_edges_df['edge'] = [1] * len(existent_edges_df)
    # generating nonexistent edges
    nonexistent_edges = [nonexistent_edges_generator(G,list(G.nodes)) for _ in range(len(existent_edges_df))]
    # dataframe for nonexistent edges
    nonexistent_edges_df = pd.DataFrame(nonexistent_edges,columns=['source', 'target'])
    nonexistent_edges_df['weight'] = [0] * len(nonexistent_edges_df)
    nonexistent_edges_df['edge'] = [0] * len(nonexistent_edges_df)
    # merge into one dataframe
    full_df = pd.concat((existent_edges_df, nonexistent_edges_df), axis=0, ignore_index=True)
    # deconstruct into X and y
    X = full_df.drop(['weight','edge'], axis=1, inplace=False).copy()
    y = full_df['edge'].copy()
    return G,X,y,0.05

создание X,y для проверки эффективности lp на двунаправленных ребрах

In [41]:
def have_bidirectional_relationship(G, node1, node2):
    return G.has_edge(node1, node2) and G.has_edge(node2, node1)

def create_GXy_bi(data, graphtype):
    if graphtype != 'directed':
        raise ValueError('such test is impossible for this type of graph')
    G = nx.read_edgelist(data, delimiter='\t',create_using=nx.DiGraph())
    df = pd.DataFrame(sorted([tuple(sorted(edge)) for edge in list(G.edges)]),columns=['source', 'target'])
    df['bi']= [0] * len(df)
    i = 0
    for x, y in zip(df['source'], df['target']):
        if have_bidirectional_relationship(G,x, y):
            df.loc[i,'bi'] = 1
        i+=1
    
    existent_edges_df = df.sort_values(by='bi', ascending= 0)
    existent_edges_df['edge'] = [1] * len(existent_edges_df)
    nonexistent_edges = [nonexistent_edges_generator(G,list(G.nodes)) for _ in range(len(existent_edges_df))]
    nonexistent_edges_df = pd.DataFrame(nonexistent_edges,columns=['source', 'target'])
    nonexistent_edges_df['bi'] = [0] * len(nonexistent_edges_df)
    nonexistent_edges_df['edge'] = [0] * len(nonexistent_edges_df)
    # merge into one dataframe
    full_df = pd.concat((existent_edges_df, nonexistent_edges_df), axis=0, ignore_index=True)

    bi_size = len(full_df.loc[full_df['bi'] == 1])/len(full_df)
    # deconstruct into X and y
    X  = full_df.drop(['bi','edge'], axis=1, inplace=False).copy()
    y = full_df['edge'].copy()
    
    if bi_size > 0:
        return G,X,y,bi_size 
    else:
        raise ValueError('no bidirectional edges')

создание "раздутого" X,y 

In [42]:
def create_GXy_puff(data, graphtype):
    if graphtype != 'directed':
        raise ValueError('such test is impossible for non directed graph')
    G = nx.read_edgelist(data, delimiter='\t',create_using=nx.DiGraph())
    
    # dataframe for existent edges
    existent_edges_df = pd.DataFrame(sorted([tuple(sorted(edge)) for edge in list(G.edges)]),columns=['source', 'target'])
    existent_edges_df['edge'] = [1] * len(existent_edges_df)
    # generating nonexistent edges
    nonexistent_edges = [nonexistent_edges_generator(G,list(G.nodes)) for _ in range(len(existent_edges_df))]
    # dataframe for nonexistent edges
    nonexistent_edges_df = pd.DataFrame(nonexistent_edges,columns=['source', 'target'])
    nonexistent_edges_df['edge'] = [0] * len(nonexistent_edges_df)
    # merge into one dataframe
    full_df = pd.concat((existent_edges_df, nonexistent_edges_df), axis=0, ignore_index=True)
    
    dupl_df =pd.DataFrame(columns=['source', 'target','edge'])
    i = 0
    j = 0
    for x, y in zip(full_df['source'], full_df['target']):
        if not G.has_edge(y, x):
            dupl_df.loc[i] = [y] + [x] + [full_df.loc[j,'edge']]
            i += 1
        j+=1
    
    df = pd.concat((full_df, dupl_df), axis=0, ignore_index=True)
    
    # deconstruct into X and y
    X = df.drop(['edge'], axis=1, inplace=False).copy()
    y = df['edge'].copy()
    y = y.astype(int)
    
    return G,X,y, 0.1  

# Unified generation of G, X, y

In [43]:
def create_G_X_y_test_size(data, test_set_type, graphtype):
    if test_set_type == 'usual':
        return create_GXy_usual(data, graphtype)
    elif test_set_type == 'weighted':
        return create_GXy_weight(data, graphtype)
    elif test_set_type == 'bidirectional':
        return create_GXy_bi(data,graphtype)
    elif test_set_type == 'puff':
        return create_GXy_puff(data, graphtype)
    else:
        raise ValueError('no such type of link prediction model')

## Node2vec

Определение операторов из https://stellargraph.readthedocs.io/en/stable/demos/link-prediction/node2vec-link-prediction.html#refs .

In [44]:
def operator_hadamard(u, v):
    return u * v


def operator_l1(u, v):
    return np.abs(u - v)


def operator_l2(u, v):
    return (u - v) ** 2


def operator_avg(u, v):
    return (u + v) / 2.0

operators = [operator_hadamard, operator_l1, operator_l2, operator_avg]

def build_Node2Vec_model(G_dir):
    G = G_dir.to_undirected()
    # Generate walks
    node2vec = Node2Vec(G, dimensions=32, walk_length=8, num_walks=50)
    # train node2vec model
    n2w_model = node2vec.fit(window=7, min_count=1)
    return(n2w_model)

# Structural undirected

In [45]:
def calc_undir_character(G_dir,X_new):
    G = G_dir.to_undirected()
    
    X = X_new.copy()
    ra_idx = nx.resource_allocation_index(G, X[['source', 'target']].values)
    X['ra_index'] = [p for u, v, p in ra_idx]

    jaccard_coef = nx.jaccard_coefficient(G, X[['source', 'target']].values)
    X['jaccard_coef'] = [p for u, v, p in jaccard_coef]

    adar_coef = nx.adamic_adar_index(G, X[['source', 'target']].values)
    X['adamic_adar_coef'] = [p for u, v, p in adar_coef]

    pref_attach = nx.preferential_attachment(G, X[['source', 'target']].values)
    X['pref_attach'] = [p for u, v, p in pref_attach]
    return X

# Structural directed

In [46]:
def adjacent_edges(graph, node, mode):
    if mode == 'out':
        return [item[1] for item in list(graph.out_edges(node))]
    elif mode == 'in':
        return [item[0] for item in list(graph.in_edges(node))]
    else:
        return [item[1] for item in list(graph.edges(node))] # for directed graph same as mode 'out'

def common_neighbors(graph, u, v, mode):
    edges_u = adjacent_edges(graph, u, mode)
    edges_v = adjacent_edges(graph, v, mode)
    return list(set.intersection(set(edges_u), set(edges_v))) # find intersection of two sets

def resource_allocation(graph, u, v, mode):
    if u not in graph:
        raise nx.NetworkXError("u is not in the graph.")
    if v not in graph:
        raise nx.NetworkXError("v is not in the graph.")
    result = 0
    for w in common_neighbors(graph, u, v,mode):
        degree = len(adjacent_edges(graph, w, mode))
        if degree != 0:
            result += 1/degree
    return result

def jaccard_coefficient(graph, u, v, mode):
    if u not in graph:
        raise nx.NetworkXError("u is not in the graph.")
    if v not in graph:
        raise nx.NetworkXError("v is not in the graph.")
    union_size = len(set(adjacent_edges(graph, u, mode)) | set(adjacent_edges(graph, v, mode)))
    if union_size == 0:
        return 0
    return len(list(common_neighbors(graph, u, v, mode))) / union_size

def adamic_adar_index(graph, u, v, mode):
    if u not in graph:
        raise nx.NetworkXError("u is not in the graph.")
    if v not in graph:
        raise nx.NetworkXError("v is not in the graph.")
    result = 0
    for w in common_neighbors(graph, u, v,mode):
        degree = len(adjacent_edges(graph, w, mode))
        if degree > 0:
            if log(degree)!= 0:
                result += 1/log(degree)
    return result

def preferential_attachment(graph, u, v, mode):
    if u not in graph:
        raise nx.NetworkXError("u is not in the graph.")
    if v not in graph:
        raise nx.NetworkXError("v is not in the graph.")
    return len(adjacent_edges(graph, u, mode))*len(adjacent_edges(graph, v, mode))

def calc_dir_character(G,X_new):
    X = X_new.copy()
    
    mode = 'out'
    X['ra_index_out'] = [resource_allocation(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['jaccard_coef_out'] = [jaccard_coefficient(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['adamic_adar_coef_out'] = [adamic_adar_index(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['pref_attach_out'] = [preferential_attachment(G, u, v, mode) for u,v in zip(X['source'], X['target'])]

    mode = 'in'
    X['ra_index_in'] = [resource_allocation(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['jaccard_coef_in'] = [jaccard_coefficient(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['adamic_adar_coef_in'] = [adamic_adar_index(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['pref_attach_in'] = [preferential_attachment(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    
    G_undir = G.to_undirected()
    ra_idx = nx.resource_allocation_index(G_undir, X[['source', 'target']].values)
    X['ra_index'] = [p for u, v, p in ra_idx]

    jaccard_coef = nx.jaccard_coefficient(G_undir, X[['source', 'target']].values)
    X['jaccard_coef'] = [p for u, v, p in jaccard_coef]

    adar_coef = nx.adamic_adar_index(G_undir, X[['source', 'target']].values)
    X['adamic_adar_coef'] = [p for u, v, p in adar_coef]

    pref_attach = nx.preferential_attachment(G_undir, X[['source', 'target']].values)
    X['pref_attach'] = [p for u, v, p in pref_attach]
    return X

# Structural directed with additional characteristics

In [47]:
def common_neighbors_recalc(G, u, v):
    edges_u = adjacent_edges(G, u, 'out')
    edges_v = adjacent_edges(G, v, 'in')
    return list(set.intersection(set(edges_u), set(edges_v))) # find intersection of two sets

def resource_allocation_recalc(G, u, v, mode):
    if u not in G:
        raise nx.NetworkXError("u is not in the graph.")
    if v not in G:
        raise nx.NetworkXError("v is not in the graph.")
    result = 0
    for w in common_neighbors_recalc(G, u, v):
        degree = len(adjacent_edges(G, w, mode))
        if degree != 0:
            result += 1/degree
    return result

def jaccard_coefficient_recalc(G, u, v, mode):
    if u not in G:
        raise nx.NetworkXError("u is not in the graph.")
    if v not in G:
        raise nx.NetworkXError("v is not in the graph.")
    union_size = len(set(adjacent_edges(G, u, mode)) | set(adjacent_edges(G, v, mode)))
    if union_size == 0:
        return 0
    return len(list(common_neighbors_recalc(G, u, v))) / union_size

def adamic_adar_index_recalc(G, u, v, mode):
    if u not in G:
        raise nx.NetworkXError("u is not in the graph.")
    if v not in G:
        raise nx.NetworkXError("v is not in the graph.")
    result = 0
    for w in common_neighbors_recalc(G, u, v):
        degree = len(adjacent_edges(G, w, mode))
        if degree > 0:
            if log(degree)!= 0:
                result += 1/log(degree)
    return result

def calc_dir_character_monster(G,X_new):
    X = X_new.copy()
    
    mode = 'out'
    X['ra_index_out'] = [resource_allocation(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['jaccard_coef_out'] = [jaccard_coefficient(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['adamic_adar_coef_out'] = [adamic_adar_index(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['pref_attach_out'] = [preferential_attachment(G, u, v, mode) for u,v in zip(X['source'], X['target'])]

    mode = 'in'
    X['ra_index_in'] = [resource_allocation(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['jaccard_coef_in'] = [jaccard_coefficient(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['adamic_adar_coef_in'] = [adamic_adar_index(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['pref_attach_in'] = [preferential_attachment(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    
    mode = 'out'
    X['ra_index_out_r'] = [resource_allocation_recalc(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['jaccard_coef_out_r'] = [jaccard_coefficient_recalc(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['adamic_adar_coef_out_r'] = [adamic_adar_index_recalc(G, u, v, mode) for u,v in zip(X['source'], X['target'])]

    mode = 'in'
    X['ra_index_in_r'] = [resource_allocation_recalc(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['jaccard_coef_in_r'] = [jaccard_coefficient_recalc(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    X['adamic_adar_coef_in_r'] = [adamic_adar_index_recalc(G, u, v, mode) for u,v in zip(X['source'], X['target'])]
    
    G_undir = G.to_undirected()
    ra_idx = nx.resource_allocation_index(G_undir, X[['source', 'target']].values)
    X['ra_index'] = [p for u, v, p in ra_idx]

    jaccard_coef = nx.jaccard_coefficient(G_undir, X[['source', 'target']].values)
    X['jaccard_coef'] = [p for u, v, p in jaccard_coef]

    adar_coef = nx.adamic_adar_index(G_undir, X[['source', 'target']].values)
    X['adamic_adar_coef'] = [p for u, v, p in adar_coef]

    pref_attach = nx.preferential_attachment(G_undir, X[['source', 'target']].values)
    X['pref_attach'] = [p for u, v, p in pref_attach]
    return X

# Structural directed weird

In [48]:
def calc_dir_character_weird(G,X_new):
    X = X_new.copy()
    
    X['k_out'] = [G.out_degree(u)+G.out_degree(v) for u,v in zip(X['source'], X['target'])]

    undirected_G = G.to_undirected()
    ra_idx = nx.resource_allocation_index(undirected_G, X[['source', 'target']].values)
    X['ra_index'] = [p for u, v, p in ra_idx]

    jaccard_coef = nx.jaccard_coefficient(undirected_G, X[['source', 'target']].values)
    X['jaccard_coef'] = [p for u, v, p in jaccard_coef]

    adar_coef = nx.adamic_adar_index(undirected_G, X[['source', 'target']].values)
    X['adamic_adar_coef'] = [p for u, v, p in adar_coef]

    pref_attach = nx.preferential_attachment(undirected_G, X[['source', 'target']].values)
    X['pref_attach'] = [p for u, v, p in pref_attach]

    return X

In [49]:
pred_types = ['node2vec','undir_structural','dir_structural','dir_structural_monster','dir_structural_weird']

# Unified model building

In [50]:
def calc_character(G,X,prediction_type):
    if prediction_type == 'node2vec':
        Gg = G.to_undirected()
        n2w_model = build_Node2Vec_model(Gg)
        char = [(operator_l2(n2w_model.wv[i],n2w_model.wv[j])) for i,j in zip(X['source'], X['target'])]
        X_new = pd.concat([X, pd.DataFrame(char)], axis=1)
        return X_new
    elif prediction_type == 'undir_structural':
        X_new = calc_undir_character(G,X)
        return X_new
    elif prediction_type == 'dir_structural' and G.is_directed():
        X_new = calc_dir_character(G,X)
        return X_new
    elif prediction_type == 'dir_structural_monster' and G.is_directed():
        X_new = calc_dir_character_monster(G,X)
        return X_new
    elif prediction_type == 'dir_structural_weird' and G.is_directed():
        X_new = calc_dir_character_weird(G,X)
        return X_new
    else:
        raise ValueError('no such type of link prediction model')

# LP

In [57]:
def lp(Gg,X_new,y,bi_size,testtype,prediction_type, file, step, i,j):
    G = Gg.copy()
    X = X_new.copy()
    
    if testtype == 'usual' or testtype == 'puff':
        shuffle_flag = True
    elif testtype == 'weighted' or 'bidirectional':
        shuffle_flag = False
    else :
        raise ValueError('no such type of test set')
        
    X_test, X_train = train_test_split(X, test_size=1 - (bi_size+step*i), random_state=j, shuffle=shuffle_flag)
    
    train_idx =X_train.index.values
    to_remove_idx = X_test.index.values
    test_idx = to_remove_idx[:int(len(to_remove_idx)*bi_size/(bi_size+step*i))]
    
    G.remove_edges_from(X.iloc[to_remove_idx, :].values)
  
    # characterisics
    Xx = calc_character(G,X,prediction_type)

    # build classification model

    # Get rid of source and node2 columns because they are not characteristics
    Xx.drop(['source', 'target'], axis=1, inplace=True) 
    
    scaler = StandardScaler()
    scaler.fit(Xx)
    print(scaler.mean_)
    Xxx =scaler.transform(Xx)
    X_df = pd.DataFrame(Xxx)
    
    # Split data into train set and test set
    X_train = X_df.iloc[train_idx]
    y_train = y.iloc[train_idx]

    X_test = X_df.iloc[test_idx]
    y_test = y.iloc[test_idx]
        
    model = LogisticRegression(random_state=42)
    model.fit(X_train, y_train)
    y_pred_probas = model.predict_proba(X_test)
    y_pred = model.predict(X_test)
           
        
    if testtype == 'bidirectional' or testtype == 'weighted':
        ras = -1
        acc =  accuracy_score(y_test, y_pred)
        ps = precision_score(y_test, y_pred, average='binary')
        rs =  recall_score(y_test, y_pred, average='binary')
    elif testtype == 'usual' or testtype == 'puff':
        ras = roc_auc_score(y_test, y_pred_probas[:, 1])
        acc =  accuracy_score(y_test, y_pred)
        ps = precision_score(y_test, y_pred, average='binary')
        rs =  recall_score(y_test, y_pred, average='binary')
    else:
        raise ValueError('no such type of test set type')
        
    
    
    
    
    if True:
        file.write(str(j) +'\t' +str(0.1+step*i) + '\t'+ str(prediction_type) + '\t'+ str(testtype) + '\t')
        file.write(str(ras)+'\t'+str(acc)+'\t'+str(ps)+'\t'+str(rs) +'\t')
        for i in range(len(model.coef_[0])-1):
            print(model.coef_[0][i])
            file.write(str(model.coef_[0][i]) + '\t')
        file.write(str(model.coef_[0][len(model.coef_[0])-1]) + '\n')
    else:
        file.write(str(j) +'\t' +str(0.1+step*i) + '\t'+ str(prediction_type) + '\t'+ str(testtype) + '\t')
        file.write(str(ras)+'\t'+str(acc)+'\t'+str(ps)+'\t'+str(rs) +'\n')
        
    
    

# Actual LP on real networks

### pred_types = ['node2vec','undir_structural','dir_structural','dir_structural_monster','dir_structural_weird']
### test_type = ['usual', 'weighted', 'bidirectional']
### graph_type = ['directed', 'undirected']

In [52]:
G,X,y,test_size = create_G_X_y_test_size('data/wiki-vote.txt', 'usual', 'directed')

In [53]:
print(test_size)

0.1


In [54]:

for j in range(10):
    for i in range(6):
        with open('test_results/wiki-vote_structural_weird.txt', "a") as file:
            lp(G,X,y,test_size,'usual','dir_structural_weird', file, 0.1, i ,j)
            file.close()

[9.77262535e+01 8.01031201e-02 3.10436236e-02 1.78781634e+00
 9.22581161e+03]
[9.06263393e+01 7.52784146e-02 2.91980735e-02 1.60693576e+00
 8.11610870e+03]
[8.34790238e+01 7.05673937e-02 2.74343333e-02 1.43882323e+00
 7.07482009e+03]
[7.64238540e+01 6.59331342e-02 2.57636756e-02 1.28200965e+00
 6.11388159e+03]
[6.95821495e+01 6.13549319e-02 2.41634898e-02 1.13771282e+00
 5.23941727e+03]
[6.27816451e+01 5.67206138e-02 2.25833661e-02 1.00197109e+00
 4.42124857e+03]
[9.76195498e+01 8.02653836e-02 3.10599434e-02 1.78849389e+00
 9.22363684e+03]
[9.05287157e+01 7.54978001e-02 2.92203668e-02 1.61000953e+00
 8.12648601e+03]
[8.34613845e+01 7.08172438e-02 2.74647705e-02 1.44311277e+00
 7.10450240e+03]
[7.64991561e+01 6.60275632e-02 2.57374987e-02 1.28456545e+00
 6.14092758e+03]
[6.97089614e+01 6.14137162e-02 2.41309668e-02 1.14079093e+00
 5.25857144e+03]
[6.28181196e+01 5.67429165e-02 2.25499186e-02 1.00251975e+00
 4.42509161e+03]
[9.77699852e+01 8.03716671e-02 3.10636554e-02 1.79055761e+00
 9.

In [58]:
G,X,y,test_size = create_G_X_y_test_size('data/assoc_eng2.txt', 'usual', 'directed')

In [59]:
print(test_size)

0.1


In [60]:

for j in range(10):
    for i in range(1,2):
        with open('test_results/coef_anal/assoc_eng2_dir_structural_weird.txt', "a") as file:
            lp(G,X,y,0.1,'usual','dir_structural_weird', file, 0.1, i ,j)
            file.close()

[2.33610247e+01 6.83953413e-02 3.08665178e-02 4.89783505e-01
 7.72211952e+02]
0.11465402104765815
-0.08099929676706875
0.7569044130943713
4.908996116936509
[2.33371680e+01 6.82076238e-02 3.07511165e-02 4.88221969e-01
 7.71148546e+02]
0.11865392483079937
-0.058474400169294685
0.7715441394214342
4.880077011744599
[2.33490649e+01 6.85229900e-02 3.09189765e-02 4.90522427e-01
 7.71647250e+02]
0.11442012563535711
0.0007389735144266372
0.6907165559073024
4.985560816460965
[2.33703049e+01 6.81738087e-02 3.07620113e-02 4.88141352e-01
 7.74964985e+02]
0.11303344430104519
-0.18358000906341398
0.6896034923422667
5.019645017700203
[2.33513044e+01 6.80615740e-02 3.06747102e-02 4.87007590e-01
 7.70748248e+02]
0.10282448825736715
0.020117139541118677
0.7907668925733631
4.786257404081499
[2.33456310e+01 6.82853902e-02 3.07918646e-02 4.88361195e-01
 7.71367280e+02]
0.10781556355897047
-0.15680439694448645
0.7098316994550635
4.998620558755331
[2.33266384e+01 6.83681061e-02 3.08114313e-02 4.88903696e-01
 

In [34]:

for j in range(10):
    for i in range(1,2):
        with open('test_results/coef_anal/assoc_eng2_dir_structural_monster.txt', "a") as file:
            lp(G,X,y,0.1,'usual','dir_structural_monster', file, 0.1, i ,j)
            file.close()

[6.84376287e-02 3.16837919e-02 2.93331414e-01 1.37300071e+02
 1.70199096e-01 1.55796262e-02 3.45622462e-01 3.31808832e+02
 3.53088590e-02 1.80233414e-02 1.59897954e-01 4.35958058e-02
 1.31665718e-02 1.60866669e-01 6.83481469e-02 3.08099294e-02
 4.89281850e-01 7.70978061e+02]
0.04509759347405902
0.6074498638504883
-0.257067335446007
-0.032638933245173904
-0.5064095415493619
0.8693872313272217
-0.5019774527276049
-2.4169737957312813
0.2978570665093233
0.5172421041405261
-0.4062523522717271
-0.0255442039995402
0.022064993480859614
-0.034205379655675244
1.0175413318178201
0.6572093410780834
3.4232599980797422
2.8291664691883582
[6.85292955e-02 3.15580083e-02 2.93513562e-01 1.36994099e+02
 1.71971885e-01 1.56026144e-02 3.46731018e-01 3.32372969e+02
 3.48883528e-02 1.77555254e-02 1.57997450e-01 4.32241968e-02
 1.30535170e-02 1.59473635e-01 6.81652560e-02 3.06990513e-02
 4.87849985e-01 7.70506184e+02]
-0.025593891176669912
0.41045977107316806
-0.016674616123425638
-0.034559922857396334
-0.354

In [35]:

for j in range(10):
    for i in range(1,2):
        with open('test_results/coef_anal/assoc_eng2_dir_structural_weird.txt', "a") as file:
            lp(G,X,y,0.1,'usual','dir_structural', file, 0.1, i ,j)
            file.close()

[6.84376287e-02 3.16837919e-02 2.93331414e-01 1.37300071e+02
 1.70199096e-01 1.55796262e-02 3.45622462e-01 3.31808832e+02
 6.83481469e-02 3.08099294e-02 4.89281850e-01 7.70978061e+02]
0.14442802667925791
0.739538231801622
-0.5123531188071195
-0.04366264878089006
-0.5946383416023201
0.9084552442403245
-0.5726322082736787
-2.4243943365927794
0.9548385258568272
0.6031268991066038
3.895413188819107
2.8350135638865406
[6.85292955e-02 3.15580083e-02 2.93513562e-01 1.36994099e+02
 1.71971885e-01 1.56026144e-02 3.46731018e-01 3.32372969e+02
 6.81652560e-02 3.06990513e-02 4.87849985e-01 7.70506184e+02]
0.10031617548706555
0.5678268551631375
-0.31233142537465525
-0.04301465924475858
-0.43686948975916423
0.7447568581903412
-0.5667281853269565
-2.3811705407947112
0.8423717120305528
0.6941350847429423
3.8294212222649646
2.792656416300649
[6.96072907e-02 3.17446990e-02 2.94757636e-01 1.37044107e+02
 1.72641600e-01 1.57038803e-02 3.46622376e-01 3.31914553e+02
 6.84742604e-02 3.08547898e-02 4.89954057