In [33]:
import networkx as nx
import pandas
import os
import glob
import statistics as stats
import jinja2

In [36]:
file_pattern = "*graph.graphml"
graph_files = glob.glob(file_pattern)

graphs = []
for file in graph_files:
    graph = nx.read_graphml(file)
    graphs.append(graph)

# Create dataframe that looks like this:
# Num nodes | average (weighted) degree | average (weighted) clustering | size of largest component (as a percentage) 
# | average weight | average num partners over the average weight

# Make empty dataframe
columns = ['num_nodes', 'average_degree', 'average_clustering', 'percent_largest_component', 'average_weight', 'average_strong_ties']
basic_metrics_df = pandas.DataFrame(columns=columns)

for graph in graphs:
    num_nodes = len(graph.nodes)

    degrees = dict(graph.degree(weight='weight'))
    degree_list = list(degrees.values())
    average_degree = stats.mean(degree_list)

    clustering_weighted = nx.clustering(graph, weight="weight")
    clustering_weighted = list(clustering_weighted.values())
    average_clustering = stats.mean(clustering_weighted)

    components = nx.connected_components(graph)
    largest_component = max(components, key=len)
    largest_component_size = len(largest_component)
    largest_component_percentage = largest_component_size / num_nodes * 100

    edge_weights_dict = nx.get_edge_attributes(graph, 'weight')
    edge_weights_list = list(edge_weights_dict.values())
    average_weight = stats.mean(edge_weights_list)

    strongly_tied_friends = []
    # Go through nodes in the graph, check if weight > average, if so, add to ctr
    for node in graph.nodes:
        ctr = 0
        for neighbor in graph.neighbors(node):
            weight = graph[node][neighbor]['weight']
            if weight > average_weight:
                ctr += 1
        strongly_tied_friends.append(ctr)
    
    average_strong_friends = stats.mean(strongly_tied_friends)

    new_row = pandas.DataFrame({'num_nodes': [num_nodes], 'average_degree': [average_degree], 
    'average_clustering': [average_clustering], 'percent_largest_component': [largest_component_percentage], 'average_weight': [average_weight], 
    'average_strong_ties': [average_strong_friends]})
    basic_metrics_df = pandas.concat([basic_metrics_df, new_row], ignore_index=True)

print(basic_metrics_df)

# Print to LaTeX for the writeup
latex_code = basic_metrics_df.to_latex(index=False)
with open("network_table.tex", "w") as f:
    f.write(latex_code)

# Find averages of all the averages for the slides + writeup
avg_nodes = stats.mean(basic_metrics_df['num_nodes'])
print(f'Average number of nodes over S1-20: {avg_nodes}')

avg_avg_degree = stats.mean(basic_metrics_df['average_degree'])
print(f'Average average degree: {avg_avg_degree}')

# Measures how tightly connected nodes' neighbors are to one another, but weighted
# Lower clustering coefficient -- neighbors are not strongly connected
avg_avg_clustering = stats.mean(basic_metrics_df['average_clustering'])
print(f'Average average clustering coefficient: {avg_avg_clustering}')

avg_percent_component = stats.mean(basic_metrics_df['percent_largest_component'])
print(f'Average percentage size of largest component: {avg_percent_component}')

avg_avg_weight = stats.mean(basic_metrics_df['average_weight'])
print(f'Average average weight: {avg_avg_weight}')

avg_avg_strong_ties = stats.mean(basic_metrics_df['average_strong_ties'])
print(f'Average average strong ties: {avg_avg_strong_ties}')


    

   num_nodes  average_degree  average_clustering  percent_largest_component  \
0         20       22.800000            0.188801                  90.000000   
1         20       31.600000            0.136265                 100.000000   
2         16       25.000000            0.196874                  87.500000   
3         16       21.000000            0.256924                  93.750000   
4         20       20.500000            0.152052                  95.000000   
5         20       20.000000            0.143703                  95.000000   
6         17       14.588235            0.213853                  88.235294   
7         16       14.875000            0.193372                 100.000000   
8         16       18.750000            0.186348                  93.750000   
9         18       16.222222            0.181123                  88.888889   
10        16       23.750000            0.255011                 100.000000   
11        16       18.250000            0.194819    

  basic_metrics_df = pandas.concat([basic_metrics_df, new_row], ignore_index=True)
