# U2T2 - Requirement III

## File handling

In [1]:
import os

# Names of the networks to be processed (one data file per entry)
FILES = [
    'Slashdot0811',
    'Cit-HepTh',
    'email-Eu-core',
    'soc-Epinions1',
    'Wiki-Vote',
]

def fpath(file_name, flag):
    """
    Build the relative path ('../../data/<name>.txt') of a data file.

    When `flag` is truthy, "-processed" is appended to `file_name`
    before the ".txt" extension; otherwise the name is used as given.
    """
    suffix = "-processed" if flag else ""
    data_dir = os.path.join('..', '..', 'data')
    return os.path.join(data_dir, f'{file_name}{suffix}.txt')

## Requirement III

In [2]:
import networkx as nx
import pandas as pd

class RequirementIII:
    """
    Class responsible for supporting the analysis of many graphs.

    Loads the processed edge list of one network into an undirected graph
    (`self.G`) plus a directed graph built from it (`self.H`), and computes
    the summary metrics used to build the comparison table.
    """

    def __init__(self, file):
        """
        Read the processed edge list of `file` and build its graphs.

        Parameters
        ----------
        file : str
            Network name, without suffix or extension. The path of the
            file that is read is resolved through `fpath(file, True)`.

        Raises
        ------
        OSError
            If the data file cannot be opened.
        ValueError
            If a non-blank line does not hold exactly two integer node ids.
        """
        self.file = file

        # Mount graph
        self.G = nx.Graph()

        # The context manager guarantees the file is closed, even when a
        # line fails to parse; iterating the handle avoids loading the
        # whole file in memory at once.
        with open(fpath(self.file, True), 'r') as fstream:
            for line in fstream:
                # split() with no argument discards all whitespace,
                # including the trailing line break
                tokens = line.split()
                if not tokens:
                    # Tolerate blank lines (e.g. a trailing empty line)
                    continue

                # Each line is one edge: "<from_node> <to_node>"
                from_node, to_node = map(int, tokens)
                self.G.add_edge(from_node, to_node)

        # Create directed graph from the undirected one
        self.H = nx.DiGraph(self.G)

    def fill_properties(self):
        """
        Fill properties needed to build the table.

        Computes, from the undirected graph `self.G`, and stores on the
        instance: `nodes`, `edges`, `degree_assortativity_coefficient`,
        `amount_connected_components`, `gcc_length` and
        `clustering_coefficient`.
        """
        # Number of nodes
        self.nodes = self.G.number_of_nodes()

        # Number of edges
        self.edges = self.G.number_of_edges()

        # Degree assortativity coefficient
        self.degree_assortativity_coefficient = nx.degree_assortativity_coefficient(self.G)

        # Number of connected components
        self.amount_connected_components = nx.number_connected_components(self.G)

        # Size (in nodes) of the giant connected component (GCC),
        # i.e. the connected component with the most nodes
        self.gcc_length = len(max(nx.connected_components(self.G), key=len))

        # Average clustering coefficient
        self.clustering_coefficient = nx.average_clustering(self.G)

# Summary table: one column per reported metric, one row per network
cols = [
    "Rede",
    "Qtd de vertices",
    "Qtd arestas",
    "Degree assortativity coefficient",
    "Qtd componentes conectados",
    "Tamanho do componente gigante (GCC)",
    "Coef. de clustering"
]
df = pd.DataFrame(columns=cols)

# RequirementIII attribute that feeds each column, in the same order as `cols`
row_attributes = (
    "file",
    "nodes",
    "edges",
    "degree_assortativity_coefficient",
    "amount_connected_components",
    "gcc_length",
    "clustering_coefficient",
)

# One row per network
for file in FILES:
    # Load the network and compute its metrics
    r3 = RequirementIII(file)
    r3.fill_properties()

    # Collect the metric values in column order
    row = [getattr(r3, attribute) for attribute in row_attributes]

    # Append the row at the next free integer label
    df.loc[df.shape[0]] = row

# Show the resulting table
display(df)

Unnamed: 0,Rede,Qtd de vertices,Qtd arestas,Degree assortativity coefficient,Qtd componentes conectados,Tamanho do componente gigante (GCC),Coef. de clustering
0,Slashdot0811,77360,546487,-0.051728,1,77360,0.055499
1,Cit-HepTh,27770,352324,-0.030305,143,27400,0.312019
2,email-Eu-core,1005,16706,-0.01099,20,986,0.399355
3,soc-Epinions1,75879,405740,-0.040646,2,75877,0.137756
4,Wiki-Vote,7115,100762,-0.083052,24,7066,0.140898
