# Graphs & Social Networks &mdash; lab material
This notebook contains the definition of a Python class *Graph*, which collects the functions described during the labs.

Package import.

In [None]:
import numpy as np
from random import random, seed
from copy import deepcopy
from collections import deque
import time

## Simple functions from the first classes
This is left just as an example (to compare the behaviour).

In [None]:
def print_matrix(vertices, matrix):
    """
    Print a graph stored as an adjacency matrix, one vertex per line.

    Every output line has the form "<vertex> :  <neighbour>  <neighbour> ...".
    If `vertices` is None or its length differs from the matrix size, the
    labels 1..n are used instead.  `matrix` is indexed as matrix[i, j].
    """
    size = len(matrix)
    labels_usable = vertices is not None and len(vertices) == size
    labels = vertices if labels_usable else range(1, size + 1)
    for row in range(size):
        print(labels[row], ":", end="")
        for col in range(size):
            if not matrix[row, col]:
                continue            # no arc row -> col
            print(" ", labels[col], end="")
        print()

def print_dict(graph):
    """
    Print a graph stored as a dictionary of neighbour lists.

    Every output line has the form "<vertex> :  <neighbour>  <neighbour> ...".
    """
    for vertex, neighbours in graph.items():
        print(vertex, ":", end="")
        for neighbour in neighbours:
            print(" ", neighbour, end="")
        print()

## Class *Graph*

In [None]:
class Graph:
    def __init__(self, graph=None):
        if graph is None:
            graph = {}
        self.graph = graph

    # dict initializer
    @classmethod
    def from_dict(cls, graph):
        return cls(graph)

    # array initializer
    @classmethod
    def from_matrix(cls, matrix, vertices = None):
        if (vertices is None) or (len(vertices) != len(matrix)):
            vertices = [*range(1, len(matrix) + 1)]
        return cls.from_dict(cls._matrix_to_dict(matrix, vertices))

    # two private methods matrix <-> dictionaries
    def _matrix_to_dict(matrix, vertices: list) -> dict:
        """
        Converts a graph given as an adjacency matrix to a graph in dict form.
        """
        res_dict = {}
        for i, v in enumerate(vertices):
            neighbours = [vertices[j] for j, edge in enumerate(matrix[i]) if edge]
            res_dict[v] = neighbours
        return res_dict

    def _dict_to_matrix(self, _dict: dict) -> np.array:
        """
        Converts a graph in dict form to its adjacency matrix.
        """
        n = len(_dict)
        vertices = [*_dict.keys()]
        matrix = np.zeros(shape = (n, n), dtype=int)
        for u,v in [
            (vertices.index(u), vertices.index(v))
            for u, row in _dict.items() for v in row
        ]:
            matrix[u][v] += 1
        return matrix

    def vertices(self) -> list:
        """
        Returns list of vertices of the graph.
        """
        return [*self.graph.keys()]

    def matrix(self) -> np.array:
        """
        Returns the adjacency matrix of the graph.
        """
        return self._dict_to_matrix(self.graph)

    # redefinition of print for objects of class Graph
    def __str__(self):
        res = ""
        for v in self.graph:
            res += f"{v}:"
            for u in self.graph[v]:
                res += f" {u}"
            res += "\n"
        return res

    # The following is for free thanks to the above
    def to_neighbourlist(self, filename: str):
        """
        Saves a graphs to a text file as a neighbour dict.\n
        Filename is a file path.
        """
        file = open(filename, "w")  # open textfile for writing
        file.write(str(self))
        file.close()

    # Modifying graphs
    def add_vertex(self, vertex):
        """
        Adds a new vertex to the graph.
        """
        if vertex not in self.graph:
            self.graph[vertex] = []

    def del_vertex(self, vertex):
        """
        Removes a vertex from the graph.
        """
        if vertex in self.graph:
            self.graph.pop(vertex)
            for u in self.graph:
                if vertex in self.graph[u]:
                    self.graph[u].remove(vertex)

    def add_arc(self, arc):
        """
        Given pair of vertices (arc variable) add an arc to the graph
        We consider simple, directed graphs.
        """
        u, v = arc
        self.add_vertex(u)
        self.add_vertex(v)
        if v not in self.graph[u]:
            self.graph[u].append(v)

    def add_edge(self, edge: list):
        """
        Given pair of vertices (edge variable) add an edge to existing graph.
        We consider simple, undirected graphs, as symmetric digraphs without loops.
        """
        u, v = edge
        if u == v:
            raise ValueError("Loops are not allowed!")
        self.add_vertex(u)
        self.add_vertex(v)
        if v not in self.graph[u]:
            self.graph[u].append(v)
        if u not in self.graph[v]:
            self.graph[v].append(u)

    # reading from a file
    @staticmethod
    def from_edges(filename: str, directed = 0):
        """
        Read the graph from file, that in each line contains either
        the description of a vertex (one word) or
        the description of an edge/arc (at least 2 words).
        The resulting graph is returned as a neighbourhood list.
        Variable "filename" contains the whole path to the file.
        """
        graph = Graph()
        file = open(filename, "r")          # open the file to read
        for line in file:                   # for each line of the file
          words = line.strip().split()      # splits the line into words
          if len(words) == 1:               # one word - vertex description
            graph.add_vertex(words[0])
          elif len(words) >= 2:             # at least two words, first two are the edge description
            if directed:
              graph.add_arc([words[0], words[1]])
            else:
              graph.add_edge([words[0], words[1]])
        file.close()
        return graph

    @staticmethod
    def random_graph(n: int, p: float):
        """
        Creates a random graph in G(n, p) model.
        """
        rand_graph = Graph()
        for i in range(1, n + 1):
            rand_graph.add_vertex(i)
            for j in range(1, i):
                if random() < p:
                    rand_graph.add_edge([i, j])
        return rand_graph

    @staticmethod
    def cycle(n: int):
        """
        Creates a cycle C_n on n vertices
        """
        cycle = Graph()
        for i in range(n-1):
          cycle.add_edge([i+1, i+2])
        cycle.add_edge([1, n])
        return cycle


    def Prufer(self):
      """
      Returns the Prufer code of a tree.
      It is necessary that the graph is a tree (it is not checked).
      Result is given as a string (empty for trees on 1 or 2 vertices).
      """
      tr = deepcopy(self.graph)   # copy of a tree, as we destroy it
      code = ""
      for i in range(len(self.graph) - 2):
        for x in sorted(tr):
          if len(tr[x]) == 1:   # least leaf
            break
        v = tr[x][0]            # the unique neighbour of x
        code = code + f"{v} "
        tr[v].remove(x)         # remove x from neighbours of v
        tr.pop(x)               # remove x from the tree
      return code.strip()

    @staticmethod
    def tree_from_Prufer(code: str):
        """
        Creating a tree from a Prufer code.
        """
        tree = Graph()
        clist = [int(x) for x in code.strip().split()]   # code as a list of numbers
        n = len(clist) + 2                  # number of vertices
        vert = [*range(1, n+1)]             # list of numbers 1..n
        for v in vert:
          tree.add_vertex(v)
        for i in range(n-2):
          for x in vert:
            if not x in clist:    # x - least leaf
              break
          v = clist.pop(0)    # remove the first element from the code - the neighbour of x
          tree.add_edge((x, v))
          vert.remove(x)
        tree.add_edge(vert)
        return tree


    def connected_components(self):
      """
      Looks for connected components of undirected graph.
      Returns a list of its vertex-sets.
      Remark: the first element contains the set of all graph vertices
      """
      def DFS(u):
        """
        Deep first search (as internal method).
        """
        for w in self.graph[u]:
          if w not in VT[0]:      # w - not visited yet
            VT[0].add(w)          # already visited
            VT[-1].add(w)         # w - in the last connected component
            DFS(w)

      """
      VT - list of vertex sets VT[i] for i > 0 - is a vertex set of i-th connected component
      VT[0] - is a vertex set of the spanning forest (or during the algorithm list of visited vertices).
      """
      VT = [set([])]
      for v in self.graph:
        if v not in VT[0]:      # v is not visited
          VT[0].add(v)
          VT.append(set([v]))   # statring point of new conected component
          DFS(v)
      return VT

    def preorder(self, v, visited=None):
        # first we print a given vertex, then we traverse the subtree rooted in it
        """
        Prints the vertices of the graph in preorder traversal starting from vertex v.
        """
        if visited is None:
            visited = set()

        # Visit the current node
        visited.add(v)
        print(v, end=' ')

        # Recursively visit all unvisited neighbors
        for neighbor in self.graph[v]:
            if neighbor not in visited:
                self.preorder(neighbor, visited)


    def postorder(self, v, visited=None):
        # postorder — first we traverse the subtree rooted a given vertex, then we list the vertex
        """
        Prints the vertices of the graph in postorder traversal starting from vertex v.
        """
        if visited is None:
            visited = set()

        # Mark the current node as visited
        visited.add(v)

        # Recursively visit all unvisited neighbors first
        for neighbor in self.graph[v]:
            if neighbor not in visited:
                self.postorder(neighbor, visited)

        # Print the current node after visiting its neighbors
        print(v, end=' ')


    def connected_components_graphs(self):
        """
        Returns a list of Graph objects, each representing a connected component of the original graph.
        """
        components = self.connected_components()  # Get the list of connected components as vertex sets
        component_graphs = []

        for component in components[1:]:  # Skip the first element as it contains all vertices
            subgraph = Graph()            # Create a new Graph instance
            for vertex in component:
                subgraph.graph[vertex] = self.graph[vertex]
#           equivalent
#                subgraph.add_vertex(vertex)
#                for neighbor in self.graph[vertex]:
#                    if neighbor in component:
#                        subgraph.add_edge([vertex, neighbor])
            component_graphs.append(subgraph)

        return component_graphs


    @staticmethod
    def random_bipartite_graph(m, n, p):
        """
        Generates a random bipartite graph with m + n vertices.
        The two sets U and V have m and n vertices respectively.
        Each edge between a vertex in U and a vertex in V is included with probability p.
        """
        bipartite_graph = Graph()
        U = [f"U{i}" for i in range(1, m + 1)]  # Label vertices in set U as U1, U2, ..., Um
        V = [f"V{i}" for i in range(1, n + 1)]  # Label vertices in set V as V1, V2, ..., Vn

        # Add vertices to the graph
        for vertex in U + V:
            bipartite_graph.add_vertex(vertex)

        # Add edges between vertices in U and V with probability p
        for u in U:
            for v in V:
                if random.random() < p:
                    bipartite_graph.add_edge([u, v])

        return bipartite_graph


    def distance(self, v):
      """
      Computes distances from vertex v to each vertex reachable from v.
      It uses a BFS approach.
      Result is given as a dictionary of distances
      """
      dist = {v:0}    # starting point of a dictionary
      queue = [v]
      while len(queue) > 0:
        u = queue.pop(0)
        for w in self.graph[u]:
          if not w in dist:
            dist[w] = dist[u] + 1
            queue.append(w)
      return dist


    def ConnectedComponentsBFS(self):

        visited = set()  # keep track of visited vertices
        components = []  # store connected components

        for start_vertex in self.graph:

            if start_vertex not in visited:
                # if vertex was not visited, create a new component
                component = set()
                queue = deque([start_vertex])  # BFS queue

                while queue:
                    vertex = queue.popleft()    # take the vertex from the bottom

                    if vertex not in visited:
                        visited.add(vertex)
                        component.add(vertex)

                        # add all unvisited neighbors to the queue
                        for neighbor in self.graph[vertex]:

                            if neighbor not in visited:
                                queue.append(neighbor)

                # store the current connected component
                components.append(component)

        return components


    def floyd_warshall_small_world(self):

        vertices = self.vertices()
        n = len(vertices)
        vertex_index = {v: i for i, v in enumerate(vertices)}  # map vertices to indices

        dist = np.full((n, n), np.inf)  # set all distances to infinity in distance matrix
        np.fill_diagonal(dist, 0)       # distance from a vertex to itself is 0

        # populate initial distances based on edges
        for u in self.graph:
            for v in self.graph[u]:
                dist[vertex_index[u], vertex_index[v]] = 1  # edge weight is 1

        # Floyd-Warshall algorithm
        for k in range(n):
            for i in range(n):
                for j in range(n):
                    # update shortest path
                    dist[i, j] = min(dist[i, j], dist[i, k] + dist[k, j])

        # small-world metrics
        finite_distances = dist[np.isfinite(dist)]  # filter out infinite distances (unreachable nodes)
        diameter = np.max(finite_distances)         # maximum distance
        radius = np.min(np.max(dist, axis=1))       # minimum eccentricity
        avg_path_length = np.mean(finite_distances) # mean of all finite distances

        return radius, diameter, avg_path_length


    def topological_sort(self):
        visited = set()
        stack = []              # store the topologically sorted order
        on_stack = set()        # detect cycles
        self.has_cycle = False  # store cycle detection status

        # perform depth-first search for a single vertex
        def dfs(vertex):
            if vertex in on_stack:  # cycle detected
                self.has_cycle = True
                return
            if vertex in visited:  # skip already visited nodes
                return

            visited.add(vertex)
            on_stack.add(vertex)

            # visit all neighbors
            for neighbor in self.graph.get(vertex, []):
                if self.has_cycle:  # stop further exploration on detecting a cycle
                    return
                dfs(neighbor)

            on_stack.remove(vertex)
            stack.append(vertex)

        # perform DFS for all vertices
        for vertex in self.graph:
            if vertex not in visited:
                dfs(vertex)
                if self.has_cycle:  # stop further exploration globally on detecting a cycle
                    return []

        return stack[::-1]  # reverse the stack for topological order

## Use of code

#### Milgram's experiment &mdash; Small World Phenomenon

We will use a random graph from the $G(n,p)$ model (namely $G(100, 1/30)$) as an illustration.

In [None]:
import sys

n = 100
p = 1/30
# Only ever raise the recursion limit: n + 500 is below Python's default of 1000,
# and lowering the limit inside a running kernel can itself raise RecursionError.
sys.setrecursionlimit(max(sys.getrecursionlimit(), n + 500))

# Fixed seed so that Restart & Run All reproduces the same random graph.
seed(12345)
rgraph = Graph.random_graph(n, p)
# Keep the largest connected component; element [0] would merely be the component
# of the first vertex, which may be a single isolated vertex.
graph = max(rgraph.connected_components_graphs(), key=lambda component: len(component.vertices()))
print(len(graph.vertices()))

94


### Task 1

In [None]:
graph_data = {
    1: [2],
    2: [1, 3, 4],
    3: [2, 4],
    4: [2, 3],
    5: [6],
    6: [5]
}
# Use a dedicated name: rebinding `graph` here would replace the random graph
# that the Task 2 cell measures when the notebook is run top to bottom.
task1_graph = Graph(graph_data)

connected_components = task1_graph.ConnectedComponentsBFS()
print("Connected Components:", connected_components)

Connected Components: [{1, 2, 3, 4}, {5, 6}]


### Task 2

In [None]:
# Method 1: BFS-based calculation
# time.perf_counter() is the clock intended for measuring short durations:
# it is monotonic and has the highest available resolution, unlike time.time().
start_time = time.perf_counter()
md = {}     # vertex -> mean distance to the vertices reachable from it
ecc = {}    # vertex -> eccentricity (largest distance to a reachable vertex)
for v in graph.vertices():
    dist = graph.distance(v)
    ecc[v] = max(dist.values())
    md[v] = sum(dist.values()) / len(dist.values())
radius_bfs = min(ecc.values())
diameter_bfs = max(ecc.values())
average_bfs = sum(md.values()) / len(md.values())
end_time = time.perf_counter()
bfs_time = end_time - start_time

print("BFS-based method results:")
print(f"Radius: {radius_bfs}, Diameter: {diameter_bfs}, Average Path Length: {average_bfs:.4f}")
print(f"Time taken: {bfs_time:.4f} seconds\n")

# Method 2: Floyd-Warshall algorithm
start_time = time.perf_counter()
radius_fw, diameter_fw, average_fw = graph.floyd_warshall_small_world()
end_time = time.perf_counter()
fw_time = end_time - start_time

print("Floyd-Warshall method results:")
print(f"Radius: {radius_fw}, Diameter: {diameter_fw}, Average Path Length: {average_fw:.4f}")
print(f"Time taken: {fw_time:.4f} seconds\n")

# Comparison
if bfs_time < fw_time:
    print("BFS-based method is faster.")
else:
    print("Floyd-Warshall method is faster.")

BFS-based method results:
Radius: 5, Diameter: 8, Average Path Length: 3.8205
Time taken: 0.0098 seconds

Floyd-Warshall method results:
Radius: 5.0, Diameter: 8.0, Average Path Length: 3.8205
Time taken: 0.8690 seconds

BFS-based method is faster.


### Task 3

In [None]:
# Build a small directed graph arc by arc; add_arc creates missing vertices.
graph = Graph()

graph.add_arc(('A', 'C'))
graph.add_arc(('B', 'C'))
graph.add_arc(('B', 'D'))
graph.add_arc(('C', 'E'))
graph.add_arc(('D', 'F'))
graph.add_arc(('E', 'F'))

# topological_sort returns [] when it detects a directed cycle.
topological_order = graph.topological_sort()
print("Topological Order:", topological_order)


Topological Order: ['B', 'D', 'A', 'C', 'E', 'F']
