#  Load Graph from CSV Files

This notebook reads all CSV files in the `data/` folder and builds an undirected graph.

### Rules:
- Each **node** is a unique value from the `full name` column.
- **Company names and all other columns are ignored.**
- If multiple full names appear in a file, they are treated as mutually connected.
- The graph is saved to `graph_1.pkl` for use in other notebooks.

🛠️ Now includes flexible column name detection to avoid issues if column names vary slightly.


In [223]:

import pandas as pd
import os
import re

# Define your folder path containing all CSV files
folder_path = "/Users/ritiksingh/Downloads/MFC/Graph_project/linkedin_data"  # replace with actual path
file_paths = [os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith(".csv")]

# Keywords used to identify companies
company_keywords = ['Pvt', 'Ltd', 'LLP', 'Inc', 'Technologies', 'Solutions', 'Company', 'Corp', 'Enterprises']

# Helper function to exclude company names
def is_not_company(name):
    return isinstance(name, str) and not any(keyword.lower() in name.lower() for keyword in company_keywords)

# Initialize containers for graph
nodes = set()
edges = set()

# Parse each CSV file
for file_path in file_paths:
    try:
        df = pd.read_csv(file_path)
        file_owner = os.path.basename(file_path).split(" - ")[-1].replace(".csv", "").strip()

        if not is_not_company(file_owner):
            continue

        nodes.add(file_owner)

        for name in df.iloc[:, 0]:  # First column assumed to contain names
            if is_not_company(name):
                cleaned_name = name.strip()
                nodes.add(cleaned_name)
                edges.add(tuple(sorted((file_owner, cleaned_name))))  # undirected edge

    except Exception as e:
        print(f"Failed to process {file_path}: {e}")

# Final graph stats
print(f"Total unique nodes: {len(nodes)}")
print(f"Total unique edges: {len(edges)}")


Total unique nodes: 29540
Total unique edges: 105674


In [224]:
import os
import pandas as pd
import pickle
from collections import defaultdict
import networkx as nx
import matplotlib.pyplot as plt

# Define your folder path containing all CSV files
folder_path = "/Users/ritiksingh/Downloads/MFC/MFC_project/linkedin_data"

# Keywords used to identify companies
company_keywords = ['Pvt', 'Ltd', 'LLP', 'Inc', 'Technologies', 'Solutions', 'Company', 'Corp', 'Enterprises']

# Helper function to exclude company names
def is_not_company(name):
    return isinstance(name, str) and not any(keyword.lower() in name.lower() for keyword in company_keywords)

def load_graph_from_csvs(folder_path):
    # Initialize graph as a defaultdict to store adjacency list
    graph = defaultdict(set)
    nodes = set()
    edges = set()

    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            path = os.path.join(folder_path, filename)
            try:
                df = pd.read_csv(path)
                # Extract file owner from filename (e.g., "something - John Doe.csv" -> "John Doe")
                file_owner = os.path.basename(filename).split(" - ")[-1].replace(".csv", "").strip()

                # Skip if file_owner is a company
                if not is_not_company(file_owner):
                    continue

                nodes.add(file_owner)

                # Assume first column contains names; adjust column name if needed
                name_column = df.columns[0]  # Use first column dynamically
                names = df[name_column].dropna().str.strip().tolist()

                for name in names:
                    if is_not_company(name):
                        cleaned_name = name.strip()
                        nodes.add(cleaned_name)
                        # Add undirected edge between file_owner and name
                        edges.add(tuple(sorted((file_owner, cleaned_name))))
                        # Update adjacency list
                        graph[file_owner].add(cleaned_name)
                        graph[cleaned_name].add(file_owner)

            except Exception as e:
                print(f"Error reading {filename}: {e}")

    return dict(graph), nodes, edges

# Load the graph from CSVs
graph, nodes, edges = load_graph_from_csvs(folder_path)

# Print basic info
print(f"Graph loaded with {len(nodes)} nodes and {len(edges)} edges.")

# Save the graph
with open("graph_1.pkl", "wb") as f:
    pickle.dump({"graph": graph, "nodes": nodes, "edges": edges}, f)

print("Graph saved to 'graph_1.pkl'")

# Preview a sample of the graph
sample_graph = {k: list(v)[:5] for k, v in list(graph.items())[:5]}
print("\nSample of graph (first 10 nodes and up to 5 connections each):")
print(pd.DataFrame.from_dict(sample_graph, orient='index'))

Graph loaded with 29540 nodes and 105674 edges.
Graph saved to 'graph_1.pkl'

Sample of graph (first 10 nodes and up to 5 connections each):
                            0               1              2  \
Yuvraj Bhati     Nikhil Yadav  Saksham Bharti  Pankaj  Yadav   
Ajay Kumar      Sauhard kumar   NEERAJ PARMAR  N. Arun Kumar   
Rahul Kumar     Mudasir Ahmad     Mahtab Alam   Nikhil Yadav   
Pushpraj Singh   Sharad Kumar   Khushi Kumari     Amar Kumar   
SANDEEP KUMAR   Sauhard kumar    Satish Mahto  NEERAJ PARMAR   

                               3                     4  
Yuvraj Bhati        Ranjan Singh          Satish Mahto  
Ajay Kumar      UPPARA MAITHREYI         Ujjval Baijal  
Rahul Kumar       Saksham Bharti            Raja Yadav  
Pushpraj Singh     Preeti Shukla  Ram Bhanwar Bhadiyar  
SANDEEP KUMAR      N. Arun Kumar      UPPARA MAITHREYI  


In [225]:
import pickle
import pandas as pd
import matplotlib.pyplot as plt

# Load the graph from saved file
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Adjacency list
    nodes = data["nodes"]  # Set of nodes
    edges = data["edges"]  # Set of edges

print(f" Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

# Calculate degree for each node
degree_dict = {node: len(neighbors) for node, neighbors in graph.items()}

# Convert to DataFrame
degree_df = pd.DataFrame(list(degree_dict.items()), columns=["Node", "Degree"])
degree_df.sort_values(by="Degree", ascending=False, inplace=True)

# Display top 5 nodes by degree
print("\nTop 5 nodes by degree:")
print(degree_df.head())

# Group nodes by degree for detailed analysis
grouped_by_degree = {}
for node, degree in degree_dict.items():
    grouped_by_degree.setdefault(degree, []).append(node)

# Convert to DataFrame for display
grouped_df = pd.DataFrame([
    {"Degree": degree, "Nodes": nodes, "Count": len(nodes)}
    for degree, nodes in sorted(grouped_by_degree.items(), reverse=True)
])

print("\nNodes grouped by degree:")
print(grouped_df)

# Find nodes with maximum and minimum degree
max_degree = degree_df["Degree"].max()
min_degree = degree_df["Degree"].min()

nodes_with_max_degree = degree_df[degree_df["Degree"] == max_degree]["Node"].tolist()
nodes_with_min_degree = degree_df[degree_df["Degree"] == min_degree]["Node"].tolist()

print(f"\nMaximum Degree: {max_degree}")
print(f"Nodes with Maximum Degree: {nodes_with_max_degree[:5]}")  # Show first 5
print(f"Minimum Degree: {min_degree}")
print(f"Nodes with Minimum Degree: {nodes_with_min_degree[:5]}")  # Show first 5

# Check if two nodes are adjacent
def are_adjacent(node1, node2, graph):
    """Returns True if node1 and node2 are directly connected."""
    return node2 in graph.get(node1, set())

# Example nodes (replace with actual names from your dataset)
node_a = "Aaditya Raj"
node_b = "Ajit Yadav"

if are_adjacent(node_a, node_b, graph):
    print(f" '{node_a}' and '{node_b}' are adjacent.")
else:
    print(f"'{node_a}' and '{node_b}' are NOT adjacent.")

# Check if the graph is complete
def is_complete_graph(graph):
    n = len(graph)
    for node, neighbors in graph.items():
        # In a complete graph, each node should be connected to all others
        if len(neighbors) != n - 1:
            return False
    return True

if is_complete_graph(graph):
    print(" The graph is COMPLETE — every node is connected to every other node.")
else:
    print(" The graph is NOT complete — some nodes are not directly connected.")

 Graph loaded with 29540 nodes and 105674 edges.

Top 5 nodes by degree:
                Node  Degree
110    Rohit Malviya    4370
7465     RAVI RAJPUT    4090
16125         Sheet1    4080
112     Ramraj Nagar    3800
11008  NIRMAL MEWADA    3561

Nodes grouped by degree:
     Degree                                              Nodes  Count
0      4370                                    [Rohit Malviya]      1
1      4090                                      [RAVI RAJPUT]      1
2      4080                                           [Sheet1]      1
3      3800                                     [Ramraj Nagar]      1
4      3561                                    [NIRMAL MEWADA]      1
..      ...                                                ...    ...
234       5  [Sukhamay  Singha Roy, Kumar Shubham, Ayush Br...    667
235       4  [Sachin Gupta, Antriksh Gupta (he/him), VIKASH...   1088
236       3  [Shivam Solanki, vinit chaudhary, BABLU KUMAR,...   1834
237       2  [Moksh Kandpal

#  Degree Analysis

This notebook loads the graph from `graph_1.pkl` and performs degree analysis.

### Tasks:
- Calculate the degree of each node (number of connections).
- Find the nodes with the maximum and minimum degree.
- Visualize the degree distribution using a histogram.


In [226]:
# calculate_degrees.py
import pickle
import pandas as pd

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]

# Calculate degree for each node
degree_dict = {node: len(neighbors) for node, neighbors in graph.items()}

# Convert to DataFrame
degree_df = pd.DataFrame(list(degree_dict.items()), columns=["Node", "Degree"])
degree_df.sort_values(by="Degree", ascending=False, inplace=True)

# Display top 5 nodes by degree
print("\nTop 5 nodes by degree:")
print(degree_df.head())


Top 5 nodes by degree:
                Node  Degree
110    Rohit Malviya    4370
7465     RAVI RAJPUT    4090
16125         Sheet1    4080
112     Ramraj Nagar    3800
11008  NIRMAL MEWADA    3561


In [227]:
# group_nodes_by_degree.py
import pickle
import pandas as pd

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]

# Calculate degree for each node
degree_dict = {node: len(neighbors) for node, neighbors in graph.items()}

# Group nodes by degree
grouped_by_degree = {}
for node, degree in degree_dict.items():
    grouped_by_degree.setdefault(degree, []).append(node)

# Convert to DataFrame for display
grouped_df = pd.DataFrame([
    {"Degree": degree, "Nodes": nodes, "Count": len(nodes)}
    for degree, nodes in sorted(grouped_by_degree.items(), reverse=False)
])

# Display results
print("\nNodes grouped by degree:")
print(grouped_df)


Nodes grouped by degree:
     Degree                                              Nodes  Count
0         1  [Vijay Thakur, MAJOR GENERAL AJAY PAL SINGH, G...  18502
1         2  [Moksh Kandpal, Sadhana Chaudhary, Divyanshi O...   4048
2         3  [Shivam Solanki, vinit chaudhary, BABLU KUMAR,...   1834
3         4  [Sachin Gupta, Antriksh Gupta (he/him), VIKASH...   1088
4         5  [Sukhamay  Singha Roy, Kumar Shubham, Ayush Br...    667
..      ...                                                ...    ...
234    3561                                    [NIRMAL MEWADA]      1
235    3800                                     [Ramraj Nagar]      1
236    4080                                           [Sheet1]      1
237    4090                                      [RAVI RAJPUT]      1
238    4370                                    [Rohit Malviya]      1

[239 rows x 3 columns]


In [228]:
# find_max_min_degrees.py
import pickle
import pandas as pd

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]

# Calculate degree for each node
degree_dict = {node: len(neighbors) for node, neighbors in graph.items()}

# Convert to DataFrame
degree_df = pd.DataFrame(list(degree_dict.items()), columns=["Node", "Degree"])
degree_df.sort_values(by="Degree", ascending=False, inplace=True)

# Find nodes with maximum and minimum degree
max_degree = degree_df["Degree"].max()
min_degree = degree_df["Degree"].min()

nodes_with_max_degree = degree_df[degree_df["Degree"] == max_degree]["Node"].tolist()
nodes_with_min_degree = degree_df[degree_df["Degree"] == min_degree]["Node"].tolist()

# Display results
print(f"\nMaximum Degree: {max_degree}")
print(f"Nodes with Maximum Degree: {nodes_with_max_degree[:5]}")  # Show first 5
print(f"Minimum Degree: {min_degree}")
print(f"Nodes with Minimum Degree: {nodes_with_min_degree[:10]}")  # Show first 5


Maximum Degree: 4370
Nodes with Maximum Degree: ['Rohit Malviya']
Minimum Degree: 1
Nodes with Minimum Degree: ['Asif Shah', 'Nishanth B Jain', 'Souveek Roy', 'Adarsh Kumar, Ph. D.', 'Nitin Arora, Ph. D.', 'Nitya Nandini Garg', 'Arpit Lohani', 'Trandali Kashyap', 'Syamlal S S', 'Sayantan Maiti']


#  Completeness Checker

This notebook checks whether the graph is complete.

### A complete graph means:
Every node is directly connected to every other node.


In [229]:
# check_adjacency.py
import pickle

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]

# Function to check adjacency
def are_adjacent(node1, node2, graph):
    """Returns True if node1 and node2 are directly connected."""
    return node2 in graph.get(node1, set())

# Example nodes (replace with actual names from your dataset)
node_a = "Aman Singh"
node_b = "Ritik Singh"

# Check and display result
if are_adjacent(node_a, node_b, graph):
    print(f" '{node_a}' and '{node_b}' are adjacent.")
else:
    print(f" '{node_a}' and '{node_b}' are NOT adjacent.")

 'Aman Singh' and 'Ritik Singh' are NOT adjacent.


In [230]:
# check_completeness.py
import pickle

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]

# Function to check if graph is complete
def is_complete_graph(graph):
    n = len(graph)
    for node, neighbors in graph.items():
        # In a complete graph, each node should be connected to all others
        if len(neighbors) != n - 1:
            return False
    return True

# Check and display result
if is_complete_graph(graph):
    print(" The graph is COMPLETE — every node is connected to every other node.")
else:
    print(" The graph is NOT complete — some nodes are not directly connected.")

 The graph is NOT complete — some nodes are not directly connected.


In [231]:
# Load the graph
with open("graph_1.pkl", "rb") as f:
    graph = pickle.load(f)

print(f" Graph loaded with {len(graph)} nodes.")

 Graph loaded with 3 nodes.


In [232]:
# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Extract adjacency list
    nodes = data["nodes"]
    edges = data["edges"]

print(f" Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

 Graph loaded with 29540 nodes and 105674 edges.


In [233]:
import pickle
from collections import deque

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Extract adjacency list
    nodes = data["nodes"]
    edges = data["edges"]

print(f" Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

def is_connected(graph):
    if not graph:
        return True
    
    visited = set()
    queue = deque()
    
    # Start BFS from any node
    start_node = next(iter(graph))
    queue.append(start_node)
    visited.add(start_node)
    
    while queue:
        current = queue.popleft()
        for neighbor in graph[current]:
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append(neighbor)
    
    return len(visited) == len(graph)

if is_connected(graph):
    print(" The graph is CONNECTED — there is a path between all pairs of nodes.")
else:
    print("The graph is NOT connected — some nodes cannot be reached from others.")

 Graph loaded with 29540 nodes and 105674 edges.
 The graph is CONNECTED — there is a path between all pairs of nodes.


In [234]:
import pickle
from collections import deque

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Extract adjacency list
    nodes = data["nodes"]
    edges = data["edges"]

print(f"Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

#  Fast BFS-based path finder
def bfs_shortest_path(graph, start, end):
    if start not in graph or end not in graph:
        return None
    
    visited = set()
    queue = deque([[start]])

    while queue:
        path = queue.popleft()
        node = path[-1]

        if node == end:
            return path

        if node not in visited:
            visited.add(node)
            for neighbor in graph[node]:
                if neighbor not in visited:
                    queue.append(path + [neighbor])
    return None

#  Walk/Trail/Path checks
def is_walk(graph, sequence):
    for i in range(len(sequence) - 1):
        if sequence[i+1] not in graph.get(sequence[i], set()):
            return False
    return True

def is_trail(graph, sequence):
    seen_edges = set()
    for i in range(len(sequence) - 1):
        edge = tuple(sorted((sequence[i], sequence[i+1])))
        if edge in seen_edges or sequence[i+1] not in graph.get(sequence[i], set()):
            return False
        seen_edges.add(edge)
    return True

def is_path(graph, sequence):
    return is_trail(graph, sequence) and len(set(sequence)) == len(sequence)

#  Main function
def check_path(graph, node1, node2):
    path = bfs_shortest_path(graph, node1, node2)

    if not path:
        print("No path found between the given nodes.")
        return

    print(" Path found:")
    print(" → ".join(path))
    print(" Is Walk?", is_walk(graph, path))
    print(" Is Trail?", is_trail(graph, path))
    print(" Is Path?", is_path(graph, path))

# 🧪 Test here
check_path(graph, "Aaditya Raj", "Ajit Yadav")  # Replace with real names

Graph loaded with 29540 nodes and 105674 edges.
 Path found:
Aaditya Raj → Ajit Yadav
 Is Walk? True
 Is Trail? True
 Is Path? True


In [235]:
import pickle
from collections import deque

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Extract adjacency list
    nodes = data["nodes"]
    edges = data["edges"]

print(f" Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

def is_tree(graph):
    visited = set()
    parent = {}
    start = next(iter(graph))
    
    def dfs(node, parent_node):
        visited.add(node)
        for neighbor in graph[node]:
            if neighbor not in visited:
                parent[neighbor] = node
                if not dfs(neighbor, node):
                    return False
            elif neighbor != parent_node:
                return False
        return True

    if not dfs(start, None):
        return False

    return len(visited) == len(graph)

def count_leaf_nodes(graph):
    return sum(1 for node in graph if len(graph[node]) == 1)

def is_binary_tree(graph):
    return all(len(neighbors) <= 3 for neighbors in graph.values())  # 2 children + 1 parent max

def compute_tree_height(graph, root):
    visited = set()
    queue = deque([(root, 0)])
    max_depth = 0

    while queue:
        node, depth = queue.popleft()
        visited.add(node)
        max_depth = max(max_depth, depth)
        for neighbor in graph[node]:
            if neighbor not in visited:
                queue.append((neighbor, depth + 1))

    return max_depth

# Run Tree Analysis
if is_tree(graph):
    print(" The graph is a TREE.")
    
    leaf_count = count_leaf_nodes(graph)
    print(f"Number of leaf nodes: {leaf_count}")

    if is_binary_tree(graph):
        print(" It is a BINARY TREE.")
    else:
        print(" It is NOT a binary tree.")

    root_node = next(iter(graph))
    height = compute_tree_height(graph, root_node)
    print(f" Height of the tree (from '{root_node}'): {height}")

else:
    print(" The graph is NOT a tree.")

 Graph loaded with 29540 nodes and 105674 edges.
 The graph is NOT a tree.


In [236]:
import pickle

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Extract adjacency list
    nodes = data["nodes"]
    edges = data["edges"]

print(f" Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

def to_edge_list(graph):
    edges = set()
    for u in graph:
        for v in graph[u]:
            if (v, u) not in edges:
                edges.add((u, v))
    return list(edges)

edge_list = to_edge_list(graph)
print(f"Edge List (Total {len(edge_list)} edges):\n", edge_list[:5])  # Show first 5

import numpy as np

def to_adjacency_matrix(graph):
    nodes = sorted(graph.keys())
    idx = {node: i for i, node in enumerate(nodes)}
    n = len(nodes)
    matrix = np.zeros((n, n), dtype=int)

    for u in graph:
        for v in graph[u]:
            i, j = idx[u], idx[v]
            matrix[i][j] = 1

    return matrix, nodes

adj_matrix, node_list = to_adjacency_matrix(graph)
print("Adjacency Matrix Shape:", adj_matrix.shape)
print(adj_matrix[:5, :5])  # Show top-left corner

from collections import defaultdict

def edge_list_to_adj_list(edge_list):
    adj = defaultdict(set)
    for u, v in edge_list:
        adj[u].add(v)
        adj[v].add(u)
    return dict(adj)

converted_adj_list = edge_list_to_adj_list(edge_list)
print("Converted Adjacency List (first 5 entries):")
for i, (k, v) in enumerate(converted_adj_list.items()):
    if i >= 5:
        break
    print(f"{k}: {list(v)}")

 Graph loaded with 29540 nodes and 105674 edges.
Edge List (Total 105674 edges):
 [('Anand Pandey', 'Karthik A S'), ('Suyash Yadav', 'Himendra Singh'), ('Pinkee Singh', 'NIRMAL MEWADA'), ('Shilpi Shaw', 'Priya Mahatha'), ('RAVI RAJPUT', 'Nikhil Pandey')]
Adjacency Matrix Shape: (29540, 29540)
[[0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]
 [0 0 0 0 0]]
Converted Adjacency List (first 5 entries):
Anand Pandey: ['Anjanee Kumar', 'Amar Kumar', 'Shivam Prajapati', 'Preeti Shukla', 'Ram Bhanwar Bhadiyar', 'Nikhil Naik', 'Purushottam Modi', 'Prachi Dhakad', 'Ritik Singh', 'Greesh Raj Patairiya', 'Himanshu Kumar', 'Deepali Tomar', 'Nilankar Deb', 'Shahin Akhter', 'Abhishek Manjhi', 'TALLA SAI SURYA', 'Gaurav  Tiwari', 'Priya Saini', 'VIKAS KUMAR BIND', 'ANITA RAWAT', 'Shakti Kumar Mishra', 'Monu Bhargav', 'Stuti Singh', 'Ajaz Ul Haq', 'Aryan Saini', 'Chisa G Momin', 'SHAINO  SAJIMON', 'Suraj Singh', 'Rahul Kumar Verma', 'Mr_Saurabh Mishra', 'Durga Prasad', 'Suresh Singh Panwar', 'Rohit K

In [237]:
import pickle

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Extract adjacency list
    nodes = data["nodes"]
    edges = data["edges"]

print(f"Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

from collections import defaultdict

def dfs_spanning_tree_iterative(graph, start):
    visited = set()
    tree = defaultdict(set)
    stack = [start]

    while stack:
        node = stack.pop()
        if node not in visited:
            visited.add(node)
            for neighbor in graph[node]:
                if neighbor not in visited:
                    tree[node].add(neighbor)
                    tree[neighbor].add(node)
                    stack.append(neighbor)
                    
    return dict(tree)

# Use the iterative DFS
start_node = next(iter(graph))
spanning_tree = dfs_spanning_tree_iterative(graph, start_node)

print(f" Spanning Tree has {len(spanning_tree)} nodes.")
for i, (k, v) in enumerate(spanning_tree.items()):
    if i >= 5:
        break
    print(f"{k}: {list(v)}")

Graph loaded with 29540 nodes and 105674 edges.
 Spanning Tree has 29540 nodes.
Yuvraj Bhati: ['Nikhil Yadav', 'Saksham Bharti', 'Pankaj  Yadav', 'Ranjan Singh', 'Satish Mahto', 'Pankaj Rishi', 'Preeti Shukla', 'Ujjval Baijal', 'Ram Bhanwar Bhadiyar', 'Sunny Kumar', 'Ayush Yadav', 'Abhishek Tripathi', 'Challa Trivedh Kumar', 'Raushan Kumar', 'Sandeep kumar', 'Ashwin Yadav', 'Rajiv Kumar', 'Aakash Kumar', 'Yash Chittora', 'Shreyank Sthavaramath', 'Mani Kumar', 'Nallamothu Arun Kumar', 'Himanshu Kumar', 'Sneha Shaw', 'Anamika Kumari', 'Bhaskar Mahato', 'Gaurav  Tiwari', 'Shravan Ram', 'Bharat Suthar', 'Aman Verma', 'Aarti Patil', 'Yash Yadav', 'Pawan Kushwah', 'Hariom Parmar', 'Rohit Malviya', 'Achal Agrawal, PhD', 'Rani Kumari', 'Aryan Saini', 'Nikhil Chaurasiya', 'Naman Damami', 'CHANDAN GIRI', 'Rahul Kumar', 'Afzal Raza', 'Arpita Tripathi', 'Rohit Kumar', 'Rakshita K Biradar', 'JAMAL AKHTAR', 'Prabhat Patidar', 'Prerana  Rajnag', 'Vineela Deepti Naidu', 'Pushpraj Singh', 'Mehtab Alam'

In [238]:
import pickle
from collections import deque

# Load the graph
with open("graph_1.pkl", "rb") as f:
    data = pickle.load(f)
    graph = data["graph"]  # Extract adjacency list
    nodes = data["nodes"]
    edges = data["edges"]

print(f" Graph loaded with {len(graph)} nodes and {len(edges)} edges.")

#  DFS-based all paths finder with limits
def find_all_paths(graph, start, end, path=None, max_paths=100, max_length=10):
    if path is None:
        path = [start]
    
    if start not in graph or end not in graph:
        return []
    
    if len(path) > max_length:  # Stop if path exceeds max_length
        return []
    
    paths = []
    
    if start == end:
        paths.append(path)
        return paths
    
    for neighbor in graph[start]:
        if neighbor not in path:  # Avoid cycles
            if len(paths) >= max_paths:  # Stop if max_paths reached
                return paths
            new_paths = find_all_paths(graph, neighbor, end, path + [neighbor], max_paths, max_length)
            paths.extend(new_paths)
    
    return paths

#  Walk/Trail/Path checks
def is_walk(graph, sequence):
    for i in range(len(sequence) - 1):
        if sequence[i+1] not in graph.get(sequence[i], set()):
            return False
    return True

def is_trail(graph, sequence):
    seen_edges = set()
    for i in range(len(sequence) - 1):
        edge = tuple(sorted((sequence[i], sequence[i+1])))
        if edge in seen_edges or sequence[i+1] not in graph.get(sequence[i], set()):
            return False
        seen_edges.add(edge)
    return True

def is_path(graph, sequence):
    return is_trail(graph, sequence) and len(set(sequence)) == len(sequence)

#  Main function
def check_path(graph, node1, node2, max_paths=100, max_length=10):
    paths = find_all_paths(graph, node1, node2, max_paths=max_paths, max_length=max_length)

    if not paths:
        print(f" No paths found between {node1} and {node2}.")
        return

    print(f" Found {len(paths)} path(s) between {node1} and {node2} (limited to {max_paths} paths, max length {max_length}):\n")
    for idx, path in enumerate(paths, 1):
        print(f" Path {idx} (Length: {len(path)-1} edges):")
        print("  Nodes in path:")
        for i, node in enumerate(path, 1):
            print(f"    {i}. {node}")
        print("  Path sequence: " + " → ".join(path))
        print("  Properties:")
        print(f"     Is Walk? {is_walk(graph, path)}")
        print(f"     Is Trail? {is_trail(graph, path)}")
        print(f"     Is Path? {is_path(graph, path)}")
        print()

# 🧪 Test here
check_path(graph, "Aaditya Raj", "Ajit Yadav", max_paths=10, max_length=5)  # Replace with real names

 Graph loaded with 29540 nodes and 105674 edges.
 Found 10 path(s) between Aaditya Raj and Ajit Yadav (limited to 10 paths, max length 5):

 Path 1 (Length: 4 edges):
  Nodes in path:
    1. Aaditya Raj
    2. Mudasir Ahmad
    3. VISHAL BHARDWAJ
    4. Preeti Shukla
    5. Ajit Yadav
  Path sequence: Aaditya Raj → Mudasir Ahmad → VISHAL BHARDWAJ → Preeti Shukla → Ajit Yadav
  Properties:
     Is Walk? True
     Is Trail? True
     Is Path? True

 Path 2 (Length: 4 edges):
  Nodes in path:
    1. Aaditya Raj
    2. Mudasir Ahmad
    3. VISHAL BHARDWAJ
    4. Prachi Dhakad
    5. Ajit Yadav
  Path sequence: Aaditya Raj → Mudasir Ahmad → VISHAL BHARDWAJ → Prachi Dhakad → Ajit Yadav
  Properties:
     Is Walk? True
     Is Trail? True
     Is Path? True

 Path 3 (Length: 4 edges):
  Nodes in path:
    1. Aaditya Raj
    2. Mudasir Ahmad
    3. VISHAL BHARDWAJ
    4. Himanshu Kumar
    5. Ajit Yadav
  Path sequence: Aaditya Raj → Mudasir Ahmad → VISHAL BHARDWAJ → Himanshu Kumar → Ajit Yada