# adjacency file 

In [9]:
import os  # Handles folder and file operations

import csv  # stdlib CSV reader/writer; imported here so this cell runs on its own

# Folder where all cleaned CSV files are kept (one file per person)
path_to_clean_profiles = "clean"

# Name of the file this cell generates inside that same folder.
adjacency_file_name = "adjacency_list.csv"


def read_profile_connections(csv_path, owner_name):
    """Return the distinct names found in the first column of one profile CSV.

    The first row is treated as a header and skipped. Blank rows, empty names
    and the owner's own name are ignored. Names are returned in the order in
    which they first appear.

    Parameters:
        csv_path: path of the profile CSV to read (decoded as UTF-8).
        owner_name: name of the person the file belongs to.

    Returns:
        list of str: the de-duplicated connection names.
    """
    found = {}  # dict used as an insertion-ordered set (O(1) duplicate check)
    # newline="" lets the csv module handle line endings and quoted fields itself.
    with open(csv_path, "r", encoding="utf-8", newline="") as opened_file:
        rows = csv.reader(opened_file)
        next(rows, None)  # skip the header row; harmless on an empty file
        for row in rows:
            if not row:
                continue  # csv.reader yields [] for a blank line
            possible_friend = row[0].strip()
            if possible_friend and possible_friend != owner_name:
                found[possible_friend] = None
    return list(found)


# person -> {friend: None}. The inner dicts act as ordered sets so that adding
# an edge does not require scanning a list.
links_by_person = {}

# Sorted so the output is the same on every run (os.listdir order is arbitrary).
list_of_cleaned_files = sorted(os.listdir(path_to_clean_profiles))

# Loop through each file (one per person)
for profile_csv in list_of_cleaned_files:
    # We're only interested in .csv files
    if not profile_csv.endswith(".csv"):
        continue
    # The adjacency list produced by a previous run lives in this same folder;
    # without this check it would be read back as a person called "adjacency_list".
    if profile_csv == adjacency_file_name:
        continue

    # Person's name is the file name without ".csv"
    central_human = profile_csv[:-4]
    full_path_to_file = os.path.join(path_to_clean_profiles, profile_csv)
    human_connections = read_profile_connections(full_path_to_file, central_human)

    # Register the person even when the file lists no connections.
    own_links = links_by_person.setdefault(central_human, {})
    for friend in human_connections:
        # Undirected graph: record the edge in both directions.
        own_links[friend] = None
        links_by_person.setdefault(friend, {})[central_human] = None

# This dictionary stores our social network: person -> list of connected people
social_web = {person: list(friends) for person, friends in links_by_person.items()}

# Time to write the whole web into a new CSV
final_output_path = path_to_clean_profiles + "/" + adjacency_file_name
with open(final_output_path, "w", encoding="utf-8", newline="") as final_csv:
    # csv.writer quotes a field only when needed (e.g. a name containing a comma),
    # so ordinary rows keep the plain "Person,friend1; friend2" layout.
    adjacency_writer = csv.writer(final_csv, lineterminator="\n")
    adjacency_writer.writerow(["Person", "Connections"])

    # One row per person; friends are joined with "; " inside a single column.
    for individual, friends in social_web.items():
        adjacency_writer.writerow([individual, "; ".join(friends)])

print(" Adjacency list saved to:", final_output_path)


 Adjacency list saved to: clean/adjacency_list.csv


# degree 

In [19]:
import pandas as pd
import os

def load_adjacency_table(csv_path):
    """Read the adjacency CSV, keeping every cell as literal text.

    dtype=str together with keep_default_na=False stops pandas from turning
    names such as "NA", "null" or "None" into missing values.

    Parameters:
        csv_path: path of the CSV file to read.

    Returns:
        pandas.DataFrame with string-valued columns.
    """
    return pd.read_csv(csv_path, dtype=str, keep_default_na=False)


def count_connections(cell):
    """Return how many non-blank names a ';'-separated Connections cell holds."""
    return sum(1 for name in cell.split(';') if name.strip())


# Name of the adjacency file (hard-coded; swap in input("Enter file name: ")
# to type it each time)
file_name = "adjacency_list.csv"

# Construct the full path assuming the file is in the current working directory
input_csv_path = os.path.join(os.getcwd(), file_name)

# Check if the file exists
if not os.path.exists(input_csv_path):
    print(" File not found:", input_csv_path)
else:
    try:
        # Load the CSV file
        data = load_adjacency_table(input_csv_path)

        # Check if required columns are present
        if 'Person' not in data.columns or 'Connections' not in data.columns:
            print("CSV must have 'Person' and 'Connections' columns.")
        else:
            # Degree = number of names in the Connections column.
            # fillna('') is a safety net in case a cell still comes back missing.
            data['Degree'] = data['Connections'].fillna('').apply(count_connections)

            # Keep only 'Person' and 'Degree' columns
            degree_data = data[['Person', 'Degree']]

            # Output file path in the same directory
            output_csv_path = os.path.join(os.getcwd(), "degree.csv")
            degree_data.to_csv(output_csv_path, index=False)
            print(" Degree CSV saved at:", output_csv_path)

    except Exception as e:
        # Best-effort cell: report the problem instead of showing a traceback.
        print(" Error processing the file:", e)


 Degree CSV saved at: c:\Users\Prachi\Downloads\mfc_project\degree.csv


# 3. Random walk and pruned path

In [7]:
import csv  # For reading the CSV file
import random  # To randomly choosing adjacet node from connections

# Function to get the graph data from a CSV file
def get_network(file_path):
    """Load an adjacency-list CSV into a dict of person -> list of connections.

    The file must have a header row with 'Person' and 'Connections' columns.
    Connections are separated by ';'; surrounding spaces and empty entries are
    dropped. A row with no Connections field at all is treated as a person
    with no connections.

    Parameters:
        file_path: path of the CSV file (decoded as UTF-8).

    Returns:
        dict mapping each stripped person name to its list of connection names.

    Raises:
        KeyError: if the header lacks a 'Person' or 'Connections' column.
    """
    graph_dic = {}
    # newline='' is what the csv module expects for files it parses itself.
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        for line in csv.DictReader(f):
            person_name = line['Person'].strip()
            # DictReader fills fields missing from a short row with None.
            raw_connections = line['Connections'] or ''
            stripped_names = (friend.strip() for friend in raw_connections.split(';'))
            graph_dic[person_name] = [friend for friend in stripped_names if friend]
    return graph_dic

# Function to pick one neighbor randomly from a person's connections
def pick_random_connection(current_person, social_map):
    """Return one randomly chosen connection of current_person.

    Parameters:
        current_person: key to look up in social_map.
        social_map: dict mapping a person to a list of connections.

    Returns:
        A randomly chosen connection, or None when the person is missing from
        social_map or has no connections (a dead end).
    """
    connections = social_map.get(current_person, [])
    if not connections:
        return None
    return random.choice(connections)


# Function to perform a random walk through the network
def random_path_walk(graph_dic, start, target, limit=100):
    """Walk randomly from start until target is reached.

    Parameters:
        graph_dic: dict mapping a person to a list of connections.
        start: person the walk begins at.
        target: person the walk is trying to reach.
        limit: maximum number of steps to take.

    Returns:
        The list of people visited, from start to target inclusive, or an
        empty list when the walk hits a dead end or runs out of steps.
    """
    journey = [start]
    # Already standing on the target: a zero-step walk is the answer. Without
    # this check the walk would leave the target and only succeed if it
    # happened to wander back.
    if start == target:
        return journey

    position = start
    for _ in range(limit):
        position = pick_random_connection(position, graph_dic)
        if position is None:
            return []  # dead end, nowhere left to go
        journey.append(position)
        if position == target:
            return journey
    # Step limit exhausted without meeting the target.
    return []

# Repeat the random walk until one attempt succeeds or the tries run out
def attempt_multiple_walks(graph_dic, start, target, tries=15, limit=100):
    """Run random_path_walk up to `tries` times and return the first success.

    Parameters:
        graph_dic: dict mapping a person to a list of connections.
        start: person each walk begins at.
        target: person each walk tries to reach.
        tries: maximum number of walks to attempt.
        limit: step limit passed on to every walk.

    Returns:
        The first non-empty walk, or an empty list when every attempt fails.
    """
    for attempt_no in range(1, tries + 1):
        # Progress message so the user can see how many attempts were needed
        print(f"Attempt number {attempt_no}...")
        outcome = random_path_walk(graph_dic, start, target, limit)
        if not outcome:
            continue
        return outcome
    return []

# Function to clean up the journey by removing any cycles/loops
def prune(route):
    """Return route with every loop cut out.

    Whenever a person reappears, everything walked since their earlier
    appearance is discarded, so each person ends up in the result at most once
    and consecutive entries of the result were consecutive somewhere in route.

    Parameters:
        route: sequence of hashable node names, in visiting order.

    Returns:
        A new list holding the loop-free route.
    """
    # Position of each person currently present in cleaned_route
    position_of = {}
    cleaned_route = []

    for person in route:
        if person in position_of:
            keep = position_of[person] + 1
            # Forget the people being cut out. If their old positions stayed
            # in position_of, a later revisit would be mistaken for a loop and
            # the person would be dropped from the path.
            for dropped in cleaned_route[keep:]:
                del position_of[dropped]
            del cleaned_route[keep:]
        else:
            position_of[person] = len(cleaned_route)
            cleaned_route.append(person)

    return cleaned_route

# Note: Not using 'if __name__ == "__main__":' — this will run as soon as loaded
# Quick scripts sometimes work this way, though not ideal for larger projects

# Adjacency list CSV that describes the graph
file_path = r"adjacency_list.csv"

# Build the in-memory network from that file
network = get_network(file_path)

# Ask for the two endpoints; an empty answer means "skip the walk"
start_input = input("Enter the START student name (or press Enter to skip): ").strip()
end_input = input("Enter the END student name (or press Enter to skip): ").strip()

if not (start_input and end_input):
    # At least one name was left blank
    print(" Code executed successfully. No input provided.")
elif start_input not in network:
    print(f"'{start_input}' doesn't seem to be in the network.")
elif end_input not in network:
    print(f"'{end_input}' isn't in the network either.")
else:
    # Several random walks are attempted; the first one that reaches the end wins
    final_walk = attempt_multiple_walks(network, start_input, end_input)

    if final_walk:
        # Show the raw walk, then the same walk with its loops removed
        print("Random Path:", final_walk)
        cleaned_path = prune(final_walk)
        print("Pruned path (removed cycles):", cleaned_path)
    else:
        print("No walk found, try again.")


 Code executed successfully. No input provided.


# estimating data

In [4]:
import networkx as nx  # Used for graph operations like nodes and edges
import random          # Used to pick random items like people and neighbors
import pandas as pd    # Used to read CSV data into a table-like structure

# Function to read a graph from a CSV file
def read_graph(file_path):
    """Build an undirected networkx graph from an adjacency-list CSV.

    The CSV must have 'Person' and 'Connections' columns; connections are
    separated by ';'. Every person becomes a node and every listed connection
    becomes an edge.

    Parameters:
        file_path: path of the CSV file (decoded as UTF-8).

    Returns:
        networkx.Graph, or None (after printing a message) when the file
        cannot be read, lacks the required columns, or yields no nodes.
    """
    graph = nx.Graph()  # Create an empty graph
    try:
        # dtype=str + keep_default_na=False keep every cell as literal text, so
        # a person called "NA", "null", "None" or "nan" is not turned into a
        # missing value.
        data = pd.read_csv(
            file_path, sep=',', encoding='utf-8', dtype=str, keep_default_na=False
        )
    except Exception as error:
        print("Error reading CSV file:", error)
        return None

    if 'Person' not in data.columns or 'Connections' not in data.columns:
        print("CSV must have 'Person' and 'Connections' columns.")
        return None

    # fillna('') is a safety net in case a cell still comes back missing.
    people = data['Person'].fillna('')
    connection_cells = data['Connections'].fillna('')

    for person, connections in zip(people, connection_cells):
        person = person.strip()
        if not person:
            continue  # a row without a name cannot be a node
        graph.add_node(person)  # keeps people who have no connections

        for friend in connections.split(';'):
            friend = friend.strip()
            if friend:
                graph.add_edge(person, friend)  # add_edge also creates the friend node

    if graph.number_of_nodes() == 0:
        print("Graph is empty. Please check the CSV format.")
        return None
    return graph

# Random walk from one person towards another
def random_walk(graph, start_person, end_person, max_steps=1000):
    """Wander randomly over graph from start_person until end_person is hit.

    Parameters:
        graph: object whose neighbors(node) yields the nodes adjacent to node.
        start_person: node the walk starts from.
        end_person: node the walk is looking for.
        max_steps: maximum number of moves allowed.

    Returns:
        The list of visited nodes (start and end included) when the target is
        reached, otherwise an empty list (dead end or step budget used up).
    """
    trail = [start_person]
    here = start_person
    moves_made = 0

    while True:
        if here == end_person:
            return trail  # target reached
        if moves_made >= max_steps:
            return []  # out of steps
        options = list(graph.neighbors(here))
        if len(options) == 0:
            return []  # nowhere to go from here
        # Uniformly random neighbour
        here = options[random.randint(0, len(options) - 1)]
        trail.append(here)
        moves_made += 1

# Function to shorten the path using shortest path logic
def shorten_path(graph, full_path):
    """Return a shorter stand-in for a random walk.

    If the walk is more than twice as long as the shortest path between its
    endpoints, that shortest path is returned. Otherwise the walk itself is
    returned with immediately repeated nodes collapsed. Only consecutive
    repeats are removed, so the result may still visit a node more than once.

    Parameters:
        graph: networkx graph the walk was taken on.
        full_path: list of nodes in visiting order.

    Returns:
        list of nodes; empty when full_path is empty or when networkx reports
        that no path exists or that an endpoint is not in the graph.
    """
    if not full_path:
        return []
    try:
        best_path = nx.shortest_path(graph, source=full_path[0], target=full_path[-1])
    except (nx.NetworkXNoPath, nx.NodeNotFound):
        # Only the "no route" outcomes are treated as "nothing to return";
        # anything else (including Ctrl-C) is allowed to propagate.
        return []

    if len(full_path) > 2 * len(best_path):
        return best_path  # Use the short one if original is way longer

    # Otherwise keep the walk, collapsing consecutive duplicates
    cleaned = [full_path[0]]
    for node in full_path[1:]:
        if node != cleaned[-1]:
            cleaned.append(node)
    return cleaned

# Summary statistics (mean, median, mode, min, max) for a list of lengths
def calculate_stats(length_list):
    """Compute basic descriptive statistics for a non-empty list of numbers.

    Parameters:
        length_list: non-empty list of numeric values.

    Returns:
        dict with the keys "mean", "median", "mode", "min" and "max". When
        several values share the highest frequency, the one that appears
        first in length_list is reported as the mode.
    """
    how_many = len(length_list)

    # One pass collects both the running sum and the frequency of each value
    running_total = 0
    tallies = {}
    for value in length_list:
        running_total += value
        tallies[value] = tallies.get(value, 0) + 1
    mean_value = running_total / how_many

    # Median: middle element, or the average of the two middle elements
    ordered = sorted(length_list)
    half = how_many // 2
    if how_many % 2:
        median_value = ordered[half]
    else:
        median_value = (ordered[half - 1] + ordered[half]) / 2

    return {
        "mean": mean_value,
        "median": median_value,
        # max() keeps the first key among ties, i.e. the earliest-seen value
        "mode": max(tallies, key=tallies.get),
        "min": min(length_list),
        "max": max(length_list),
    }

def _print_length_stats(heading, stats):
    """Print heading followed by the mean/median/mode/max/min of a stats dict."""
    print(heading)
    print("Mean Length:", round(stats["mean"], 2))
    print("Median Length:", round(stats["median"], 2))
    print("Mode Length:", stats["mode"])
    print("Max Length:", stats["max"])
    print("Min Length:", stats["min"])


# Function to perform multiple random walks and print results
def estimate_random_walks(file_path, max_pairs=1000, max_walks=100):
    """Sample random pairs of people, random-walk between them and print a report.

    Pairs are drawn until either max_pairs distinct pairs have been tried or
    max_walks successful walks have been collected. Each pair gets up to ten
    walk attempts. The report lists length statistics for the raw walks and
    their shortened versions, plus the share of people touched by any walk.

    Parameters:
        file_path: adjacency-list CSV passed to read_graph.
        max_pairs: maximum number of distinct pairs to try.
        max_walks: number of successful walks after which sampling stops.

    Returns:
        None. Results are printed.
    """
    graph = read_graph(file_path)
    if graph is None:
        return

    people = list(graph.nodes())
    total_people = len(people)
    print("Graph has", total_people, "people and", graph.number_of_edges(), "connections")

    if nx.is_connected(graph):
        print("Graph is fully connected.")
    else:
        print("Graph is not fully connected.")

    attempts_per_pair = 10  # walk attempts made for every sampled pair
    pair_count = 0          # Number of person pairs we tried
    walk_count = 0          # Number of successful walks we found
    full_walks = []         # Stores all the original walks
    short_walks = []        # Stores the cleaned-up (shortened) versions
    visited_people = set()  # To track how many unique people were involved

    # With fewer than two people every draw gives p1 == p2, and the loop would
    # spin forever without advancing either counter.
    can_sample_pairs = total_people >= 2

    while can_sample_pairs and pair_count < max_pairs and walk_count < max_walks:
        p1 = people[random.randint(0, total_people - 1)]
        p2 = people[random.randint(0, total_people - 1)]
        if p1 == p2:
            continue  # Skip if both persons are the same
        pair_count += 1

        for _ in range(attempts_per_pair):
            walk = random_walk(graph, p1, p2)
            if not walk:
                continue
            short = shorten_path(graph, walk)
            if short:
                full_walks.append(walk)
                short_walks.append(short)
                visited_people.update(walk)
                walk_count += 1
                break  # Move to next pair

    # Display the results
    print("\n--- Random Walk Estimation Results ---")
    print("Total Pairs Tried:", pair_count)
    print("Successful Walks Found:", walk_count)

    if full_walks:
        original_lengths = [len(walk) for walk in full_walks]
        cleaned_lengths = [len(walk) for walk in short_walks]

        _print_length_stats("\nOriginal Walk Stats:", calculate_stats(original_lengths))
        _print_length_stats("\nCleaned Walk Stats:", calculate_stats(cleaned_lengths))

        coverage = (len(visited_people) / total_people) * 100
        print("\nVisited People Coverage: {:.2f}%".format(coverage))

# Run the estimation on the adjacency list CSV. The path is relative, so the
# file is looked up in the notebook's current working directory.
estimate_random_walks("adjacency_list.csv")


Graph has 28459 people and 101339 connections
Graph is fully connected.

--- Random Walk Estimation Results ---
Total Pairs Tried: 781
Successful Walks Found: 100

Original Walk Stats:
Mean Length: 492.33
Median Length: 494.0
Mode Length: 80
Max Length: 993
Min Length: 7

Cleaned Walk Stats:
Mean Length: 3.75
Median Length: 4.0
Mode Length: 4
Max Length: 7
Min Length: 3

Visited People Coverage: 36.57%
