In [1]:
# Graph and Tree Utilities

def compute_vertex_degrees(adj_list):
    """Return every vertex's degree, ordered from lowest to highest degree.

    The degree of a vertex is the length of its neighbor collection.

    Args:
        adj_list: Mapping of vertex -> collection of neighboring vertices.

    Returns:
        dict of vertex -> degree whose insertion order is ascending by degree.
        Vertices with equal degree keep their relative order from ``adj_list``
        because the sort is stable.
    """
    degree_pairs = [(vertex, len(adjacent)) for vertex, adjacent in adj_list.items()]
    degree_pairs.sort(key=lambda pair: pair[1])
    return dict(degree_pairs)

def adj_list_to_adj_matrix(adj_list):
    """Convert an adjacency list into a 0/1 adjacency matrix.

    Args:
        adj_list: Mapping of vertex -> iterable of neighboring vertices.
            Every neighbor must itself be a key of ``adj_list``; otherwise a
            ``KeyError`` is raised.

    Returns:
        Tuple ``(matrix, nodes)`` where ``nodes`` lists the vertices in the
        iteration order of ``adj_list`` and ``matrix[i][j]`` is 1 when
        ``nodes[j]`` appears among the neighbors of ``nodes[i]``, else 0.
    """
    nodes = list(adj_list)
    position = {}
    for idx, label in enumerate(nodes):
        position[label] = idx
    size = len(nodes)
    matrix = [[0] * size for _ in range(size)]
    for source, targets in adj_list.items():
        row = matrix[position[source]]
        for target in targets:
            row[position[target]] = 1
    return matrix, nodes

def adj_matrix_to_adj_list(matrix, nodes):
    """Convert an adjacency matrix back into an adjacency list.

    Args:
        matrix: Sequence of rows; an entry equal to 1 at ``matrix[i][j]``
            marks an edge from ``nodes[i]`` to ``nodes[j]``. Any other value
            is treated as "no edge".
        nodes: Vertex labels, indexed consistently with the matrix rows and
            columns.

    Returns:
        dict of vertex -> list of neighbors, with neighbors listed in column
        order. Every label in ``nodes`` gets an entry, even without edges.
    """
    adj_list = {label: [] for label in nodes}
    for row_idx, row in enumerate(matrix):
        for col_idx, cell in enumerate(row):
            if cell == 1:
                adj_list[nodes[row_idx]].append(nodes[col_idx])
    return adj_list

def check_adjacency(adj_list, node1, node2):
    """Report whether ``node2`` is listed as a neighbor of ``node1``.

    Only the entry for ``node1`` is consulted, so for a directed adjacency
    list this tests the edge ``node1 -> node2``.

    Args:
        adj_list: Mapping of vertex -> collection of neighboring vertices.
        node1: Vertex whose neighbor collection is searched.
        node2: Vertex to look for.

    Returns:
        True if ``node2`` is among the neighbors of ``node1``; False if it is
        not, or if ``node1`` has no entry in ``adj_list``.
    """
    if node1 not in adj_list:
        return False
    return node2 in adj_list[node1]

def is_graph_complete(adj_list):
    """Check whether every vertex is adjacent to every other vertex.

    A vertex passes only when its neighbor collection has exactly ``n - 1``
    entries AND those entries are precisely the other ``n - 1`` vertices.
    Comparing lengths alone is not enough: self-loops or duplicate entries
    can produce the right count without the right neighbors.

    Args:
        adj_list: Mapping of vertex -> collection of neighboring vertices.

    Returns:
        True if the graph is complete (vacuously True for an empty graph and
        for a single vertex with no neighbors), otherwise False.
    """
    vertices = set(adj_list)
    expected_degree = len(vertices) - 1
    for node, neighbors in adj_list.items():
        # Cheap count check first; it also rejects duplicate entries.
        if len(neighbors) != expected_degree:
            return False
        # The neighbors must be exactly "everyone except me".
        if set(neighbors) != vertices - {node}:
            return False
    return True

def is_graph_connected(adj_list):
    """Check whether every vertex is reachable from the first vertex.

    The traversal is an iterative depth-first search with an explicit stack,
    so long chains do not hit Python's recursion limit.

    Args:
        adj_list: Mapping of vertex -> iterable of neighboring vertices.
            Every neighbor must itself be a key of ``adj_list``; otherwise a
            ``KeyError`` is raised.

    Returns:
        True if the number of vertices reached from the first key equals the
        number of keys. An empty graph is treated as (vacuously) connected.
    """
    if not adj_list:
        # No vertices means no unreachable vertices.
        return True

    starting_node = next(iter(adj_list))
    visited = {starting_node}
    stack = [starting_node]
    while stack:
        current = stack.pop()
        for neighbor in adj_list[current]:
            if neighbor not in visited:
                # Mark on push so a vertex is never stacked twice.
                visited.add(neighbor)
                stack.append(neighbor)
    return len(visited) == len(adj_list)

def classify_vertex_sequence(graph, sequence):
    """Classify a vertex sequence as a path, trail, walk, or not a walk.

    Edges are treated as undirected when detecting repeats: ``(u, v)`` and
    ``(v, u)`` count as the same edge.

    Args:
        graph: Mapping of vertex -> collection of neighboring vertices.
        sequence: Indexable, sliceable sequence of vertices (e.g. a list).

    Returns:
        "Not a walk" if some consecutive pair ``(u, v)`` has ``v`` missing
            from ``graph[u]`` (or ``u`` missing from ``graph``);
        "Path" if no vertex and no edge is repeated;
        "Trail" if a vertex is repeated but no edge is;
        "Walk" if an edge is repeated.
        A sequence with fewer than two vertices has no steps to check and is
        reported as "Path".
    """
    seen_edges = set()
    # Seed with the starting vertex so every vertex already on the route,
    # including the one we are standing on, counts as visited. Without this
    # a self-loop step (u -> u) would not register as a repeated vertex.
    seen_vertices = set(sequence[:1])
    repeated_edge = False
    repeated_vertex = False

    for u, v in zip(sequence, sequence[1:]):
        if v not in graph.get(u, []):
            return "Not a walk"
        # frozenset gives an orientation-free edge key without requiring the
        # two endpoints to be orderable against each other.
        edge = frozenset((u, v))
        if edge in seen_edges:
            repeated_edge = True
        seen_edges.add(edge)
        if v in seen_vertices:
            repeated_vertex = True
        seen_vertices.add(v)

    if not repeated_edge and not repeated_vertex:
        return "Path"
    if not repeated_edge:
        return "Trail"
    return "Walk"

def is_graph_a_tree(adj_list):
    """Check whether an undirected graph is a tree (connected and acyclic).

    Starting from the first vertex, the graph is explored with an explicit
    stack (no recursion, so deep trees are safe). Reaching an already-visited
    vertex through any edge other than the one leading back to the parent
    means there is a cycle.

    Args:
        adj_list: Mapping of vertex -> iterable of neighboring vertices,
            expected to be symmetric (undirected). Every neighbor must itself
            be a key of ``adj_list``; otherwise a ``KeyError`` is raised.

    Returns:
        True if no cycle is found and every vertex is reached from the first
        key; False otherwise. An empty graph is reported as not a tree.
    """
    if not adj_list:
        # A tree needs at least one vertex.
        return False

    start_node = next(iter(adj_list))
    visited = {start_node}
    stack = [(start_node, None)]
    while stack:
        current, parent = stack.pop()
        for neighbor in adj_list[current]:
            if neighbor == parent:
                # The edge we arrived by; not evidence of a cycle.
                continue
            if neighbor in visited:
                return False
            visited.add(neighbor)
            stack.append((neighbor, current))
    # Acyclic so far; it is a tree only if nothing was left unreached.
    return len(visited) == len(adj_list)

def generate_spanning_tree(graph):
    """Build a depth-first spanning tree of the component containing the first vertex.

    The traversal visits neighbors in the order they are listed and descends
    into each newly discovered vertex immediately, using an explicit stack of
    neighbor iterators instead of recursion so deep graphs do not hit
    Python's recursion limit.

    Args:
        graph: Mapping of vertex -> iterable of neighboring vertices. Every
            neighbor must itself be a key of ``graph``; otherwise a
            ``KeyError`` is raised.

    Returns:
        dict of vertex -> list of tree neighbors (each tree edge is recorded
        in both directions). Every vertex of ``graph`` has an entry; vertices
        not reachable from the first key keep an empty list. An empty graph
        yields an empty dict.
    """
    tree = {node: [] for node in graph}
    if not graph:
        return tree

    start = next(iter(graph))
    visited = {start}
    # Each stack frame keeps a live iterator so that, after finishing a
    # subtree, scanning resumes at the next unexamined neighbor.
    stack = [(start, iter(graph[start]))]
    while stack:
        current, neighbors = stack[-1]
        for neighbor in neighbors:
            if neighbor not in visited:
                visited.add(neighbor)
                tree[current].append(neighbor)
                tree[neighbor].append(current)
                stack.append((neighbor, iter(graph[neighbor])))
                break  # descend into the new vertex before continuing here
        else:
            # All neighbors examined: backtrack.
            stack.pop()
    return tree

def count_leaf_nodes_in_tree(tree):
    """Count the vertices that have exactly one neighbor.

    Args:
        tree: Mapping of vertex -> collection of neighboring vertices.

    Returns:
        Number of vertices whose neighbor collection has length 1. A vertex
        with no neighbors at all is not counted.
    """
    return sum(1 for adjacent in tree.values() if len(adjacent) == 1)

def is_binary_tree(tree):
    """Check that no vertex has more than three neighbors.

    In an undirected adjacency list a vertex's neighbors include its parent,
    so three neighbors corresponds to one parent plus at most two children.
    Only the neighbor counts are inspected; the function does not verify
    that the input is actually a tree.

    Args:
        tree: Mapping of vertex -> collection of neighboring vertices.

    Returns:
        True if every vertex has at most three neighbors, otherwise False.
    """
    return all(len(adjacent) <= 3 for adjacent in tree.values())

def get_tree_height(tree, root, count_edges=False):
    """Compute the height of a tree rooted at ``root``.

    The tree is explored level by level with an explicit frontier rather than
    recursion, so very deep trees do not hit Python's recursion limit. A
    visited set keeps the traversal finite even if the input contains a cycle.

    Args:
        tree: Mapping of vertex -> iterable of neighboring vertices
            (undirected, so each child also lists its parent).
        root: Vertex to measure from. Must be a key of ``tree``; otherwise a
            ``KeyError`` is raised.
        count_edges: When False (default), the height is the number of
            vertices on the longest root-to-leaf route, so a lone root has
            height 1. When True, the height is the number of edges on that
            route (lone root -> 0), which matches the edge-counting convention
            of ``get_node_depth``.

    Returns:
        The height as an int, in the convention selected by ``count_edges``.
    """
    visited = {root}
    frontier = [root]
    levels = 0
    while frontier:
        levels += 1
        next_frontier = []
        for node in frontier:
            for child in tree[node]:
                if child not in visited:
                    visited.add(child)
                    next_frontier.append(child)
        frontier = next_frontier
    return levels - 1 if count_edges else levels

def get_node_depth(tree, root, target):
    """Return the number of edges between ``root`` and ``target``.

    The tree is searched level by level with an explicit frontier rather than
    recursion, so very deep trees do not hit Python's recursion limit. A
    visited set keeps the search finite even if the input contains a cycle.

    Args:
        tree: Mapping of vertex -> iterable of neighboring vertices
            (undirected, so each child also lists its parent).
        root: Vertex the depth is measured from (depth 0).
        target: Vertex to locate.

    Returns:
        The depth of ``target`` as an int, or -1 if ``target`` is not
        reachable from ``root``.

    Raises:
        KeyError: If a vertex that has to be expanded is not a key of ``tree``.
    """
    visited = {root}
    frontier = [root]
    depth = 0
    while frontier:
        # Check the whole level before expanding it.
        if target in frontier:
            return depth
        next_frontier = []
        for node in frontier:
            for neighbor in tree[node]:
                if neighbor not in visited:
                    visited.add(neighbor)
                    next_frontier.append(neighbor)
        frontier = next_frontier
        depth += 1
    return -1


# === Example Usage ===

# Sample undirected graph: a triangle A-B-C with a pendant vertex D on C.
graph = {
    'A': ['B', 'C'],
    'B': ['A', 'C'],
    'C': ['A', 'B', 'D'],
    'D': ['C'],
}

# Sample tree rooted at A: B and C are children of A; D and E are children of B.
tree = {
    'A': ['B', 'C'],
    'B': ['A', 'D', 'E'],
    'C': ['A'],
    'D': ['B'],
    'E': ['B'],
}

# Intermediate results that more than one report line relies on.
matrix, nodes = adj_list_to_adj_matrix(graph)
spanning_tree = generate_spanning_tree(graph)

# Each entry is (label, value); they are printed in this order.
for label, value in (
    ("Vertex Degrees:", compute_vertex_degrees(graph)),
    ("Adjacency Matrix:", matrix),
    ("Adjacency List from Matrix:", adj_matrix_to_adj_list(matrix, nodes)),
    ("Are A and B adjacent?", check_adjacency(graph, 'A', 'B')),
    ("Is Graph Complete?", is_graph_complete(graph)),
    ("Is Graph Connected?", is_graph_connected(graph)),
    ("Sequence Classification:", classify_vertex_sequence(graph, ['A', 'B', 'C', 'D'])),
    ("Is Graph a Tree?", is_graph_a_tree(graph)),
    ("Spanning Tree:", spanning_tree),
    ("Leaf Nodes in Tree:", count_leaf_nodes_in_tree(tree)),
    ("Is Binary Tree?", is_binary_tree(tree)),
    ("Tree Height:", get_tree_height(tree, 'A')),
    ("Depth of Node E:", get_node_depth(tree, 'A', 'E')),
):
    print(label, value)


Vertex Degrees: {'D': 1, 'A': 2, 'B': 2, 'C': 3}
Adjacency Matrix: [[0, 1, 1, 0], [1, 0, 1, 0], [1, 1, 0, 1], [0, 0, 1, 0]]
Adjacency List from Matrix: {'A': ['B', 'C'], 'B': ['A', 'C'], 'C': ['A', 'B', 'D'], 'D': ['C']}
Are A and B adjacent? True
Is Graph Complete? False
Is Graph Connected? True
Sequence Classification: Path
Is Graph a Tree? False
Spanning Tree: {'A': ['B'], 'B': ['A', 'C'], 'C': ['B', 'D'], 'D': ['C']}
Leaf Nodes in Tree: 3
Is Binary Tree? True
Tree Height: 3
Depth of Node E: 2


In [2]:
import os
import pandas as pd

# Input and output folder paths (relative to the notebook's working directory)
input_folder = "files_cleaned"
output_folder = "Cleaned_Files"

# Create output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# List all CSV files in the input folder.
# NOTE: the suffix match is case-sensitive, so names ending in ".CSV" are not picked up.
csv_files = [f for f in os.listdir(input_folder) if f.endswith('.csv')]

for file_name in csv_files:
    # The cleaned copy keeps the original file name, written under the output folder
    input_path = os.path.join(input_folder, file_name)
    output_path = os.path.join(output_folder, file_name)

    try:
        # Read with pandas' default encoding first; if decoding fails, retry as ISO-8859-1
        try:
            df = pd.read_csv(input_path)
        except UnicodeDecodeError:
            df = pd.read_csv(input_path, encoding='ISO-8859-1')

        # Check for required columns; raising here hands the file to the outer
        # handler below, which reports it as skipped
        if 'First Name' not in df.columns or 'Last Name' not in df.columns:
            raise KeyError("Missing 'First Name' or 'Last Name'")

        # Create full name: missing parts become empty strings, then the surrounding
        # whitespace is stripped so a single-part name carries no stray space
        df['Full Name'] = df['First Name'].fillna('') + ' ' + df['Last Name'].fillna('')
        df['Full Name'] = df['Full Name'].str.strip()

        # If 'Company' is missing, create empty column
        if 'Company' not in df.columns:
            df['Company'] = ""

        # Keep only desired columns
        cleaned_df = df[['Full Name', 'Company']]
        cleaned_df = cleaned_df[cleaned_df['Full Name'] != '']  # Remove empty names

        # Save cleaned CSV (without the DataFrame index)
        cleaned_df.to_csv(output_path, index=False)
        print(f"✅ Cleaned and saved: {file_name}")

    # Best-effort batch: any failure for one file is reported and the loop
    # moves on to the next file instead of aborting the whole run
    except Exception as e:
        print(f"❌ Skipping {file_name}: {e}")


✅ Cleaned and saved: YuvrajSingh_Bhati - Yuvraj Bhati.csv
✅ Cleaned and saved: Vishal_Bhardwaj - VISHAL BHARDWAJ.csv
✅ Cleaned and saved: khushi_narwariya - Khushi Narwariya.csv
✅ Cleaned and saved: Ayush_Kumar - Ayush Kumar.csv
✅ Cleaned and saved: Mausam_kumari - Mausam kumari.csv
✅ Cleaned and saved: connection1-1 - Aman Adarsh.csv
✅ Cleaned and saved: Mohit_Sharma - Mohit Sharma.csv
✅ Cleaned and saved: Aman_Singh - Aman Singh.csv
✅ Cleaned and saved: Prerana_Rajnag - PRERANA RAJNAG.csv
✅ Cleaned and saved: Shivam_Shukla.csv
✅ Cleaned and saved: Disha_Sahu - Disha Sahu.csv
✅ Cleaned and saved: Challa_Trivedh_Kumar - CHALLA TRIVEDH KUMAR.csv
✅ Cleaned and saved: Vinay_Kumar - VINAY KUMAR.csv
✅ Cleaned and saved: Divyanshi_Rathour - Divyanshi Rathour.csv
✅ Cleaned and saved: Samina_Sultana.csv
✅ Cleaned and saved: Arjun Kadam - Arjun Kadam.csv
✅ Cleaned and saved: Connections - Aman Verma.csv
✅ Cleaned and saved: Linked_in_connection - Samina Sultana.csv
✅ Cleaned and saved: Connecti