# Homework 1: Six Degrees of Kevin Bacon

Read [bfs_six_degrees.pdf](bfs_six_degrees.pdf) for the assignment spec. Submit this .ipynb to the professor to demonstrate your solution. This is unofficial and is maintained by a fellow student, Justin, aka the glizzy goblin.

In [98]:
from graph_tools import graph
from collections import deque

test = graph()
test.vertices.add(5)
print(test.vertices.pop())

5


In [99]:
# Helper Functions
def process_credits_file_to_graph(graph:graph, file_path:str):
    try:
        with  open(file_path, 'r', encoding='utf-8') as list_edges:
            for line in list_edges:
                process_line(graph, line)
    except FileNotFoundError:
        print(f"Error: was not able to find {file_path}")
    except UnicodeDecodeError as e:
        print(f"Encoding error while reading the file: {e}")

def process_line(graph:graph, line:str):
    split_line = line.strip().split()
    if not split_line: return
    
    movie_name, actors = split_line[0], split_line[1:]
    
    for actor_1 in actors:
        co_stars = [actor for actor in actors if actor != actor_1]
        for actor_2 in co_stars:
            graph.Add_und_edge(actor_1, actor_2, movie_name)

In [100]:
def format_output(merged_result:list):
    if not merged_result: return "No path found"
    out=""
    for i in range(len(merged_result)):
        if i % 2 == 1:
            out += f"-({merged_result[i]})-"
            continue
        out += merged_result[i]
    return out

def reconstruct_path(adj, current, graph):
        # reconstruct path from start to end given adjacency list        
        total_path = []
        while current in adj and current != None:
            total_path.append(current)
            current = adj[current]
        total_path = total_path[::-1]
        movies = [graph.adj_list[total_path[i]][total_path[i + 1]] for i in range(len(total_path) - 1)]
        
        # merge results between actor names and movie edges
        merged_result = [item for pair in zip(total_path, movies) for item in pair]
        merged_result.extend(total_path[len(movies):])

        # format oout put as string
        return format_output(merged_result)        

**Vanilla BFS**

In [101]:
def bfs(start:str, end:str, graph:graph):
    if start not in graph.vertices or end not in graph.vertices:
        return "Not present"    
    queue = deque([])
    visited = {actor: False for actor in graph.vertices}
    pred = {actor: None for actor in graph.vertices}
    
    visited[start] = True
    queue.append(start)
    # while queue is not empty
    while queue:
        # pop from queue
        current = queue.popleft()
        if(current == end):
            return reconstruct_path(adj=pred, current=current, graph=graph)
        for neighbor in graph.adj_list[current]:
                if not visited[neighbor]:
                    visited[neighbor] = True
                    pred[neighbor] = current
                    queue.append(neighbor)
               
                
    return "Not present"
     

In [102]:
actor_graph = graph()
file_path = "smaller_imdb_cleaned.txt"
process_credits_file_to_graph(actor_graph, file_path)

In [103]:
input_file = "more-input.txt"
output_file = "more-output.txt"

input_parameters = open(input_file, 'r', encoding='utf-8')
expected_output = open(output_file, 'r', encoding='utf-8')

print("BFS Output\n")
for param, out in zip(input_parameters, expected_output):
    striped_param = param.strip().split()
    start, end = striped_param[0], striped_param[1]
    path = bfs(start=start, end=end, graph=actor_graph)
    print(f"{path}\n")

BFS Output

Brad_Pitt-(On_Location:_Fight_Club)-Edward_Norton-(The_Making_of_'The_Illusionist')-Jessica_Biel-(JT:_Reflections)-Justin_Timberlake-(Justin_Timberlake:_Justified_-_The_Videos)-Nelly-(Nelly_&_St._Lunatics:_Batter_Up)-Ali-(Khaidi_No._150)-Chiranjeevi-(Sye_Raa_Narasimha_Reddy)-Amitabh_Bachchan

Brad_Pitt-(Troy:_In_the_Thick_of_Battle)-Simon_Crane-(A_World_on_the_'Edge_of_Tomorrow')-Tom_Cruise

Will_Smith-(Club_Oscar)-Robert_De_Niro-(Scorsese's_Goodfellas)-Leonardo_DiCaprio-(Inception)-Ellen_Page

Not present

Not present

Marion_Cotillard-(Blood_Ties:_Behind_the_Scenes)-Clive_Owen-(King_Arthur:_A_Roundtable_Discussion)-Antoine_Fuqua-(The_Magnificent_Seven:_Gunslingers)-Ethan_Hawke

Kevin_Bacon-(Sundance_Skippy)-Zooey_Deschanel-(Winter_Passing)-Will_Ferrell-(The_Zoolander_Legacy)-Justin_Bieber

Frank_Sinatra_Jr.-(Do_It_in_the_Dirt)-Suzan_Averitt-(Rebel_Dabble_Babble)-James_Franco-(Love_Conquers_All:_The_Making_of_Tristan_+_Isolde)-Jim_Lemley-(Through_the_Eyes_of_Director_Timur

**Bi-Directional BFS**

In [104]:
# Refrenced https://www.geeksforgeeks.org/bidirectional-search/

def reconstruct_bidi_path(start_pred, end_pred, start,end, mid, graph):
    
	total_path = []
	current = mid
	while current in start_pred and current != None:
		total_path.append(current)
		current = start_pred[current]
  
	total_path = total_path[::-1]
	current = end_pred[mid]
 
	while current in end_pred and current != None:
		total_path.append(current)
		current = end_pred[current]
  
	movies = [graph.adj_list[total_path[i]][total_path[i + 1]] for i in range(len(total_path) - 1)]

	# merge results between actor names and movie edges
	merged_result = [item for pair in zip(total_path, movies) for item in pair]
	merged_result.extend(total_path[len(movies):])

	# format oout put as string
	return format_output(merged_result) 

def BiDi_BFS(start: str, end: str, graph: graph):
	if start not in graph.vertices or end not in graph.vertices:
		return "Not present"
	start_queue = deque([])
	end_queue = deque([])

	# initializes start and end visted nodes as false
	start_visited = {actor: False for actor in graph.vertices}
	end_visited = {actor: False for actor in graph.vertices}

	#initialize start and end adj list
	start_pred = {actor: None for actor in graph.vertices}
	end_pred = {actor: None for actor in graph.vertices}
	
	start_queue.append(start)
	start_visited[start] = True
 
	end_queue.append(end)
	end_visited[end] = True
 
	while len(start_queue) > 0  and len(end_queue) > 0:
		# Forward BFS
		curr_start = start_queue.popleft()
		for neighbor in graph.adj_list[curr_start]:
			if not start_visited[neighbor]:
				start_visited[neighbor] = True
				start_pred[neighbor] = curr_start
				start_queue.append(neighbor)
			if(end_visited[neighbor]): # intersection has been found
				return reconstruct_bidi_path(start_pred, end_pred, start, end, neighbor, graph)
			
		# Backward BFS
		curr_end = end_queue.popleft()
		for neighbor in graph.adj_list[curr_end]:
			if not end_visited[neighbor]:
				end_visited[neighbor] = True
				end_pred[neighbor] = curr_end
				end_queue.append(neighbor)
			if(start_visited[neighbor]): # intersection has been found
				return reconstruct_bidi_path(start_pred, end_pred, start, end, neighbor, graph)
	
	return "No Path Found"

In [105]:
input_file = "more-input.txt"
output_file = "more-output.txt"

input_parameters = open(input_file, 'r', encoding='utf-8')
expected_output = open(output_file, 'r', encoding='utf-8')

print("Bi-Directional BFS Output\n")
for param, out in zip(input_parameters, expected_output):
    striped_param = param.strip().split()
    start, end = striped_param[0], striped_param[1]
    path = BiDi_BFS(start=start, end=end, graph=actor_graph)
    print(f"{path}\n")

Bi-Directional BFS Output

Brad_Pitt-(On_Location:_Fight_Club)-Edward_Norton-(The_Making_of_'The_Illusionist')-Jessica_Biel-(JT:_Reflections)-Justin_Timberlake-(Justin_Timberlake:_Justified_-_The_Videos)-Nelly-(Nelly_&_St._Lunatics:_Batter_Up)-Ali-(Khaidi_No._150)-Chiranjeevi-(Sye_Raa_Narasimha_Reddy)-Amitabh_Bachchan

Brad_Pitt-(Moneyball:_Playing_the_Game)-Philip_Seymour_Hoffman-(The_Making_of_the_Mission)-Tom_Cruise

Will_Smith-(Club_Oscar)-Martin_Scorsese-(Crossing_Criminal_Cultures)-Leonardo_DiCaprio-(Inception)-Ellen_Page

Not present

Not present

Marion_Cotillard-(David_Bowie:_The_Next_Day)-Gary_Oldman-(State_of_Grace)-Ed_Harris-(NT2:_Evolution_of_a_Golden_City)-Nicolas_Cage-(The_Making_of_'Lord_of_War')-Ethan_Hawke

Kevin_Bacon-(My_One_and_Only)-Renée_Zellweger-(Club_Oscar)-Robert_De_Niro-(International_Jazz_Day)-Tony_Bennett-(Artists_for_Haiti:_We_Are_the_World_25_for_Haiti)-Justin_Bieber

Frank_Sinatra_Jr.-(Do_It_in_the_Dirt)-Suzan_Averitt-(The_Hitch-Hikers)-Patty_Duke-(Gift