# Shortest path between actors

What's the shortest path between two actors, via the films they've acted in together?

In [1]:
from collections import Counter

import networkx as nx
import pandas as pd

In [2]:
# Load the titles. Only the columns used below are parsed (usecols), so the
# remaining columns of the TSV are never held in memory.
title = pd.read_csv(
    'title.basics.tsv.gz', sep='\t', low_memory=False,
    usecols=['tconst', 'titleType', 'primaryTitle', 'startYear'],
).set_index('tconst')[['titleType', 'primaryTitle', 'startYear']]

# Load the credits (who worked on which title, and in what category).
# Again, read only the three columns that are needed.
principals = pd.read_csv(
    'title.principals.tsv.gz', sep='\t',
    usecols=['tconst', 'nconst', 'category'],
)[['tconst', 'nconst', 'category']]

In [3]:
# Keep acting credits only (drop directors, composers, etc.).
# This shrinks the data to about 40%.
actors = principals[principals['category'].isin({'actor', 'actress'})]

In [4]:
# Restrict to titles whose titleType is 'movie' (no TV series, etc.),
# then keep only the credits belonging to those titles. Shrinks data to ~5%
movies = title.loc[title['titleType'] == 'movie']
actors = actors.loc[actors['tconst'].isin(movies.index)]

In [5]:
# Drop the reference to the full credits table to save memory; from here on
# only the filtered `actors` frame is used.
# NOTE: re-running this cell on its own raises NameError, because
# `principals` no longer exists after the first run.
del principals

In [6]:
# This is what the network looks like: each row becomes one edge of the graph
# built below, linking a film (tconst) to a person who acted in it (nconst).
actors.head()

Unnamed: 0,tconst,nconst,category
24,tt0000009,nm0063086,actress
25,tt0000009,nm0183823,actor
26,tt0000009,nm1309758,actor
531,tt0000335,nm1010955,actress
532,tt0000335,nm1012612,actor


In [7]:
# Build the graph: one node per film id and per person id,
# and one edge for every (film, actor) row in `actors`.
G = nx.from_pandas_edgelist(actors, source='tconst', target='nconst')

In [8]:
# Load the name data, indexed by person id (nconst).
# Only the columns used in this notebook are parsed, to keep memory down.
name = pd.read_csv(
    'name.basics.tsv.gz', sep='\t',
    usecols=['nconst', 'primaryName', 'birthYear'],
).set_index('nconst')[['primaryName', 'birthYear']]

Here are some actors whose network we could explore.

Bollywood Actors

- Rajinikanth: nm0707425
- Kamal Haasan
- Chiranjeevi
- Govinda
- Jitendra
- Brahmanandam

Hollywood Actresses

- Angelina Jolie: nm0001401
- Scarlett Johansson
- Jessica Alba
- Emma Watson
- Julia Roberts

Crossovers

- Priyanka Chopra (Baywatch)
- Deepika Padukone (Return of Xander Cage)
- Irrfan Khan (Inferno, Jurassic World, The Amazing Spider-Man, A Mighty Heart)
- Anil Kapoor (Mission Impossible)
- Amrish Puri (Indiana Jones)
- Anupam Kher (Bend it Like Beckham, Silver Linings Playbook, Lust, Caution)
- Om Puri (City of Joy, Wolf, Ghost and the Darkness, Charlie Wilson's War)

In [9]:
def path(source, target):
    '''Returns the shortest path between two actors.

    source, target: actor names, matched exactly against name['primaryName'].

    The result is a list alternating actor name, film title, actor name, ...
    from source to target.

    Several people can share the same primaryName. When that happens, the
    first match that is actually a node in G is used; if none is in G, the
    first match overall is used and networkx reports the missing node.

    Raises IndexError if a name does not occur in the name data at all.
    Errors from nx.shortest_path (e.g. no path exists) propagate unchanged.
    '''
    ids = []
    for person in (source, target):
        matches = name[name['primaryName'] == person].index
        if len(matches) == 0:
            raise IndexError('No person named {!r} in the name data'.format(person))
        # Prefer a namesake that appears in the graph over a blind "first match"
        ids.append(next((m for m in matches if m in G), matches[0]))
    return _n(nx.shortest_path(G, ids[0], ids[1]))

def _n(path):
    '''Converts a list of node ids into readable labels.

    Ids starting with 'tt' are looked up in title['primaryTitle'];
    every other id is looked up in name['primaryName'].
    '''
    labels = []
    for node in path:
        if node.startswith('tt'):
            labels.append(title['primaryTitle'][node])
        else:
            labels.append(name['primaryName'][node])
    return labels

In [10]:
def paths(source, target):
    '''Returns all the shortest paths between two actors.

    source, target: actor names, matched exactly against name['primaryName'].

    The result is a list of paths; each path is a list alternating
    actor name, film title, actor name, ... from source to target.

    Several people can share the same primaryName. When that happens, the
    first match that is actually a node in G is used; if none is in G, the
    first match overall is used and networkx reports the missing node.

    Raises IndexError if a name does not occur in the name data at all.
    Errors from nx.all_shortest_paths (e.g. no path exists) propagate unchanged.
    '''
    ids = []
    for person in (source, target):
        matches = name[name['primaryName'] == person].index
        if len(matches) == 0:
            raise IndexError('No person named {!r} in the name data'.format(person))
        # Prefer a namesake that appears in the graph over a blind "first match"
        ids.append(next((m for m in matches if m in G), matches[0]))
    return [_n(p) for p in nx.all_shortest_paths(G, ids[0], ids[1])]

In [12]:
# How can we connect Manorama and Angelina Jolie?
# This lists the actor - movie - actor - movie - ... - actor
path('Aachi Manorama', 'Angelina Jolie')

['Aachi Manorama',
 'Rikshavodu',
 'Paresh Rawal',
 'What If?',
 'Irrfan Khan',
 'A Mighty Heart',
 'Angelina Jolie']

In [13]:
# Rajinikanth is 2 people away from Angelina
path('Rajinikanth', 'Angelina Jolie')

['Rajinikanth',
 'Andhaa Kaanoon',
 'Amitabh Bachchan',
 'Piku',
 'Irrfan Khan',
 'A Mighty Heart',
 'Angelina Jolie']

In [15]:
# Brahmanandam is 3 people away from Katharine Hepburn
path('Brahmanandam', 'Katharine Hepburn')

['Brahmanandam',
 'Little Soldiers',
 'Rohini Hattangadi',
 'Gandhi',
 'John Gielgud',
 'Secret Agent',
 'Robert Young',
 'Spitfire',
 'Katharine Hepburn']

In [17]:
# Govinda is 2 people away from Angelina Jolie
path('Govinda', 'Angelina Jolie')

['Govinda',
 'Awaargi',
 'Anil Kapoor',
 'Besan',
 'Irrfan Khan',
 'A Mighty Heart',
 'Angelina Jolie']

In [19]:
# These are the paths between Gerard Butler and Priyanka Chopra
# (He wants to act with her)
paths('Gerard Butler', 'Priyanka Chopra')

[['Gerard Butler',
  'Reign of Fire',
  'Matthew McConaughey',
  'The Paperboy',
  'Zac Efron',
  'Baywatch',
  'Priyanka Chopra'],
 ['Gerard Butler',
  'The Game of Their Lives',
  'Wes Bentley',
  'We Are Your Friends',
  'Zac Efron',
  'Baywatch',
  'Priyanka Chopra'],
 ['Gerard Butler',
  'Chasing Mavericks',
  'Jonny Weston',
  'We Are Your Friends',
  'Zac Efron',
  'Baywatch',
  'Priyanka Chopra'],
 ['Gerard Butler',
  'P.S. I Love You',
  'Lisa Kudrow',
  'Neighbors',
  'Zac Efron',
  'Baywatch',
  'Priyanka Chopra'],
 ['Gerard Butler',
  'Playing for Keeps',
  'Dennis Quaid',
  'At Any Price',
  'Zac Efron',
  'Baywatch',
  'Priyanka Chopra'],
 ['Gerard Butler',
  'Timeline',
  'Paul Walker',
  'Fast Five',
  'Dwayne Johnson',
  'Baywatch',
  'Priyanka Chopra'],
 ['Gerard Butler',
  'Timeline',
  'Paul Walker',
  'Fast & Furious 6',
  'Dwayne Johnson',
  'Baywatch',
  'Priyanka Chopra'],
 ['Gerard Butler',
  'Timeline',
  'Paul Walker',
  'Furious 7',
  'Dwayne Johnson',
  'Ba

In [21]:
# Stallone wants to act with Salman
paths('Sylvester Stallone', 'Salman Khan')

[['Sylvester Stallone',
  'Incredible Love',
  'Akshay Kumar',
  'Mujhse Shaadi Karogi',
  'Salman Khan'],
 ['Sylvester Stallone',
  'Incredible Love',
  'Akshay Kumar',
  "Jaan-E-Mann: Let's Fall in Love... Again",
  'Salman Khan']]

In [23]:
# Kristen wants to act with Hrithik
paths('Kristen Stewart', 'Hrithik Roshan')

[['Kristen Stewart',
  'The Messengers',
  'Dylan McDermott',
  'The Mistress of Spices',
  'Aishwarya Rai Bachchan',
  'Dhoom 2',
  'Hrithik Roshan'],
 ['Kristen Stewart',
  'The Messengers',
  'Dylan McDermott',
  'The Mistress of Spices',
  'Aishwarya Rai Bachchan',
  'Jodhaa Akbar',
  'Hrithik Roshan'],
 ['Kristen Stewart',
  'The Messengers',
  'Dylan McDermott',
  'The Mistress of Spices',
  'Aishwarya Rai Bachchan',
  'Guzaarish',
  'Hrithik Roshan']]

In [24]:
# Daniel Radcliffe wants to act with Shah Rukh Khan
paths('Daniel Radcliffe', 'Shah Rukh Khan')

[['Daniel Radcliffe',
  "Harry Potter and the Sorcerer's Stone",
  'Richard Harris',
  'The Molly Maguires',
  'Sean Connery',
  'The Man Who Would Be King',
  'Saeed Jaffrey',
  'English Babu Desi Mem',
  'Shah Rukh Khan'],
 ['Daniel Radcliffe',
  'Harry Potter and the Chamber of Secrets',
  'Richard Harris',
  'The Molly Maguires',
  'Sean Connery',
  'The Man Who Would Be King',
  'Saeed Jaffrey',
  'English Babu Desi Mem',
  'Shah Rukh Khan'],
 ['Daniel Radcliffe',
  "Harry Potter and the Sorcerer's Stone",
  'Richard Harris',
  'Robin and Marian',
  'Sean Connery',
  'The Man Who Would Be King',
  'Saeed Jaffrey',
  'English Babu Desi Mem',
  'Shah Rukh Khan'],
 ['Daniel Radcliffe',
  'Harry Potter and the Chamber of Secrets',
  'Richard Harris',
  'Robin and Marian',
  'Sean Connery',
  'The Man Who Would Be King',
  'Saeed Jaffrey',
  'English Babu Desi Mem',
  'Shah Rukh Khan'],
 ['Daniel Radcliffe',
  "Harry Potter and the Sorcerer's Stone",
  'Richard Harris',
  'Highpoint',


In [20]:
# These are the paths from Govinda to Irrfan Khan
paths('Govinda', 'Irrfan Khan')

[['Govinda', 'Awaargi', 'Anil Kapoor', 'Besan', 'Irrfan Khan'],
 ['Govinda', 'Deewana Mastana', 'Anil Kapoor', 'Besan', 'Irrfan Khan'],
 ['Govinda', 'Kali Ganga', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda',
  'Shola Aur Shabnam',
  'Gulshan Grover',
  'Knock Out',
  'Irrfan Khan'],
 ['Govinda', 'Bhabhi', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda', 'Karz Chukana Hai', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda', 'Gambler', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda', 'Halaal Ki Kamai', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda', 'Raeeszada', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda',
  'Jaisi Karni Waisi Bharni',
  'Gulshan Grover',
  'Knock Out',
  'Irrfan Khan'],
 ['Govinda', 'Zordaar', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda', 'Mera Lahoo', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda', 'Naya Khoon', 'Gulshan Grover', 'Knock Out', 'Irrfan Khan'],
 ['Govinda',


In [25]:
# Gulshan Grover and Irrfan Khan acted in only one movie together
paths('Gulshan Grover', 'Irrfan Khan')

[['Gulshan Grover', 'Knock Out', 'Irrfan Khan']]

In [26]:
# Sanjay Dutt and Irrfan Khan acted in only one movie together
paths('Sanjay Dutt', 'Irrfan Khan')

[['Sanjay Dutt', 'Knock Out', 'Irrfan Khan']]

In [27]:
# Juhi and Irrfan acted in 2 movies
paths('Juhi Chawla', 'Irrfan Khan')

[['Juhi Chawla', '7 1/2 Phere: More Than a Wedding', 'Irrfan Khan'],
 ['Juhi Chawla', 'Krazzy 4', 'Irrfan Khan']]

In [31]:
# Tabu and Irrfan acted in 4 movies
paths('Tabu', 'Irrfan Khan')

[['Tabu', 'Maqbool', 'Irrfan Khan'],
 ['Tabu', 'The Namesake', 'Irrfan Khan'],
 ['Tabu', 'Life of Pi', 'Irrfan Khan'],
 ['Tabu', 'Tariq and His Daughters', 'Irrfan Khan']]

In [32]:
# Tabu and Govinda acted in 4 movies
paths('Govinda', 'Tabu')

[['Govinda', 'Shikari', 'Tabu'],
 ['Govinda', 'Saajan Chale Sasural', 'Tabu'],
 ['Govinda', 'Dil Ne Phir Yaad Kiya', 'Tabu'],
 ['Govinda', 'Aamdani Atthanni Kharcha Rupaiya', 'Tabu']]

In [118]:
# Let's explore the co-stars of Angelina (nm0001401)
# (Counter is imported in the imports cell at the top of the notebook.)
c = Counter()
for movie in G['nm0001401']:
    # Each neighbour of her node is a film she is credited in
    print(movie)
    # Each neighbour of a film node is one of its actors; this includes
    # Angelina herself, so she is counted once per film.
    c.update(name['primaryName'][n] for n in G[movie])

tt0113243
tt0116353
tt0117070
tt0119906
tt0120797
tt0129136
tt0145681
tt0146316
tt0172493
tt0176326
tt0187078
tt0218922
tt0282687
tt0294357
tt0307453
tt0325703
tt0343737
tt0346156
tt0346491
tt0356910
tt0364045
tt0374289
tt0441773
tt0442933
tt0493464
tt0824747
tt0829459
tt0944835
tt1243957
tt1302011
tt1587310
tt2267968
tt3215824
tt3661394
tt3707106
tt4777008
tt4827948
tt5714470
tt9014258
tt9032400


In [120]:
# These are the top co-stars across her 40 films (the first entry is Angelina herself, counted once per film)
c.most_common(100)

[('Angelina Jolie', 40),
 ('Jack Black', 3),
 ('Dustin Hoffman', 3),
 ('Giovanni Ribisi', 2),
 ('Robert De Niro', 2),
 ('Brad Pitt', 2),
 ('Elle Fanning', 2),
 ('Bryan Cranston', 2),
 ('Jonny Lee Miller', 1),
 ('Jesse Bradford', 1),
 ('Matthew Lillard', 1),
 ('Hedy Burress', 1),
 ('Jenny Lewis', 1),
 ('Jenny Shimizu', 1),
 ('Danny Aiello', 1),
 ('Anne Archer', 1),
 ('Michael Biehn', 1),
 ('David Duchovny', 1),
 ('Timothy Hutton', 1),
 ('Michael Massee', 1),
 ('John Cusack', 1),
 ('Billy Bob Thornton', 1),
 ('Cate Blanchett', 1),
 ('Rosanna Arquette', 1),
 ('William Forsythe', 1),
 ('Mekhi Phifer', 1),
 ('Denzel Washington', 1),
 ('Queen Latifah', 1),
 ('Michael Rooker', 1),
 ('Jon Voight', 1),
 ('Iain Glen', 1),
 ('Noah Taylor', 1),
 ('Winona Ryder', 1),
 ('Clea DuVall', 1),
 ('Brittany Murphy', 1),
 ('Scott Plank', 1),
 ('Anna Gunn', 1),
 ('Andrew Prine', 1),
 ('Nicolas Cage', 1),
 ('T.J. Cross', 1),
 ('Antonio Banderas', 1),
 ('Thomas Jane', 1),
 ('Jack Thompson', 1),
 ('Edward Burns