# Shortest path between actors

What's the shortest path between two actors, via films they've acted in together?

In [35]:
import networkx as nx
import pandas as pd
from collections import Counter

In [40]:
# Load the titles, indexed by tconst.
# usecols restricts parsing to the columns that are kept, so the unused
# columns of the file are never materialised in memory.
title = pd.read_csv(
    'title.basics.tsv.gz',
    sep='\t',
    low_memory=False,
    usecols=['tconst', 'titleType', 'primaryTitle', 'startYear'],
).set_index('tconst')[['titleType', 'primaryTitle', 'startYear']]
title.head()

Unnamed: 0_level_0,titleType,primaryTitle,startYear
tconst,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
tt0000001,short,Carmencita,1894
tt0000002,short,Le clown et ses chiens,1892
tt0000003,short,Pauvre Pierrot,1892
tt0000004,short,Un bon bock,1892
tt0000005,short,Blacksmith Scene,1893


In [41]:
# Load the people credited on each film (one row per title/person credit).
# usecols restricts parsing to the three columns that are kept, so the other
# columns of this large file are never materialised in memory.
principals = pd.read_csv(
    'title.principals.tsv.gz',
    sep='\t',
    usecols=['tconst', 'nconst', 'category'],
)[['tconst', 'nconst', 'category']]
principals.head()

Unnamed: 0,tconst,nconst,category
0,tt0000001,nm1588970,self
1,tt0000001,nm0005690,director
2,tt0000001,nm0374658,cinematographer
3,tt0000002,nm0721526,director
4,tt0000002,nm1335271,composer


In [3]:
# Keep only acting credits; directors, composers, etc. are dropped.
# Shrinks data to about 40%
actors = principals[principals['category'].isin({'actor', 'actress'})]

In [4]:
# Restrict to titles of type 'movie' (no TV series, etc.), then keep only the
# acting credits that belong to those titles. Shrinks data to ~5%
movies = title.loc[title['titleType'].eq('movie')]
actors = actors.loc[actors['tconst'].isin(movies.index)]

In [5]:
# Delete the original data to save memory. The filtered rows live on in
# `actors`; the cell that derives `actors` from `principals` cannot be re-run
# after this without loading `principals` again.
del principals

In [6]:
# This is what the network looks like: each row links a film (tconst) to one
# of its actors (nconst)
actors.head()

Unnamed: 0,tconst,nconst,category
24,tt0000009,nm0063086,actress
25,tt0000009,nm0183823,actor
26,tt0000009,nm1309758,actor
531,tt0000335,nm1010955,actress
532,tt0000335,nm1012612,actor


In [7]:
# Create a networkx graph from this. Films (tconst) and people (nconst) are
# both nodes; every row of `actors` becomes an edge between a film and a
# person, so two actors are connected through the films they share.
G = nx.from_pandas_edgelist(actors, source='tconst', target='nconst')

In [8]:
# Load the name data, indexed by nconst.
# usecols restricts parsing to the columns that are kept, so the unused
# columns of the file are never materialised in memory.
name = pd.read_csv(
    'name.basics.tsv.gz',
    sep='\t',
    usecols=['nconst', 'primaryName', 'birthYear'],
).set_index('nconst')[['primaryName', 'birthYear']]

Here are some actors whose network we could explore.

Indian Actors

- Rajinikanth: nm0707425
- Kamal Haasan
- Chiranjeevi
- Govinda
- Jitendra
- Brahmanandam

Hollywood Actresses

- Angelina Jolie: nm0001401
- Scarlett Johansson
- Jessica Alba
- Emma Watson
- Julia Roberts

Crossovers

- Priyanka Chopra (Baywatch)
- Deepika Padukone (xXx: Return of Xander Cage)
- Irrfan Khan (Inferno, Jurassic World, The Amazing Spider-Man, A Mighty Heart)
- Anil Kapoor (Mission: Impossible – Ghost Protocol)
- Amrish Puri (Indiana Jones)
- Anupam Kher (Bend It Like Beckham, Silver Linings Playbook, Lust, Caution)
- Om Puri (City of Joy, Wolf, The Ghost and the Darkness, Charlie Wilson's War)

In [9]:
def path(source, target):
    '''Returns the shortest path between two actors.

    source and target are names as they appear in the primaryName column of
    the name data. The result alternates actor - movie - actor - ... - actor,
    with every ID on the path converted to its display name by _n.

    Raises IndexError if a name does not occur in the name data. If a name
    occurs but nobody with that name is a node in G, the first match is passed
    on and the resulting error comes from networkx.
    '''
    def resolve(person):
        # Several people can share a primaryName. Prefer the first match that
        # is actually a node in G: blindly taking the first match can select a
        # namesake who is absent from the movie graph.
        matches = name[name['primaryName'] == person].index
        if len(matches) == 0:
            raise IndexError(
                'No person named {!r} in the name data'.format(person))
        return next((nconst for nconst in matches if nconst in G), matches[0])

    return _n(nx.shortest_path(G, resolve(source), resolve(target)))

def _n(path):
    '''Converts a sequence of graph node IDs into display names.

    IDs that start with 'tt' are looked up as titles (primaryTitle); every
    other ID is looked up as a person (primaryName). Order is preserved.
    '''
    labels = []
    for node in path:
        if node.startswith('tt'):
            labels.append(title.loc[node, 'primaryTitle'])
        else:
            labels.append(name.loc[node, 'primaryName'])
    return labels

In [28]:
def paths(source, target):
    '''Returns all the shortest paths between two actors.

    source and target are names as they appear in the primaryName column of
    the name data. Each returned path alternates actor - movie - ... - actor,
    with every ID converted to its display name by _n.

    Raises IndexError if a name does not occur in the name data. If a name
    occurs but nobody with that name is a node in G, the first match is passed
    on and the resulting error comes from networkx.
    '''
    def resolve(person):
        # Several people can share a primaryName. Prefer the first match that
        # is actually a node in G: blindly taking the first match can select a
        # namesake who is absent from the movie graph.
        matches = name[name['primaryName'] == person].index
        if len(matches) == 0:
            raise IndexError(
                'No person named {!r} in the name data'.format(person))
        return next((nconst for nconst in matches if nconst in G), matches[0])

    start, end = resolve(source), resolve(target)
    return [_n(p) for p in nx.all_shortest_paths(G, start, end)]

In [23]:
# How can we connect Manorama and Angelina Jolie?
# The result alternates actor - movie - actor - movie - ... - actor
path('Aachi Manorama', 'Angelina Jolie')

['Aachi Manorama',
 'Rikshavodu',
 'Paresh Rawal',
 'What If?',
 'Irrfan Khan',
 'A Mighty Heart',
 'Angelina Jolie']

In [24]:
# Rajinikanth is 2 people away from Angelina Jolie
# (two intermediate actors lie on the shortest path)
path('Rajinikanth', 'Angelina Jolie')

['Rajinikanth',
 'Andhaa Kaanoon',
 'Amitabh Bachchan',
 'Piku',
 'Irrfan Khan',
 'A Mighty Heart',
 'Angelina Jolie']

In [25]:
# Brahmanandam is 3 people away from Katharine Hepburn
# (three intermediate actors lie on the shortest path)
path('Brahmanandam', 'Katharine Hepburn')

['Brahmanandam',
 'Little Soldiers',
 'Rohini Hattangadi',
 'Gandhi',
 'John Gielgud',
 'Secret Agent',
 'Robert Young',
 'Spitfire',
 'Katharine Hepburn']

In [26]:
# Govinda is 2 people away from Angelina Jolie
# (two intermediate actors lie on the shortest path)
path('Govinda', 'Angelina Jolie')

['Govinda',
 'Awaargi',
 'Anil Kapoor',
 'Besan',
 'Irrfan Khan',
 'A Mighty Heart',
 'Angelina Jolie']

In [30]:
# The number of distinct shortest paths between Gerard Butler and
# Priyanka Chopra, followed by one such path
# (He wants to act with her)
len(paths('Gerard Butler', 'Priyanka Chopra')), path('Gerard Butler', 'Priyanka Chopra')

(39,
 ['Gerard Butler',
  "Na Nai'a: Legend of the Dolphins",
  'Whoopi Goldberg',
  'Incredible Love',
  'Akshay Kumar',
  'Style',
  'Priyanka Chopra'])

In [31]:
# Stallone wants to act with Salman: list every shortest path between them
paths('Sylvester Stallone', 'Salman Khan')

[['Sylvester Stallone',
  'Incredible Love',
  'Akshay Kumar',
  'Mujhse Shaadi Karogi',
  'Salman Khan'],
 ['Sylvester Stallone',
  'Incredible Love',
  'Akshay Kumar',
  "Jaan-E-Mann: Let's Fall in Love... Again",
  'Salman Khan']]

In [32]:
# Kristen wants to act with Hrithik: list every shortest path between them
paths('Kristen Stewart', 'Hrithik Roshan')

[['Kristen Stewart',
  'The Messengers',
  'Dylan McDermott',
  'The Mistress of Spices',
  'Aishwarya Rai Bachchan',
  'Dhoom 2',
  'Hrithik Roshan'],
 ['Kristen Stewart',
  'The Messengers',
  'Dylan McDermott',
  'The Mistress of Spices',
  'Aishwarya Rai Bachchan',
  'Jodhaa Akbar',
  'Hrithik Roshan'],
 ['Kristen Stewart',
  'The Messengers',
  'Dylan McDermott',
  'The Mistress of Spices',
  'Aishwarya Rai Bachchan',
  'Guzaarish',
  'Hrithik Roshan']]

In [33]:
# Daniel Radcliffe wants to act with Shah Rukh Khan: the number of distinct
# shortest paths between them, followed by one such path
len(paths('Daniel Radcliffe', 'Shah Rukh Khan')), path('Daniel Radcliffe', 'Shah Rukh Khan')

(254,
 ['Daniel Radcliffe',
  "Harry Potter and the Sorcerer's Stone",
  'Richard Harris',
  'The Molly Maguires',
  'Sean Connery',
  'The Man Who Would Be King',
  'Saeed Jaffrey',
  'English Babu Desi Mem',
  'Shah Rukh Khan'])

In [38]:
# Let's explore the co-stars of Angelina (nm0001401).
# G['nm0001401'] iterates over her films and G[movie] over every actor linked
# to that film, so she is tallied once per film herself.
c = Counter()
for movie in G['nm0001401']:
    c.update(name.primaryName[n] for n in G[movie])

# These are the top co-stars across her films; her own entry in the result is
# the number of films counted
c.most_common(10)

[('Angelina Jolie', 40),
 ('Jack Black', 3),
 ('Dustin Hoffman', 3),
 ('Giovanni Ribisi', 2),
 ('Robert De Niro', 2),
 ('Brad Pitt', 2),
 ('Elle Fanning', 2),
 ('Bryan Cranston', 2),
 ('Jonny Lee Miller', 1),
 ('Jesse Bradford', 1)]