# Set up

In [1]:
import requests

from bs4 import BeautifulSoup

In [2]:
base_url = "https://editorial.rottentomatoes.com/guide/140-essential-action-movies-to-watch-now/2/"

In [3]:
response = requests.get(base_url)
response

<Response [200]>

In [4]:
response.status_code

200

In [5]:
html = response.content
html

b'<!DOCTYPE html>\n<html lang="en-US">\n    <head prefix="og: http://ogp.me/ns# flixstertomatoes: http://ogp.me/ns/apps/flixstertomatoes#">\n    <meta http-equiv="content-type" content="text/html; charset=UTF-8" />\n        <meta property=\'og.description\' content="From John Wick and Die Hard to Mad Max and Atomic Blonde, these best action movies ever will thrill you and get the adrenaline pumping!" />\n    <meta name=\'description\' content="From John Wick and Die Hard to Mad Max and Atomic Blonde, these best action movies ever will thrill you and get the adrenaline pumping!" />\n    <meta property=\'og:title\' content="140 Essential Action Movies To Watch Now" />\n    <meta property=\'og:type\' content="article" />\n    <meta property=\'og:image\' content="https://s3-us-west-2.amazonaws.com/flx-editorial-wordpress/wp-content/uploads/2019/06/06180025/RT_140_ESSENTIAL_ACTION_600x314.jpg" />\n    <meta property=\'og:url\' content="https://editorial.rottentomatoes.com/guide/140-essentia

# Choosing a Parser

In [6]:
soup = BeautifulSoup(html, "lxml")

In [7]:
print(soup.prettify())

<!DOCTYPE html>
<html lang="en-US">
 <head prefix="og: http://ogp.me/ns# flixstertomatoes: http://ogp.me/ns/apps/flixstertomatoes#">
  <meta content="text/html; charset=utf-8" http-equiv="content-type"/>
  <meta content="From John Wick and Die Hard to Mad Max and Atomic Blonde, these best action movies ever will thrill you and get the adrenaline pumping!" property="og.description"/>
  <meta content="From John Wick and Die Hard to Mad Max and Atomic Blonde, these best action movies ever will thrill you and get the adrenaline pumping!" name="description"/>
  <meta content="140 Essential Action Movies To Watch Now" property="og:title"/>
  <meta content="article" property="og:type"/>
  <meta content="https://s3-us-west-2.amazonaws.com/flx-editorial-wordpress/wp-content/uploads/2019/06/06180025/RT_140_ESSENTIAL_ACTION_600x314.jpg" property="og:image"/>
  <meta content="https://editorial.rottentomatoes.com/guide/140-essential-action-movies-to-watch-now/" property="og:url"/>
  <meta content="

# Finding an element containing all the data

In [8]:
# find all gives list
div = soup.find_all('div', class_ = 'col-sm-18 col-full-xs countdown-item-content')


In [9]:
len(div)

140

In [10]:
# The title, year anc score of each movie are contained in the h2 tag
# explore the first div

div[0].find('h2')

<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">57%</span></h2>

In [11]:
# Extract all h2 tags
# div_list = []
# for divs in div:
#     total_div = divs.find("h2")
#     div_list.append(total_div)
# print(div_list)

headings = [divs.find("h2") for divs in div]
headings

[<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">57%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/equilibrium/">Equilibrium</a> <span class="subtle start-year">(2002)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">41%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/hero/">Hero</a> <span class="subtle start-year">(2004)</span> <span class="icon tiny certified" title="Certified Fresh"></span> <span class="tMeterScore">94%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/1017666-road_house/">Road House</a> <span class="subtle start-year">(1989)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">37%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/unstoppable-2010/">Unstoppable</a> <span class="subtle 

In [12]:
# Check the text inside the headings

[heading.text for heading in headings]

['Running Scared (1986)  57%',
 'Equilibrium (2002)  41%',
 'Hero (2004)  94%',
 'Road House (1989)  37%',
 'Unstoppable (2010)  87%',
 'Shaft (1971)  88%',
 'The Villainess (Ak-Nyeo) (2017)  85%',
 'Highlander (1986)  69%',
 'Die Hard 2 (1990)  69%',
 'National Treasure (2004)  46%',
 'The Protector (Tom yum goong) (Warrior King) (2005)  53%',
 'Revenge (2018)  93%',
 'El Mariachi (1993)  94%',
 'A Touch of Zen (1969)  96%',
 'Top Gun (1986)  57%',
 'Con Air (1997)  56%',
 'The Expendables 2 (2012)  67%',
 'The Mummy (1999)  61%',
 'Mr. & Mrs. Smith (2005)  60%',
 'Rush Hour (1998)  61%',
 'The Equalizer (2014)  60%',
 'Captain America: Civil War (2016)  90%',
 'Air Force One (1997)  78%',
 'Bloodsport (1988)  40%',
 'Blade (1998)  55%',
 'Bad Boys (1995)  42%',
 'Die Hard: With a Vengeance (1995)  59%',
 'The Running Man (1987)  64%',
 'Code of Silence (1985)  68%',
 "Shoot 'Em Up (2007)  67%",
 'Crank (2006)  61%',
 'Machete (2010)  72%',
 'Drive (2011)  93%',
 'Batman (1989)  71%',

In [13]:
headings[0]

<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">57%</span></h2>

#### The movie title is in the 'a' tag
#### The year is in a 'span' with class 'start-year'
#### The score is in a 'span' with class 'tMeterScore'

In [129]:
# check all heading links

[heading.find('a') for heading in headings]

[<a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a>,
 <a href="https://www.rottentomatoes.com/m/equilibrium/">Equilibrium</a>,
 <a href="https://www.rottentomatoes.com/m/hero/">Hero</a>,
 <a href="https://www.rottentomatoes.com/m/1017666-road_house/">Road House</a>,
 <a href="https://www.rottentomatoes.com/m/unstoppable-2010/">Unstoppable</a>,
 <a href="https://www.rottentomatoes.com/m/1018699-shaft/">Shaft</a>,
 <a href="https://www.rottentomatoes.com/m/the_villainess/">The Villainess (Ak-Nyeo)</a>,
 <a href="https://www.rottentomatoes.com/m/highlander/">Highlander</a>,
 <a href="https://www.rottentomatoes.com/m/die_hard_2_1990/">Die Hard 2</a>,
 <a href="https://www.rottentomatoes.com/m/national_treasure/">National Treasure</a>,
 <a href="https://www.rottentomatoes.com/m/protector/">The Protector (Tom yum goong) (Warrior King)</a>,
 <a href="https://www.rottentomatoes.com/m/revenge_2018/">Revenge</a>,
 <a href="https://www.rottentomatoes.com/m/el_ma

In [130]:
# Extract movies name from link
movie_name = [heading.find('a').string for heading in headings]

movie_name

['Running Scared',
 'Equilibrium',
 'Hero',
 'Road House',
 'Unstoppable',
 'Shaft',
 'The Villainess (Ak-Nyeo)',
 'Highlander',
 'Die Hard 2',
 'National Treasure',
 'The Protector (Tom yum goong) (Warrior King)',
 'Revenge',
 'El Mariachi',
 'A Touch of Zen',
 'Top Gun',
 'Con Air',
 'The Expendables 2',
 'The Mummy',
 'Mr. & Mrs. Smith',
 'Rush Hour',
 'The Equalizer',
 'Captain America: Civil War',
 'Air Force One',
 'Bloodsport',
 'Blade',
 'Bad Boys',
 'Die Hard: With a Vengeance',
 'The Running Man',
 'Code of Silence',
 "Shoot 'Em Up",
 'Crank',
 'Machete',
 'Drive',
 'Batman',
 'Under Siege',
 'Independence Day',
 'Bullitt',
 'Wanted',
 'Superman',
 'Ronin',
 'They Live',
 'Cliffhanger',
 "Marvel's The Avengers",
 'Hot Fuzz',
 'The Warriors',
 'Starship Troopers',
 'Elite Squad: The Enemy Within',
 'Point Break',
 'The Long Kiss Goodnight',
 'The Guest',
 'Taken',
 '300',
 'True Lies',
 'Demolition Man',
 'Hardcore Henry',
 'Police Story (Ging chaat goo si) (Police Force)',
 '

In [17]:
# Extracting years
years = [heading.find('span', class_='start-year').string for heading in headings]

years

['(1986)',
 '(2002)',
 '(2004)',
 '(1989)',
 '(2010)',
 '(1971)',
 '(2017)',
 '(1986)',
 '(1990)',
 '(2004)',
 '(2005)',
 '(2018)',
 '(1993)',
 '(1969)',
 '(1986)',
 '(1997)',
 '(2012)',
 '(1999)',
 '(2005)',
 '(1998)',
 '(2014)',
 '(2016)',
 '(1997)',
 '(1988)',
 '(1998)',
 '(1995)',
 '(1995)',
 '(1987)',
 '(1985)',
 '(2007)',
 '(2006)',
 '(2010)',
 '(2011)',
 '(1989)',
 '(1992)',
 '(1996)',
 '(1968)',
 '(2008)',
 '(1978)',
 '(1998)',
 '(1988)',
 '(1993)',
 '(2012)',
 '(2007)',
 '(1979)',
 '(1997)',
 '(2011)',
 '(1991)',
 '(1996)',
 '(2014)',
 '(2009)',
 '(2007)',
 '(1994)',
 '(1993)',
 '(2016)',
 '(1985)',
 '(2001)',
 '(2015)',
 '(1997)',
 '(1986)',
 '(2017)',
 '(1995)',
 '(2006)',
 '(1984)',
 '(2005)',
 '(2004)',
 '(2001)',
 '(1981)',
 '(2000)',
 '(2004)',
 '(2011)',
 '(1992)',
 '(1989)',
 '(2005)',
 '(2010)',
 '(2008)',
 '(2018)',
 '(2017)',
 '(1964)',
 '(1976)',
 '(2017)',
 '(1972)',
 '(2014)',
 '(2005)',
 '(1971)',
 '(2015)',
 '(1990)',
 '(1996)',
 '(1971)',
 '(2014)',
 '(2003)',

In [131]:
# remove (data cleaning) bracket () from year
years = [year.strip('()') for year in years]

years

['1986',
 '2002',
 '2004',
 '1989',
 '2010',
 '1971',
 '2017',
 '1986',
 '1990',
 '2004',
 '2005',
 '2018',
 '1993',
 '1969',
 '1986',
 '1997',
 '2012',
 '1999',
 '2005',
 '1998',
 '2014',
 '2016',
 '1997',
 '1988',
 '1998',
 '1995',
 '1995',
 '1987',
 '1985',
 '2007',
 '2006',
 '2010',
 '2011',
 '1989',
 '1992',
 '1996',
 '1968',
 '2008',
 '1978',
 '1998',
 '1988',
 '1993',
 '2012',
 '2007',
 '1979',
 '1997',
 '2011',
 '1991',
 '1996',
 '2014',
 '2009',
 '2007',
 '1994',
 '1993',
 '2016',
 '1985',
 '2001',
 '2015',
 '1997',
 '1986',
 '2017',
 '1995',
 '2006',
 '1984',
 '2005',
 '2004',
 '2001',
 '1981',
 '2000',
 '2004',
 '2011',
 '1992',
 '1989',
 '2005',
 '2010',
 '2008',
 '2018',
 '2017',
 '1964',
 '1976',
 '2017',
 '1972',
 '2014',
 '2005',
 '1971',
 '2015',
 '1990',
 '1996',
 '1971',
 '2014',
 '2003',
 '1993',
 '2018',
 '2010',
 '1995',
 '2002',
 '2019',
 '2012',
 '2002',
 '2010',
 '1997',
 '1985',
 '2008',
 '2011',
 '2011',
 '1987',
 '1996',
 '1987',
 '2017',
 '2006',
 '2017',
 

# SCORE

In [19]:
headings[0]

<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">57%</span></h2>

In [20]:
score = [heading.find('span', class_ = 'tMeterScore').string for heading in headings]
score

['57%',
 '41%',
 '94%',
 '37%',
 '87%',
 '88%',
 '85%',
 '69%',
 '69%',
 '46%',
 '53%',
 '93%',
 '94%',
 '96%',
 '57%',
 '56%',
 '67%',
 '61%',
 '60%',
 '61%',
 '60%',
 '90%',
 '78%',
 '40%',
 '55%',
 '42%',
 '59%',
 '64%',
 '68%',
 '67%',
 '61%',
 '72%',
 '93%',
 '71%',
 '79%',
 '67%',
 '98%',
 '71%',
 '94%',
 '68%',
 '85%',
 '69%',
 '91%',
 '91%',
 '88%',
 '65%',
 '91%',
 '70%',
 '70%',
 '91%',
 '59%',
 '61%',
 '71%',
 '60%',
 '52%',
 '93%',
 '73%',
 '75%',
 '71%',
 '78%',
 '78%',
 '80%',
 '80%',
 '83%',
 '85%',
 '86%',
 '91%',
 '86%',
 '88%',
 '93%',
 '95%',
 '88%',
 '87%',
 '91%',
 '93%',
 '94%',
 '91%',
 '93%',
 '99%',
 '98%',
 '93%',
 '83%',
 '90%',
 '81%',
 '98%',
 '82%',
 '89%',
 '96%',
 '89%',
 '91%',
 '85%',
 '96%',
 '96%',
 '87%',
 '78%',
 '90%',
 '94%',
 '79%',
 '83%',
 '86%',
 '92%',
 '91%',
 '94%',
 '93%',
 '77%',
 '80%',
 '66%',
 '90%',
 '89%',
 '94%',
 '92%',
 '100%',
 '98%',
 '82%',
 '95%',
 '71%',
 '85%',
 '94%',
 '100%',
 '77%',
 '85%',
 '74%',
 '94%',
 '84%',
 '86%'

# Extract Consensus

In [133]:
# critics consensus text are in class 'info critics-consensus'
consensus = [divs.find('div', class_ = 'info critics-consensus') for divs in div]

consensus[0]

<div class="info critics-consensus"><span class="descriptor">Critics Consensus:</span> Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.</div>

In [36]:
# every movie text starting with (Critics Consensus: ), we don't need it
con = [con.text for con in consensus]

con[0]

'Critics Consensus: Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.'

In [136]:
# as we see required text is in second child (tag) we can get this by contents[1], first child is contents[0] 
# strip functions is used to remove starting and ending space
consensus[0].contents[1].strip()

'Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.'

In [137]:
# getting all critics consensus

consensus_text = [con.contents[1].strip() for con in consensus] 

consensus_text

['Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.',
 'Equilibrium is a reheated mishmash of other sci-fi movies.',
 'With death-defying action sequences and epic historic sweep, Hero offers everything a martial arts fan could ask for.',
 "Whether Road House is simply bad or so bad it's good depends largely on the audience's fondness for Swayze -- and tolerance for violently cheesy action.",
 "As fast, loud, and relentless as the train at the center of the story, Unstoppable is perfect popcorn entertainment -- and director Tony Scott's best movie in years.",
 'This is the man that would risk his neck for his brother, man. Can you dig it?',
 'The Villainess offers enough pure kinetic thrills to satisfy genre enthusiasts -- and carve out a bloody niche for itself in modern Korean action cinema.',
 "People hate Highlander because it's cheesy, bombastic, and absurd. And peop

# Extract Adjusted Score

In [82]:
adj_scr = [divs.find("div", {"class": "info countdown-adjusted-score"}) for divs in div]

adj_scr[0]

<div class="info countdown-adjusted-score"><span class="descriptor">Adjusted Score: </span>58.198% <span class="glyphicon glyphicon-question-sign" data-html="true" data-original-title="The Adjusted Score comes from a weighted formula (Bayesian) that we use that accounts for variation in the number of reviews per movie." data-placement="top" data-toggle="tooltip" rel="tooltip" title=""></span></div>

In [83]:
adjusted_scr[0].contents[1]

'58.198% '

In [84]:
adjusted_score = [scr.contents[1] for scr in adj_scr]

adjusted_score

['58.198% ',
 '43.044% ',
 '99.955% ',
 '39.911% ',
 '92.712% ',
 '92.114% ',
 '87.922% ',
 '71.767% ',
 '73.278% ',
 '50.928% ',
 '55.512% ',
 '98.174% ',
 '95.799% ',
 '98.435% ',
 '62.07% ',
 '59.727% ',
 '71.515% ',
 '65.173% ',
 '67.298% ',
 '64.752% ',
 '67.924% ',
 '106.91% ',
 '81.225% ',
 '41.181% ',
 '60.614% ',
 '45.845% ',
 '63.602% ',
 '66.927% ',
 '68.657% ',
 '72.216% ',
 '63.712% ',
 '77.747% ',
 '101.569% ',
 '76.912% ',
 '80.503% ',
 '71.051% ',
 '101.966% ',
 '78.919% ',
 '101.111% ',
 '71.431% ',
 '89.671% ',
 '72.876% ',
 '105.084% ',
 '97.827% ',
 '91.347% ',
 '68.727% ',
 '91.649% ',
 '74.198% ',
 '72.771% ',
 '95.099% ',
 '64.523% ',
 '69.055% ',
 '73.96% ',
 '62.882% ',
 '57.284% ',
 '94.442% ',
 '76.63% ',
 '85.02% ',
 '74.87% ',
 '81.709% ',
 '93.509% ',
 '83.164% ',
 '83.532% ',
 '86.846% ',
 '88.207% ',
 '92.849% ',
 '93.704% ',
 '90.911% ',
 '88.995% ',
 '101.075% ',
 '98.695% ',
 '60.765% ',
 '92.457% ',
 '97.333% ',
 '45.048% ',
 '104.949% ',
 '91.474% '

# Extract Synopsis

In [86]:
synopsis = [divs.find('div', class_='synopsis') for divs in div]

synopsis[0]

<div class="info synopsis"><span class="descriptor">Synopsis:</span> Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this...<a class="" data-pageheader="" href="https://www.rottentomatoes.com/m/1018009-running_scared/" target="_top"> [More]</a></div>

In [87]:
synopsis[0].contents[1]

' Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this...'

In [88]:
synopsis_text = [syn.contents[1] for syn in synopsis]

synopsis_text

[' Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this...',
 ' In the nation of Libria, there is always peace among men. The rules of the Librian system are simple. If...',
 " Hero is two-time Academy Award nominee Zhang Yimou's directorial attempt at exploring the concept of a Chinese hero. During the...",
 ' Dalton (Swayze) is a true gentleman with a degree in philosophy from NYU. He also has a flip side -...',
 ' In this action thriller from director Tony Scott, rookie train operator Will (Chris Pine) and grizzled veteran engineer Frank (Denzel...',
 ' Shaft, a highly successful film, spawned an industry of sequels and imitations. The daughter (Sherri Brewer) of Bumpy Jones (Moses...',
 ' Since she was a little girl, Sook-hee was raised to be a deadly assassin. She gladly accepts the chance to...',
 ' Among humans for centuries, an immortal specie existed. Connor MacLeod is a member of this specie. Unaware 

# Extract Starring / Actors

In [98]:
cast_info = [divs.find('div', class_ = 'cast') for divs in div] 

cast_info

[<div class="info cast">
 <span class="descriptor">Starring:</span> <a class="" href="/celebrity/gregory_hines/">Gregory Hines</a>, <a class="" href="/celebrity/billy_crystal/">Billy Crystal</a>, <a class="" href="/celebrity/jimmy_smits/">Jimmy Smits</a>, <a class="" href="/celebrity/steven_bauer/">Steven Bauer</a></div>,
 <div class="info cast">
 <span class="descriptor">Starring:</span> <a class="" href="/celebrity/christian_bale/">Christian Bale</a>, <a class="" href="/celebrity/emily_watson/">Emily Watson</a>, <a class="" href="/celebrity/taye_diggs/">Taye Diggs</a>, <a class="" href="/celebrity/angus_macfadyen/">Angus Macfadyen</a></div>,
 <div class="info cast">
 <span class="descriptor">Starring:</span> <a class="" href="/celebrity/jet_li/">Jet Li</a>, <a class="" href="/celebrity/leung_chiu_wai/">Tony Leung Chiu Wai</a>, <a class="" href="/celebrity/maggie_cheung_manyuk/">Maggie Cheung</a>, <a class="" href="/celebrity/daoming_chen/">Daoming Chen</a></div>,
 <div class="info ca

In [102]:
cast_links = cast_info[0].find_all('a')

cast_links

[<a class="" href="/celebrity/gregory_hines/">Gregory Hines</a>,
 <a class="" href="/celebrity/billy_crystal/">Billy Crystal</a>,
 <a class="" href="/celebrity/jimmy_smits/">Jimmy Smits</a>,
 <a class="" href="/celebrity/steven_bauer/">Steven Bauer</a>]

In [104]:
cast_names = [link.text for link in cast_links]

cast_names

['Gregory Hines', 'Billy Crystal', 'Jimmy Smits', 'Steven Bauer']

In [115]:
cast = ", ".join(cast_names)

cast

'Tom Hardy, Charlize Theron, Nicholas Hoult, Rosie Huntington-Whiteley'

# Extract all movies actors

In [111]:
cast = []

for c in cast_info:
    cast_links = c.find_all('a')
    cast_names = [link.string for link in cast_links]
    result = ", ".join(cast_names)
    
    cast.append(result)
    
cast

['Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer',
 'Christian Bale, Emily Watson, Taye Diggs, Angus Macfadyen',
 'Jet Li, Tony Leung Chiu Wai, Maggie Cheung, Daoming Chen',
 'Patrick Swayze, Kelly Lynch, Sam Elliott, Ben Gazzara',
 'Denzel Washington, Chris Pine, Rosario Dawson, Ethan Suplee',
 'Richard Roundtree, Moses Gunn, Gwen Mitchell, Christopher St. John',
 'Ok-bin Kim, Kim Seo-hyeong, Shin Ha-kyun, Bang Sung-jun',
 'Christopher Lambert, Sean Connery, Roxanne Hart, Clancy Brown',
 'Bruce Willis, Bonnie Bedelia, William Atherton, Reginald VelJohnson',
 'Nicolas Cage, Diane Kruger, Justin Bartha, Sean Bean',
 'Tony Jaa, Petchtai Wongkamlao, Bongkoj Khongmalai, Bongkoo Kongmalai',
 'Matilda Anna Ingrid Lutz, Kevin Janssens, Vincent Colombe, Guillaume Bouchède',
 'Carlos Gallardo, Consuelo Gómez, Reinol Martinez, Peter Marquardt',
 'Feng Hsu, Chun Shih, Pai Ying, Tien Peng',
 'Tom Cruise, Kelly McGillis, Anthony Edwards, Val Kilmer',
 'Nicolas Cage, John Cusack, John Malko

# Extract Director names

In [62]:
director = soup.find_all('div', class_ = 'info director')

director

[<div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/peter_hyams/">Peter Hyams</a></div>,
 <div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/kurt_wimmer/">Kurt Wimmer</a></div>,
 <div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/zhang_yimou/">Zhang Yimou</a></div>,
 <div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/rowdy_herrington/">Rowdy Herrington</a></div>,
 <div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/tony_scott/">Tony Scott</a></div>,
 <div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/gordon_parks/">Gordon Parks</a></div>,
 <div class="info director">
 <span class="descriptor">Directed By:</span> <a class="" href="/celebrity/jung_byoung_gil/">Jung Byung-gil</a></di

In [91]:
director_name =   [None if dn.find('a') is None else dn.find('a').text for dn in director]

director_name

['Peter Hyams',
 'Kurt Wimmer',
 'Zhang Yimou',
 'Rowdy Herrington',
 'Tony Scott',
 'Gordon Parks',
 'Jung Byung-gil',
 'Russell Mulcahy',
 'Renny Harlin',
 'Jon Turteltaub',
 'Prachya Pinkaew',
 'Coralie Fargeat',
 'Robert Rodriguez',
 'King Hu',
 'Tony Scott',
 'Simon West',
 'Simon West',
 'Stephen Sommers',
 'Doug Liman',
 'Brett Ratner',
 'Antoine Fuqua',
 'Anthony Russo',
 'Wolfgang Petersen',
 'Newt Arnold',
 'Stephen Norrington',
 'Michael Bay',
 'John McTiernan',
 'Paul Michael Glaser',
 'Andrew Davis',
 'Michael Davis',
 'Mark Neveldine',
 'Ethan Maniquis',
 'Nicolas Winding Refn',
 'Tim Burton',
 'Andrew Davis',
 'Roland Emmerich',
 'Peter Yates',
 'Timur Bekmambetov',
 'Richard Donner',
 'John Frankenheimer',
 'John Carpenter',
 'Renny Harlin',
 None,
 'Edgar Wright',
 'Walter Hill',
 'Paul Verhoeven',
 'José Padilha',
 'Kathryn Bigelow',
 'Renny Harlin',
 'Adam Wingard',
 'Pierre Morel',
 'Zack Snyder',
 'James Cameron',
 'Marco Brambilla',
 'Ilya Naishuller',
 'Jackie Ch

# Store data in structured form

In [69]:
# pandas for storing data in different formats in order to analysis
import pandas as pd

In [118]:
# creating a dataframe named data 
data = pd.DataFrame()

In [124]:
# each column of dataframe

data['Movie Name'] = movie_name
data['Year'] = years
data['Score'] = score
data['Critics_Consensus'] = consensus_text
data['adjusted_score'] = adjusted_score
data['synopsis'] = synopsis_text
data['Starring'] = cast
data['Director'] = director_name

In [125]:
# counting null values in data
data.isnull().sum()

Movie Name           0
Year                 0
Score                0
Critics_Consensus    0
adjusted_score       0
synopsis             0
Starring             0
Director             3
dtype: int64

In [138]:
data

Unnamed: 0,Movie Name,Year,Score,Critics_Consensus,adjusted_score,synopsis,Starring,Director
0,Running Scared,1986,57%,Running Scared struggles to strike a consisten...,58.198%,"Distinguished by a sharp, witty dialogue betw...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Peter Hyams
1,Equilibrium,2002,41%,Equilibrium is a reheated mishmash of other sc...,43.044%,"In the nation of Libria, there is always peac...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Kurt Wimmer
2,Hero,2004,94%,With death-defying action sequences and epic h...,99.955%,Hero is two-time Academy Award nominee Zhang ...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Zhang Yimou
3,Road House,1989,37%,Whether Road House is simply bad or so bad it'...,39.911%,Dalton (Swayze) is a true gentleman with a de...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Rowdy Herrington
4,Unstoppable,2010,87%,"As fast, loud, and relentless as the train at ...",92.712%,In this action thriller from director Tony Sc...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Tony Scott
...,...,...,...,...,...,...,...,...
135,Lat sau san taam (Hard-Boiled),1992,94%,Boasting impactful action as well as surprisin...,96.107%,"Yun-Fat portrays a maverick, clarinet-playing...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",John Woo
136,The Matrix,1999,88%,"Thanks to the Wachowskis' imaginative vision, ...",94.939%,"What if virtual reality wasn't just for fun, ...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Lilly Wachowski
137,Terminator 2: Judgment Day,1991,93%,T2 features thrilling action sequences and eye...,99.104%,A sequel to the sci-fi action thriller that m...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",James Cameron
138,Die Hard,1988,94%,Its many imitators (and sequels) have never co...,99.805%,"It's Christmas time in L.A., and there's an e...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",John McTiernan


# Exporting the data to CSV

In [126]:
data.to_csv('movies_info.csv', index=False, header=True)

In [139]:
# reading data from computer
df = pd.read_csv('movies_info.csv')

In [140]:
df

Unnamed: 0,Movie Name,Year,Score,Critics_Consensus,adjusted_score,synopsis,Starring,Director
0,Running Scared,1986,57%,Running Scared struggles to strike a consisten...,58.198%,"Distinguished by a sharp, witty dialogue betw...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Peter Hyams
1,Equilibrium,2002,41%,Equilibrium is a reheated mishmash of other sc...,43.044%,"In the nation of Libria, there is always peac...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Kurt Wimmer
2,Hero,2004,94%,With death-defying action sequences and epic h...,99.955%,Hero is two-time Academy Award nominee Zhang ...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Zhang Yimou
3,Road House,1989,37%,Whether Road House is simply bad or so bad it'...,39.911%,Dalton (Swayze) is a true gentleman with a de...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Rowdy Herrington
4,Unstoppable,2010,87%,"As fast, loud, and relentless as the train at ...",92.712%,In this action thriller from director Tony Sc...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Tony Scott
...,...,...,...,...,...,...,...,...
135,Lat sau san taam (Hard-Boiled),1992,94%,Boasting impactful action as well as surprisin...,96.107%,"Yun-Fat portrays a maverick, clarinet-playing...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",John Woo
136,The Matrix,1999,88%,"Thanks to the Wachowskis' imaginative vision, ...",94.939%,"What if virtual reality wasn't just for fun, ...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",Lilly Wachowski
137,Terminator 2: Judgment Day,1991,93%,T2 features thrilling action sequences and eye...,99.104%,A sequel to the sci-fi action thriller that m...,"Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",James Cameron
138,Die Hard,1988,94%,Its many imitators (and sequels) have never co...,99.805%,"It's Christmas time in L.A., and there's an e...","Tom Hardy, Charlize Theron, Nicholas Hoult, Ro...",John McTiernan
