## Scraping the data

From here: http://www.starringthecomputer.com/features.html

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

%matplotlib inline
pd.set_option('max_colwidth', -1)

from user_agent import generate_user_agent
headers = {'User-Agent': generate_user_agent(device_type="desktop", os=('mac', 'linux'))}

First, let's get a list of all manufacturers, and the urls to the featured models.

In [2]:
url = 'http://www.starringthecomputer.com/computers.html'

raw_html = requests.get(url, headers=headers)
soup_doc = BeautifulSoup(raw_html.content, 'html.parser')

In [3]:
soup_doc.findAll('ul')

[<ul>
 <li>
 <a href="/computer.html?c=172">Acer MPF-1</a>
 <article class="sublist">
 	      • Runaway (1984) 
 	    </article>
 </li>
 </ul>, <ul>
 <li>
 <a href="/computer.html?c=68">Acorn Archimedes</a>
 <article class="sublist">
 	      • Dark Season - Season 1 (1991) • Bergerac - Season 8, Episode 11, "There for the Picking" (1990) • Paddington (2014) • Strapless (1989) • Watt on Earth - Season 1, Episode 1 (1991) • Capital City - Season 1, Episode 2, "Insider Trading" (1989) 
 	    </article>
 </li>
 <li>
 <a href="/computer.html?c=162">Acorn Atom</a>
 <article class="sublist">
 	      • Micro Men (2009) 
 	    </article>
 </li>
 <li>
 <a href="/computer.html?c=136">Acorn BBC Master</a>
 <article class="sublist">
 	      • Only Fools and Horses - "The Frog's Legacy" (1987) • Micro Men (2009) • Smart Money (1986) 
 	    </article>
 </li>
 <li>
 <a href="/computer.html?c=2">Acorn BBC Micro</a>
 <article class="sublist">
 	      • The Fourth Protocol (1987) • Clockwise (1986) • The

In [4]:
computer_movie = []
for each in soup_doc.findAll('ul'):
    computer_movie_dict = {}
    computer_movie_dict['computer'] = each.find('li').a.string
    computer_movie_dict['url'] = 'http://www.starringthecomputer.com/' + each.find('li').a.attrs['href']
    #computer_movie_dict['appearances_raw'] = each.find('li').article.text
    #computer_movie_dict['appearances'] = []
    # print('http://www.starringthecomputer.com/' + each.find('li').a.attrs['href'])
    computer_movie.append(computer_movie_dict)

In [5]:
computer_movie

[{'computer': 'Acer MPF-1',
  'url': 'http://www.starringthecomputer.com//computer.html?c=172'},
 {'computer': 'Acorn Archimedes',
  'url': 'http://www.starringthecomputer.com//computer.html?c=68'},
 {'computer': 'Alienware 15',
  'url': 'http://www.starringthecomputer.com//computer.html?c=416'},
 {'computer': 'Amstrad CPC 464',
  'url': 'http://www.starringthecomputer.com//computer.html?c=140'},
 {'computer': 'Apple Aluminum iMac',
  'url': 'http://www.starringthecomputer.com//computer.html?c=233'},
 {'computer': 'Applied Technologies MicroBee 32',
  'url': 'http://www.starringthecomputer.com//computer.html?c=360'},
 {'computer': 'Apricot PC',
  'url': 'http://www.starringthecomputer.com//computer.html?c=258'},
 {'computer': 'Arduino Duemilanove',
  'url': 'http://www.starringthecomputer.com//computer.html?c=415'},
 {'computer': 'Arma Torpedo Data Computer',
  'url': 'http://www.starringthecomputer.com//computer.html?c=253'},
 {'computer': 'Asus EEE PC',
  'url': 'http://www.starringt

In [6]:
df = pd.DataFrame(computer_movie)
df.head()

Unnamed: 0,computer,url
0,Acer MPF-1,http://www.starringthecomputer.com//computer.html?c=172
1,Acorn Archimedes,http://www.starringthecomputer.com//computer.html?c=68
2,Alienware 15,http://www.starringthecomputer.com//computer.html?c=416
3,Amstrad CPC 464,http://www.starringthecomputer.com//computer.html?c=140
4,Apple Aluminum iMac,http://www.starringthecomputer.com//computer.html?c=233


In [7]:
df.to_csv('computers_featured.csv', index=False)

### And now to grab the scores for each computer

In [8]:
test_url = 'http://www.starringthecomputer.com/computer.html?c=68'

raw_html = requests.get(test_url, headers=headers)
soup_doc = BeautifulSoup(raw_html.content, 'html.parser')

In [9]:
appears_in = []

for movie in soup_doc.find_all('article'):
    appearance_ratings = {}
    appearance_ratings['title'] = movie.h3.text[1:-1]
    appearance_ratings['importance'] = len(movie.find_all('img')[2].get('alt'))
    appearance_ratings['realism'] = len(movie.find_all('img')[4].get('alt'))
    appearance_ratings['visibility'] = len(movie.find_all('img')[6].get('alt'))
    #print(movie.find_all('img')[4])
    appears_in.append(appearance_ratings)

In [10]:
appears_in

[{'title': 'Dark Season - Season 1 (1991)',
  'importance': 5,
  'realism': 2,
  'visibility': 4},
 {'title': 'Bergerac - Season 8, Episode 11, "There for the Picking" (1990)',
  'importance': 2,
  'realism': 5,
  'visibility': 2},
 {'title': 'Paddington (2014)',
  'importance': 3,
  'realism': 4,
  'visibility': 4},
 {'title': 'Strapless (1989)', 'importance': 2, 'realism': 4, 'visibility': 2},
 {'title': 'Watt on Earth - Season 1, Episode 1 (1991)',
  'importance': 3,
  'realism': 5,
  'visibility': 3},
 {'title': 'Capital City - Season 1, Episode 2, "Insider Trading" (1989)',
  'importance': 3,
  'realism': 5,
  'visibility': 3}]

In [11]:
for row in computer_movie:
    url = row['url']
    #print(row['url'])
    raw_html = requests.get(url, headers=headers)
    soup_doc = BeautifulSoup(raw_html.content, 'html.parser')

    appears_in = []

    for movie in soup_doc.find_all('article'):
        appearance_ratings = {}
        appearance_ratings['title'] = movie.h3.text[1:-1]
        appearance_ratings['importance'] = len(movie.find_all('img')[2].get('alt'))
        appearance_ratings['realism'] = len(movie.find_all('img')[4].get('alt'))
        appearance_ratings['visibility'] = len(movie.find_all('img')[6].get('alt'))

        appears_in.append(appearance_ratings)
    
    row['appearances'] = appears_in

In [12]:
computer_movie

[{'computer': 'Acer MPF-1',
  'url': 'http://www.starringthecomputer.com//computer.html?c=172',
  'appearances': [{'title': 'Runaway (1984)',
    'importance': 1,
    'realism': 4,
    'visibility': 3}]},
 {'computer': 'Acorn Archimedes',
  'url': 'http://www.starringthecomputer.com//computer.html?c=68',
  'appearances': [{'title': 'Dark Season - Season 1 (1991)',
    'importance': 5,
    'realism': 2,
    'visibility': 4},
   {'title': 'Bergerac - Season 8, Episode 11, "There for the Picking" (1990)',
    'importance': 2,
    'realism': 5,
    'visibility': 2},
   {'title': 'Paddington (2014)',
    'importance': 3,
    'realism': 4,
    'visibility': 4},
   {'title': 'Strapless (1989)',
    'importance': 2,
    'realism': 4,
    'visibility': 2},
   {'title': 'Watt on Earth - Season 1, Episode 1 (1991)',
    'importance': 3,
    'realism': 5,
    'visibility': 3},
   {'title': 'Capital City - Season 1, Episode 2, "Insider Trading" (1989)',
    'importance': 3,
    'realism': 5,
    'v

In [13]:
df = pd.DataFrame(computer_movie)
df.head()

Unnamed: 0,appearances,computer,url
0,"[{'title': 'Runaway (1984)', 'importance': 1, 'realism': 4, 'visibility': 3}]",Acer MPF-1,http://www.starringthecomputer.com//computer.html?c=172
1,"[{'title': 'Dark Season - Season 1 (1991)', 'importance': 5, 'realism': 2, 'visibility': 4}, {'title': 'Bergerac - Season 8, Episode 11, ""There for the Picking"" (1990)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Paddington (2014)', 'importance': 3, 'realism': 4, 'visibility': 4}, {'title': 'Strapless (1989)', 'importance': 2, 'realism': 4, 'visibility': 2}, {'title': 'Watt on Earth - Season 1, Episode 1 (1991)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'Capital City - Season 1, Episode 2, ""Insider Trading"" (1989)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Acorn Archimedes,http://www.starringthecomputer.com//computer.html?c=68
2,"[{'title': 'Undercover Grandpa (2017)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Alienware 15,http://www.starringthecomputer.com//computer.html?c=416
3,"[{'title': 'The IT Crowd - Season 3 (2008)', 'importance': 1, 'realism': 4, 'visibility': 1}, {'title': 'Micro Men (2009)', 'importance': 1, 'realism': 4, 'visibility': 1}]",Amstrad CPC 464,http://www.starringthecomputer.com//computer.html?c=140
4,"[{'title': 'The Proposal (2009)', 'importance': 1, 'realism': 5, 'visibility': 3}, {'title': 'Flickan som lekte med elden (2009)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Luftslottet som sprängdes (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Stepfather (2009)', 'importance': 3, 'realism': 4, 'visibility': 3}, {'title': 'Chloe (2009)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Bedtime Stories (2008)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Spy Next Door (2010)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'Tamara Drew (2010)', 'importance': 3, 'realism': 5, 'visibility': 4}, {'title': 'The Mechanic (2011)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'The Descendants (2011)', 'importance': 1, 'realism': 5, 'visibility': 1}, {'title': 'Over Kanten (2012)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Stay Cool (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'He's Just Not That Into You (2009)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Apple Aluminum iMac,http://www.starringthecomputer.com//computer.html?c=233


In [30]:
# df.to_csv('computers_featured.csv', index=False)

In [14]:
df.to_json('moviecomputer.json')

In [26]:
appearance_count = []
for each in df.appearances:
    print(len(each))
    appearance_count.append(len(each))

1
6
1
2
13
1
1
1
1
1
10
1
1
1
1
1
1
1
1
1
5
1
1
1
1
1
1
2
1
1
1
1
1
1
1
5
1
2
1
3
1
1
1
4
1
1
1
3
1
1
1
7
2
1
3
1
5
1
1
1
1
1
1
1
1
1
1
1
2
3
1
1
1
2
1
1
1
1
1
3
12
1
1
1
1
1
1
1
1
1
5
1
1
3
1
1
1
2
1
1
1
2
3
2
2
1
1
1
1
1
1
1
1
1
1
5


In [28]:
df['appearance_count'] = appearance_count

In [32]:
df.sort_values(by='appearance_count', ascending=False)

Unnamed: 0,appearances,computer,url,appearance_count
4,"[{'title': 'The Proposal (2009)', 'importance': 1, 'realism': 5, 'visibility': 3}, {'title': 'Flickan som lekte med elden (2009)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Luftslottet som sprängdes (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Stepfather (2009)', 'importance': 3, 'realism': 4, 'visibility': 3}, {'title': 'Chloe (2009)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Bedtime Stories (2008)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Spy Next Door (2010)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'Tamara Drew (2010)', 'importance': 3, 'realism': 5, 'visibility': 4}, {'title': 'The Mechanic (2011)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'The Descendants (2011)', 'importance': 1, 'realism': 5, 'visibility': 1}, {'title': 'Over Kanten (2012)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Stay Cool (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'He's Just Not That Into You (2009)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Apple Aluminum iMac,http://www.starringthecomputer.com//computer.html?c=233,13
80,"[{'title': 'The Pacifier (2005)', 'importance': 3, 'realism': 4, 'visibility': 4}, {'title': 'Lara Croft Tomb Raider: The Cradle of Life (2003)', 'importance': 3, 'realism': 4, 'visibility': 3}, {'title': 'Mr. & Mrs. Smith (2005)', 'importance': 4, 'realism': 4, 'visibility': 4}, {'title': 'Rendition (2007)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'Shooter (2007)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Sahara (2005)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Be Cool (2005)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Hunt to Kill (2010)', 'importance': 4, 'realism': 3, 'visibility': 4}, {'title': 'Outpost 2: Black Sun (2012)', 'importance': 2, 'realism': 4, 'visibility': 3}, {'title': 'Fast Five (2011)', 'importance': 3, 'realism': 3, 'visibility': 3}, {'title': 'The Martian (2015)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'Savages (2012)', 'importance': 4, 'realism': 4, 'visibility': 2}]",Panasonic Toughbook,http://www.starringthecomputer.com//computer.html?c=181,12
10,"[{'title': 'The Secret of My Success (1987)', 'importance': 1, 'realism': 5, 'visibility': 4}, {'title': 'Wall Street (1987)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'The Hunt for Red October (1990)', 'importance': 1, 'realism': 5, 'visibility': 1}, {'title': 'Defense Play (1988)', 'importance': 4, 'realism': 4, 'visibility': 3}, {'title': 'The Naked Gun: From the Files of Police Squad! (1988)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Doin' Time on Planet Earth (1988)', 'importance': 4, 'realism': 4, 'visibility': 2}, {'title': 'Twister's Revenge (1987)', 'importance': 5, 'realism': 1, 'visibility': 3}, {'title': 'MacGyver - Season 2 (1986)', 'importance': 4, 'realism': 4, 'visibility': 4}, {'title': 'Tales from the Darkside - Season 4, Episode 9, ""Sorry, Right Number"" (1987)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Tales from the Darkside - Season 4, Episode 10, ""Payment Overdue"" (1988)', 'importance': 4, 'realism': 5, 'visibility': 3}]",AT&T PC 6300,http://www.starringthecomputer.com//computer.html?c=49,10
51,"[{'title': 'The Man with the Golden Gun (1974)', 'importance': 2, 'realism': 3, 'visibility': 2}, {'title': 'Blakes 7 - Season 1, Episode 2, ""Space Fall"" (1978)', 'importance': 1, 'realism': 1, 'visibility': 3}, {'title': 'The Pink Panther Strikes Again (1976)', 'importance': 3, 'realism': 2, 'visibility': 2}, {'title': 'Doctor Who - Season 8, ""Terror of the Autons"" (1971)', 'importance': 4, 'realism': 2, 'visibility': 4}, {'title': 'For Your Eyes Only (1981)', 'importance': 1, 'realism': 2, 'visibility': 2}, {'title': 'The Satanic Rites of Dracula (1974)', 'importance': 2, 'realism': 2, 'visibility': 3}, {'title': 'Doctor Who - Season 10, ""The Green Death"" (1973)', 'importance': 5, 'realism': 2, 'visibility': 3}]",ICT 1301,http://www.starringthecomputer.com//computer.html?c=108,7
1,"[{'title': 'Dark Season - Season 1 (1991)', 'importance': 5, 'realism': 2, 'visibility': 4}, {'title': 'Bergerac - Season 8, Episode 11, ""There for the Picking"" (1990)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Paddington (2014)', 'importance': 3, 'realism': 4, 'visibility': 4}, {'title': 'Strapless (1989)', 'importance': 2, 'realism': 4, 'visibility': 2}, {'title': 'Watt on Earth - Season 1, Episode 1 (1991)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'Capital City - Season 1, Episode 2, ""Insider Trading"" (1989)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Acorn Archimedes,http://www.starringthecomputer.com//computer.html?c=68,6
115,"[{'title': 'Her Alibi (1989)', 'importance': 3, 'realism': 5, 'visibility': 4}, {'title': 'Think Big (1989)', 'importance': 2, 'realism': 4, 'visibility': 2}, {'title': 'Step By Step - Season 1, Episode 13, ""Getting Organized"" (1991)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'She-Devil (1989)', 'importance': 2, 'realism': 4, 'visibility': 3}, {'title': 'Dutch (1991)', 'importance': 2, 'realism': 5, 'visibility': 2}]",Zenith Supersport,http://www.starringthecomputer.com//computer.html?c=82,5
35,"[{'title': 'Micro Men (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Parker Lewis Can't Lose - Season 1, Episode 1 (1990)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Iron Thunder (1998)', 'importance': 1, 'realism': 4, 'visibility': 1}, {'title': 'Dance Me to My Song (1998)', 'importance': 4, 'realism': 5, 'visibility': 4}, {'title': 'Alice - Season 7, Episode 21, ""Sweet Erasable Mel"" (1982)', 'importance': 5, 'realism': 2, 'visibility': 2}]",Epson HX-20,http://www.starringthecomputer.com//computer.html?c=171,5
90,"[{'title': 'The Monkees - Season 1, Episode 3, ""Monkee vs. Machine"" (1966)', 'importance': 4, 'realism': 1, 'visibility': 3}, {'title': 'I Dream of Jeannie - Season 2, Episode 10, ""The Girl Who Never Had a Birthday"" (1966)', 'importance': 3, 'realism': 1, 'visibility': 2}, {'title': 'City Beneath the Sea (1971)', 'importance': 2, 'realism': 1, 'visibility': 3}, {'title': 'The Time Machine (1978)', 'importance': 4, 'realism': 3, 'visibility': 2}, {'title': 'The Ultimate Imposter (1979)', 'importance': 5, 'realism': 1, 'visibility': 2}]",SDS Sigma 7,http://www.starringthecomputer.com//computer.html?c=199,5
56,"[{'title': 'Arrested Development - Season 3, ""Family Ties"" (2006)', 'importance': 3, 'realism': 3, 'visibility': 4}, {'title': 'Magnum, P.I. - Season 5, Episode 12, ""Little Games"" (1985)', 'importance': 3, 'realism': 3, 'visibility': 2}, {'title': 'Access Code (1984)', 'importance': 3, 'realism': 3, 'visibility': 3}, {'title': 'Computer Chess (2013)', 'importance': 5, 'realism': 5, 'visibility': 2}, {'title': 'Altamont Now (2008)', 'importance': 1, 'realism': 3, 'visibility': 2}]",Kaypro 10,http://www.starringthecomputer.com//computer.html?c=55,5
20,"[{'title': 'Crash and Burn (1990)', 'importance': 4, 'realism': 2, 'visibility': 3}, {'title': 'Klatwa doliny wezy (1987)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Stealth Hunters (1991)', 'importance': 3, 'realism': 2, 'visibility': 3}, {'title': 'Matlock - Season 1, Episode 14, ""The Author"" (1987)', 'importance': 3, 'realism': 5, 'visibility': 2}, {'title': 'Like Father Like Son (1987)', 'importance': 1, 'realism': 5, 'visibility': 2}]",Commodore 128,http://www.starringthecomputer.com//computer.html?c=150,5


In [55]:
mean_importance_list = []
mean_realism_list = []
mean_visibility_list = []
    
for each in df.appearances:
    print(each)
    mean_importance = 0
    mean_realism = 0
    mean_visibility = 0
    for n in each:
        mean_importance += n['importance']
        mean_realism += n['realism']
        mean_visibility += n['visibility']
        # print(n['importance'])
    print("the sum is ", mean_importance)
    print('len of each is', len(each))
    mean_importance = mean_importance/len(each)
    mean_realism = mean_realism/len(each)
    mean_visibility = mean_visibility/len(each)
    
    mean_importance_list.append(mean_importance)
    mean_realism_list.append(mean_realism)
    mean_visibility_list.append(mean_visibility)
        

[{'title': 'Runaway (1984)', 'importance': 1, 'realism': 4, 'visibility': 3}]
the sum is  1
len of each is 1
[{'title': 'Dark Season - Season 1 (1991)', 'importance': 5, 'realism': 2, 'visibility': 4}, {'title': 'Bergerac - Season 8, Episode 11, "There for the Picking" (1990)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Paddington (2014)', 'importance': 3, 'realism': 4, 'visibility': 4}, {'title': 'Strapless (1989)', 'importance': 2, 'realism': 4, 'visibility': 2}, {'title': 'Watt on Earth - Season 1, Episode 1 (1991)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'Capital City - Season 1, Episode 2, "Insider Trading" (1989)', 'importance': 3, 'realism': 5, 'visibility': 3}]
the sum is  18
len of each is 6
[{'title': 'Undercover Grandpa (2017)', 'importance': 3, 'realism': 5, 'visibility': 3}]
the sum is  3
len of each is 1
[{'title': 'The IT Crowd - Season 3 (2008)', 'importance': 1, 'realism': 4, 'visibility': 1}, {'title': 'Micro Men (2009)', 'importan

In [59]:
df['mean_importance'] = mean_importance_list
df['mean_visibility'] = mean_visibility_list
df['mean_realism'] = mean_realism_list

In [60]:
df

Unnamed: 0,appearances,computer,url,appearance_count,mean_importance,mean_visibility,mean_realism
0,"[{'title': 'Runaway (1984)', 'importance': 1, 'realism': 4, 'visibility': 3}]",Acer MPF-1,http://www.starringthecomputer.com//computer.html?c=172,1,1.000000,3.000000,4.000000
1,"[{'title': 'Dark Season - Season 1 (1991)', 'importance': 5, 'realism': 2, 'visibility': 4}, {'title': 'Bergerac - Season 8, Episode 11, ""There for the Picking"" (1990)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Paddington (2014)', 'importance': 3, 'realism': 4, 'visibility': 4}, {'title': 'Strapless (1989)', 'importance': 2, 'realism': 4, 'visibility': 2}, {'title': 'Watt on Earth - Season 1, Episode 1 (1991)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'Capital City - Season 1, Episode 2, ""Insider Trading"" (1989)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Acorn Archimedes,http://www.starringthecomputer.com//computer.html?c=68,6,3.000000,3.000000,4.166667
2,"[{'title': 'Undercover Grandpa (2017)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Alienware 15,http://www.starringthecomputer.com//computer.html?c=416,1,3.000000,3.000000,5.000000
3,"[{'title': 'The IT Crowd - Season 3 (2008)', 'importance': 1, 'realism': 4, 'visibility': 1}, {'title': 'Micro Men (2009)', 'importance': 1, 'realism': 4, 'visibility': 1}]",Amstrad CPC 464,http://www.starringthecomputer.com//computer.html?c=140,2,1.000000,1.000000,4.000000
4,"[{'title': 'The Proposal (2009)', 'importance': 1, 'realism': 5, 'visibility': 3}, {'title': 'Flickan som lekte med elden (2009)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Luftslottet som sprängdes (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Stepfather (2009)', 'importance': 3, 'realism': 4, 'visibility': 3}, {'title': 'Chloe (2009)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Bedtime Stories (2008)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Spy Next Door (2010)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'Tamara Drew (2010)', 'importance': 3, 'realism': 5, 'visibility': 4}, {'title': 'The Mechanic (2011)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'The Descendants (2011)', 'importance': 1, 'realism': 5, 'visibility': 1}, {'title': 'Over Kanten (2012)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Stay Cool (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'He's Just Not That Into You (2009)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Apple Aluminum iMac,http://www.starringthecomputer.com//computer.html?c=233,13,2.076923,2.538462,4.769231
5,"[{'title': 'Kung Fury (2015)', 'importance': 5, 'realism': 1, 'visibility': 4}]",Applied Technologies MicroBee 32,http://www.starringthecomputer.com//computer.html?c=360,1,5.000000,4.000000,1.000000
6,"[{'title': 'Bird of Prey 2 (1984)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Apricot PC,http://www.starringthecomputer.com//computer.html?c=258,1,3.000000,3.000000,5.000000
7,"[{'title': 'Undercover Grandpa (2017)', 'importance': 3, 'realism': 3, 'visibility': 2}]",Arduino Duemilanove,http://www.starringthecomputer.com//computer.html?c=415,1,3.000000,2.000000,3.000000
8,"[{'title': 'Run Silent Run Deep (1958)', 'importance': 4, 'realism': 5, 'visibility': 3}]",Arma Torpedo Data Computer,http://www.starringthecomputer.com//computer.html?c=253,1,4.000000,3.000000,5.000000
9,"[{'title': 'The IT Crowd - Season 3, Episode 5, ""Facefriend"" (2008)', 'importance': 3, 'realism': 5, 'visibility': 4}]",Asus EEE PC,http://www.starringthecomputer.com//computer.html?c=142,1,3.000000,4.000000,5.000000


In [61]:
df.to_csv('moviedata_cleaned.csv', index=False)

In [63]:
df.sort_values(by='appearance_count', ascending=False)

Unnamed: 0,appearances,computer,url,appearance_count,mean_importance,mean_visibility,mean_realism
4,"[{'title': 'The Proposal (2009)', 'importance': 1, 'realism': 5, 'visibility': 3}, {'title': 'Flickan som lekte med elden (2009)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Luftslottet som sprängdes (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Stepfather (2009)', 'importance': 3, 'realism': 4, 'visibility': 3}, {'title': 'Chloe (2009)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Bedtime Stories (2008)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'The Spy Next Door (2010)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'Tamara Drew (2010)', 'importance': 3, 'realism': 5, 'visibility': 4}, {'title': 'The Mechanic (2011)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'The Descendants (2011)', 'importance': 1, 'realism': 5, 'visibility': 1}, {'title': 'Over Kanten (2012)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Stay Cool (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'He's Just Not That Into You (2009)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Apple Aluminum iMac,http://www.starringthecomputer.com//computer.html?c=233,13,2.076923,2.538462,4.769231
80,"[{'title': 'The Pacifier (2005)', 'importance': 3, 'realism': 4, 'visibility': 4}, {'title': 'Lara Croft Tomb Raider: The Cradle of Life (2003)', 'importance': 3, 'realism': 4, 'visibility': 3}, {'title': 'Mr. & Mrs. Smith (2005)', 'importance': 4, 'realism': 4, 'visibility': 4}, {'title': 'Rendition (2007)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'Shooter (2007)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Sahara (2005)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Be Cool (2005)', 'importance': 2, 'realism': 5, 'visibility': 3}, {'title': 'Hunt to Kill (2010)', 'importance': 4, 'realism': 3, 'visibility': 4}, {'title': 'Outpost 2: Black Sun (2012)', 'importance': 2, 'realism': 4, 'visibility': 3}, {'title': 'Fast Five (2011)', 'importance': 3, 'realism': 3, 'visibility': 3}, {'title': 'The Martian (2015)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'Savages (2012)', 'importance': 4, 'realism': 4, 'visibility': 2}]",Panasonic Toughbook,http://www.starringthecomputer.com//computer.html?c=181,12,3.000000,3.166667,4.083333
10,"[{'title': 'The Secret of My Success (1987)', 'importance': 1, 'realism': 5, 'visibility': 4}, {'title': 'Wall Street (1987)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'The Hunt for Red October (1990)', 'importance': 1, 'realism': 5, 'visibility': 1}, {'title': 'Defense Play (1988)', 'importance': 4, 'realism': 4, 'visibility': 3}, {'title': 'The Naked Gun: From the Files of Police Squad! (1988)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Doin' Time on Planet Earth (1988)', 'importance': 4, 'realism': 4, 'visibility': 2}, {'title': 'Twister's Revenge (1987)', 'importance': 5, 'realism': 1, 'visibility': 3}, {'title': 'MacGyver - Season 2 (1986)', 'importance': 4, 'realism': 4, 'visibility': 4}, {'title': 'Tales from the Darkside - Season 4, Episode 9, ""Sorry, Right Number"" (1987)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Tales from the Darkside - Season 4, Episode 10, ""Payment Overdue"" (1988)', 'importance': 4, 'realism': 5, 'visibility': 3}]",AT&T PC 6300,http://www.starringthecomputer.com//computer.html?c=49,10,2.800000,2.600000,4.300000
51,"[{'title': 'The Man with the Golden Gun (1974)', 'importance': 2, 'realism': 3, 'visibility': 2}, {'title': 'Blakes 7 - Season 1, Episode 2, ""Space Fall"" (1978)', 'importance': 1, 'realism': 1, 'visibility': 3}, {'title': 'The Pink Panther Strikes Again (1976)', 'importance': 3, 'realism': 2, 'visibility': 2}, {'title': 'Doctor Who - Season 8, ""Terror of the Autons"" (1971)', 'importance': 4, 'realism': 2, 'visibility': 4}, {'title': 'For Your Eyes Only (1981)', 'importance': 1, 'realism': 2, 'visibility': 2}, {'title': 'The Satanic Rites of Dracula (1974)', 'importance': 2, 'realism': 2, 'visibility': 3}, {'title': 'Doctor Who - Season 10, ""The Green Death"" (1973)', 'importance': 5, 'realism': 2, 'visibility': 3}]",ICT 1301,http://www.starringthecomputer.com//computer.html?c=108,7,2.571429,2.714286,2.000000
1,"[{'title': 'Dark Season - Season 1 (1991)', 'importance': 5, 'realism': 2, 'visibility': 4}, {'title': 'Bergerac - Season 8, Episode 11, ""There for the Picking"" (1990)', 'importance': 2, 'realism': 5, 'visibility': 2}, {'title': 'Paddington (2014)', 'importance': 3, 'realism': 4, 'visibility': 4}, {'title': 'Strapless (1989)', 'importance': 2, 'realism': 4, 'visibility': 2}, {'title': 'Watt on Earth - Season 1, Episode 1 (1991)', 'importance': 3, 'realism': 5, 'visibility': 3}, {'title': 'Capital City - Season 1, Episode 2, ""Insider Trading"" (1989)', 'importance': 3, 'realism': 5, 'visibility': 3}]",Acorn Archimedes,http://www.starringthecomputer.com//computer.html?c=68,6,3.000000,3.000000,4.166667
115,"[{'title': 'Her Alibi (1989)', 'importance': 3, 'realism': 5, 'visibility': 4}, {'title': 'Think Big (1989)', 'importance': 2, 'realism': 4, 'visibility': 2}, {'title': 'Step By Step - Season 1, Episode 13, ""Getting Organized"" (1991)', 'importance': 4, 'realism': 3, 'visibility': 3}, {'title': 'She-Devil (1989)', 'importance': 2, 'realism': 4, 'visibility': 3}, {'title': 'Dutch (1991)', 'importance': 2, 'realism': 5, 'visibility': 2}]",Zenith Supersport,http://www.starringthecomputer.com//computer.html?c=82,5,2.600000,2.800000,4.200000
35,"[{'title': 'Micro Men (2009)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Parker Lewis Can't Lose - Season 1, Episode 1 (1990)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Iron Thunder (1998)', 'importance': 1, 'realism': 4, 'visibility': 1}, {'title': 'Dance Me to My Song (1998)', 'importance': 4, 'realism': 5, 'visibility': 4}, {'title': 'Alice - Season 7, Episode 21, ""Sweet Erasable Mel"" (1982)', 'importance': 5, 'realism': 2, 'visibility': 2}]",Epson HX-20,http://www.starringthecomputer.com//computer.html?c=171,5,2.400000,2.200000,4.200000
90,"[{'title': 'The Monkees - Season 1, Episode 3, ""Monkee vs. Machine"" (1966)', 'importance': 4, 'realism': 1, 'visibility': 3}, {'title': 'I Dream of Jeannie - Season 2, Episode 10, ""The Girl Who Never Had a Birthday"" (1966)', 'importance': 3, 'realism': 1, 'visibility': 2}, {'title': 'City Beneath the Sea (1971)', 'importance': 2, 'realism': 1, 'visibility': 3}, {'title': 'The Time Machine (1978)', 'importance': 4, 'realism': 3, 'visibility': 2}, {'title': 'The Ultimate Imposter (1979)', 'importance': 5, 'realism': 1, 'visibility': 2}]",SDS Sigma 7,http://www.starringthecomputer.com//computer.html?c=199,5,3.600000,2.400000,1.400000
56,"[{'title': 'Arrested Development - Season 3, ""Family Ties"" (2006)', 'importance': 3, 'realism': 3, 'visibility': 4}, {'title': 'Magnum, P.I. - Season 5, Episode 12, ""Little Games"" (1985)', 'importance': 3, 'realism': 3, 'visibility': 2}, {'title': 'Access Code (1984)', 'importance': 3, 'realism': 3, 'visibility': 3}, {'title': 'Computer Chess (2013)', 'importance': 5, 'realism': 5, 'visibility': 2}, {'title': 'Altamont Now (2008)', 'importance': 1, 'realism': 3, 'visibility': 2}]",Kaypro 10,http://www.starringthecomputer.com//computer.html?c=55,5,3.000000,2.600000,3.400000
20,"[{'title': 'Crash and Burn (1990)', 'importance': 4, 'realism': 2, 'visibility': 3}, {'title': 'Klatwa doliny wezy (1987)', 'importance': 1, 'realism': 5, 'visibility': 2}, {'title': 'Stealth Hunters (1991)', 'importance': 3, 'realism': 2, 'visibility': 3}, {'title': 'Matlock - Season 1, Episode 14, ""The Author"" (1987)', 'importance': 3, 'realism': 5, 'visibility': 2}, {'title': 'Like Father Like Son (1987)', 'importance': 1, 'realism': 5, 'visibility': 2}]",Commodore 128,http://www.starringthecomputer.com//computer.html?c=150,5,2.400000,2.400000,3.800000
