In [42]:
# Python 2 & 3 Compatibility
from __future__ import print_function, division

Our standard imports

In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

## First, an HTML refresher
HTML is the basic language used to create a web page. 

It tells the web browser what text/media to display, where to display it, and how to display it (style)

HTML is very structured/hirarchical. 

Every page is made up of discrete "elements."

Elements are labeled with "tags."

For example:

    <p>You are beginning to learn HTML.</p>

A start tag also often contains "attributes" with info about the element.

Attributes usually have a name and value.

Example:

    <p class="my_red_sentences">You are beginning to learn HTML.</p>

A full HTML document has a structure more like this:

```
<html> 
  <head> </head>
  <body>
     <p class="red">You are beginning to learn HTML.</p>
     <h1> This is a header </h1>
     <a href="www.google.com"> Some link </a>
  </body>
</html>
```

Let's explore some live HTML!

Go to http://boxofficemojo.com/movies/?id=biglebowski.htm in your browser,
click Inspect Element, also click on View Page Source.




In [47]:
#conda install beautifulsoup4
!pip install beautifulsoup4
from bs4 import BeautifulSoup



### Get the HTML from a page and convert to a BeautifulSoup object

We'll start by scraping some of that information about [The Big Lebowski](http://boxofficemojo.com/movies/?id=biglebowski.htm).

In [52]:
# if needed: pip install requests
!pip install requests

import requests

# Step 1 - go to the website
url = 'http://boxofficemojo.com/movies/?id=biglebowski.htm'

response = requests.get(url) # response object with lots of attributes



For information on HTTP status codes, see:

https://en.wikipedia.org/wiki/List_of_HTTP_status_codes

In [49]:
# high level summary of request (200 = ok, 404 = not found)
# 200-level is okay
response.status_code 

200

In [50]:
# print out actual response in text
print(response.text)

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="Content-type" content="text/html;charset=iso-8859-1">
<title>The Big Lebowski (1998) - Box Office Mojo</title>

<style type="text/css">
table.chart-wide { width: 100%; }
</style>
<META name="keywords" content="the big lebowski, movie, film, box office, result, records, charts, revenue, opening weekend, gross, worldwide, overseas, foreign, news, reviews, articles, stories, story, analysis, revenue, release date, mpaa rating, genre, running time, length, budget, production budget, distributor, studio, gramercy, theatrical summary, theatrical, showtimes, tickets, show times, theaters, playing, weekend box office results, weekly box office, weekly box office, similar movies, box office mojo">
<META name="description" content="The Big Lebowski summary of box office results, charts and release information and related links.">

<link rel="stylesheet"

In [53]:
# turn response.text into a BeautifulSoup object
page = response.text
soup = BeautifulSoup(page)
print(soup)

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta content="text/html;charset=utf-8" http-equiv="Content-type"/>
<title>The Big Lebowski (1998) - Box Office Mojo</title>
<style type="text/css">
table.chart-wide { width: 100%; }
</style>
<meta content="the big lebowski, movie, film, box office, result, records, charts, revenue, opening weekend, gross, worldwide, overseas, foreign, news, reviews, articles, stories, story, analysis, revenue, release date, mpaa rating, genre, running time, length, budget, production budget, distributor, studio, gramercy, theatrical summary, theatrical, showtimes, tickets, show times, theaters, playing, weekend box office results, weekly box office, weekly box office, similar movies, box office mojo" name="keywords"/>
<meta content="The Big Lebowski summary of box office results, charts and release information and related links." name="description"/>
<link charset="utf-8" href



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [54]:
# prettier output
print(soup.prettify())

<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
 <head>
  <meta content="text/html;charset=utf-8" http-equiv="Content-type"/>
  <title>
   The Big Lebowski (1998) - Box Office Mojo
  </title>
  <style type="text/css">
   table.chart-wide { width: 100%; }
  </style>
  <meta content="the big lebowski, movie, film, box office, result, records, charts, revenue, opening weekend, gross, worldwide, overseas, foreign, news, reviews, articles, stories, story, analysis, revenue, release date, mpaa rating, genre, running time, length, budget, production budget, distributor, studio, gramercy, theatrical summary, theatrical, showtimes, tickets, show times, theaters, playing, weekend box office results, weekly box office, weekly box office, similar movies, box office mojo" name="keywords"/>
  <meta content="The Big Lebowski summary of box office results, charts and release information and related links." name="description"/>
  <

## `soup.find()`

`soup.find()` is the most common function we will use from this package.  

Let's try out some common variations of `soup.find()`

In [55]:
# soup.find() returns the first matched tag it finds.
# It searches the entire tree.

# Search for a type of tag by using the tag as a string
# (like 'body','div','p','a') as an argument.

print(soup.find('a')) # returns the first 'a' tag

<a href="/daily/chart/">Daily Box Office (Sun.)</a>


In [10]:
# Equivalently:
print(soup.a)

<a href="/daily/chart/">Daily Box Office (Sun.)</a>


In [11]:
# soup.find_all() returns a list of all matches

for link in soup.find_all('a'): 
    print(link)

<a href="/daily/chart/">Daily Box Office (Sun.)</a>
<a href="/weekend/chart/">Weekend Box Office (Sep. 23–25)</a>
<a href="/movies/?id=themagnificentseven.htm">#1 Movie: 'The Magnificent Seven'</a>
<a href="http://www.imdb.com/showtimes/?ref_=mojo">Showtimes</a>
<a href="/"><img alt="Box Office Mojo" height="56" src="/img/misc/bom_logo1.png" width="245"/></a>
<a href="http://facebook.com/boxofficemojo" style="vertical-align:middle;"><img alt="Facebook Logo" border="0" src="/images/FaceBook_16x16.png"/>Facebook</a>
<a href="http://twitter.com/boxofficemojo" style="vertical-align:middle;"><img alt="Twitter Logo" border="0" src="/images/Twitter_16x16.png"/>Twitter</a>
<a href="/news/">News</a>
<a href="/schedule/">Release Sched.</a>
<a href="http://www.imdb.com/showtimes?ref_=mojo">Showtimes<br/><span style="margin-left:7px;color: #999999; font-size: 9px;">at <img src="/images/mojo_imdb_sm.png"/></span></a>
<a href="/daily/">Daily</a>
<a href="/weekend/">Weekend</a>
<a href="/weekly/">Wee

In [56]:
# retrieve the url from an anchor tag
soup.find('a')['href'] # 

'/daily/chart/'

In [57]:
# You can match on an attribute like an id or class.
# Take a look at what the 'mp_box_content' classes
# look like on the webpage, with Inspect Element.

for element in soup.find_all(class_='mp_box_content'):
    print(element, '\n')

<div class="mp_box_content">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td width="40%"><b>Domestic:</b></td>
<td align="right" width="35%"> <b>$17,451,873</b></td>
</tr>
</table>
</div> 

<div class="mp_box_content">
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td align="center"><a href="/weekend/chart/?yr=1998&amp;wknd=10&amp;p=.htm">Opening Weekend:</a></td><td> $5,533,844</td></tr>
<tr>
<td align="center" colspan="2"><font size="2">(#6 rank, 1,207 theaters, $4,585 average)</font></td></tr>
<tr>
<td align="right">% of Total Gross:</td><td> 31.7%</td></tr>
<tr><td align="right" colspan="2"><font face="Helvetica, Arial, Sans-Serif" size="1"><a href="/movies/?page=weekend&amp;id=biglebowski.htm"><b>&gt; View All 4 Weekends</b></a></font></td></tr>
</table>
<table border="0" cellpadding="0" cellspacing="0">
<tr>
<td>Widest Release:</td>
<td> 1,235 theaters</td>
</tr>
</table>
</div> 

<div class="mp_box_content">
<table>
<tr><td align="right" valign="top"><font 

In [14]:
# We can find all the columns in the first mp_box_content table
# by "chaining" `find` and `find_all`.
# - do soup.find, then
# - find_all to get list of all columns
print(soup.find(class_='mp_box_content').find_all('td'))

[<td width="40%"><b>Domestic:</b></td>, <td align="right" width="35%"> <b>$17,451,873</b></td>]


In [58]:
# To extract just the value of interest:

soup.find(class_='mp_box_content').find_all('td')[1].text

'\xa0$17,451,873'

In [16]:
# find with an "id". (ID is unique.)

print(soup.find(id='hp_footer')) # all the footers of the site

<div id="hp_footer">
<div style="padding-bottom: 20px;">
<div style="margin: 0px 121px; vertical-align: top;">
<div id="footer_links">
<ul class="footer_link_list">
<li><strong>Latest Updates</strong></li>
<li><a href="/news/?ref=ft">Movie News</a>
</li><li><a href="/daily/chart/?ref=ft">Daily Chart</a></li>
<li><a href="/weekend/chart/?ref=ft">Weekend Chart</a></li>
<li><a href="/alltime/?ref=ft">All Time Charts</a></li>
<li><a href="/intl/?ref=ft">International Charts</a></li>
</ul>
<!--
					<ul class="footer_link_list">
						<li><strong>Popular Movies</strong></li>
											</ul>
					-->
<ul class="footer_link_list">
<li><strong>Indices</strong></li>
<li><a href="/people/?ref=ft">People</a></li>
<li><a href="/genres/?ref=ft">Genres</a></li>
<li><a href="/franchises/?ref=ft">Franchises</a></li>
<li><a href="/showdowns/?ref=ft">Showdowns</a></li>
</ul>
<ul class="footer_link_list">
<li><strong>Other</strong></li>
<li><a href="/about/?ref=ft">About This Site</a></li>
<li><a href="

### Consistency Web scraping is made simple by the consistent format of information among like pages of a website.
### Items to scrape for each movie:
movie title
total domestic gross
release date
runtime
rating

In [17]:
# Movie Title

print(soup.find('title'))

<title>The Big Lebowski (1998) - Box Office Mojo</title>


In [18]:
# extract the text only (remove the title tags)
title_string = soup.find('title').text
print(title_string)

The Big Lebowski (1998) - Box Office Mojo


In [59]:
print(title_string.split('(')) # split to get before ( and after (

['The Big Lebowski ', '1998) - Box Office Mojo']


In [60]:
title = title_string.split('(')[0].strip()
print(title)

The Big Lebowski


In [21]:
# Domestic Total Gross
# - you need the text content (not the title tag)
## text does an exact match search!
print(soup.find(text="Domestic Total Gross")) # None b/c missing ':'

None


In [61]:
# You could find a perfect match:

print(soup.find(text="Domestic Total Gross: "))

Domestic Total Gross: 


#### You could also use regular expressions
![regular expressions](http://imgs.xkcd.com/comics/regular_expressions.png)

[Handy Tool for making RegEx](http://pythex.org/)

In [62]:
# can search using regex (does not need to be exact match)
import re
domestic_total_regex = re.compile('Domestic Total')
soup.find(text=domestic_total_regex)

'Domestic Total Gross: '

In [63]:
dtg_string = soup.find(text=re.compile('Domestic Total'))
print(dtg_string)

Domestic Total Gross: 


In [25]:
# the value closest to 'Domestic Total Gross' (next element within same table)
print(dtg_string.findNextSibling())

<b>$17,451,873</b>


In [64]:
# isolate the dollar value and cast as integer
dtg = dtg_string.findNextSibling().text
dtg = dtg.replace('$','').replace(',','')
domestic_total_gross = int(dtg)
print(domestic_total_gross)

17451873


### We can actually do several of these using the text matching method, so let's make a function for that

In [65]:
# pass a soup object and a field name
def get_movie_value(soup, field_name):
    '''Grab a value from boxofficemojo HTML
    
    Takes a string attribute of a movie on the page and
    returns the string in the next sibling object
    (the value for that attribute)
    or None if nothing is found.
    '''
    obj = soup.find(text=re.compile(field_name))
    if not obj: 
        return None
    # this works for most of the values
    next_sibling = obj.findNextSibling()
    if next_sibling:
        return next_sibling.text 
    else:
        return None

In [66]:
# domestic total gross
dtg = get_movie_value(soup,'Domestic Total')
print(dtg)

$17,451,873


In [67]:
# runtime
runtime = get_movie_value(soup,'Runtime')
print(runtime)

1 hrs. 57 min.


In [68]:
# rating
rating = get_movie_value(soup,'MPAA Rating')
print(rating)

R


In [69]:
release_date = get_movie_value(soup,'Release Date')
print(release_date)

March 6, 1998


### We need a few helper methods to parse the strings we've gotten

In [70]:
import dateutil.parser

def to_date(datestring):
    date = dateutil.parser.parse(datestring)
    return date

def money_to_int(moneystring):
    moneystring = moneystring.replace('$', '').replace(',', '')
    return int(moneystring)

def runtime_to_minutes(runtimestring):
    runtime = runtimestring.split()
    try:
        minutes = int(runtime[0])*60 + int(runtime[2])
        return minutes
    except:
        return None

In [71]:
# Let's get these again and format them all in one swoop

from pprint import pprint

raw_release_date = get_movie_value(soup,'Release Date')
release_date = to_date(raw_release_date)

raw_domestic_total_gross = get_movie_value(soup,'Domestic Total')
domestic_total_gross = money_to_int(raw_domestic_total_gross)

raw_runtime = get_movie_value(soup,'Runtime')
runtime = runtime_to_minutes(raw_runtime)

headers = ['movie title', 'domestic total gross',
           'release date', 'runtime (mins)', 'rating']

movie_data = []
movie_dict = dict(zip(headers, [title,
                                domestic_total_gross,
                                release_date,
                                runtime,
                                rating]))
movie_data.append(movie_dict)

pprint(movie_data)

[{'domestic total gross': 17451873,
  'movie title': 'The Big Lebowski',
  'rating': 'R',
  'release date': datetime.datetime(1998, 3, 6, 0, 0),
  'runtime (mins)': 117}]


In [72]:
# can also put data into a df
pd.DataFrame(movie_data)

Unnamed: 0,domestic total gross,movie title,rating,release date,runtime (mins)
0,17451873,The Big Lebowski,R,1998-03-06,117


### What about scraping tables? 

In [73]:
# retrieve a whole table
response = requests.get("https://en.wikipedia.org/wiki/List_of_highest-grossing_films")
soup = BeautifulSoup(response.text)

movie_list = soup.find("table",{ "class" : "wikitable sortable plainrowheaders" })
print(movie_list)



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


<table class="wikitable sortable plainrowheaders" style="margin:auto; margin:auto;">
<caption>Highest-grossing films<sup class="reference" id="cite_ref-13"><a href="#cite_note-13">[13]</a></sup></caption>
<tr>
<th scope="col">Rank</th>
<th scope="col"><span style="border-bottom:1px dotted" title="Highest position attained in the chart">Peak</span></th>
<th scope="col">Title</th>
<th scope="col">Worldwide gross</th>
<th scope="col">Year</th>
<th class="unsortable" scope="col">Reference(s)</th>
</tr>
<tr>
<td>1</td>
<td>1</td>
<th scope="row"><i><a href="/wiki/Avatar_(2009_film)" title="Avatar (2009 film)">Avatar</a></i></th>
<td align="right">$2,787,965,087</td>
<td data-sort-value="2009-12" style="text-align:center;">2009</td>
<td style="text-align:center;"><sup class="reference" id="cite_ref-avatar_14-0"><a href="#cite_note-avatar-14">[# 1]</a></sup><sup class="reference" id="cite_ref-avatar_peak_15-0"><a href="#cite_note-avatar_peak-15">[# 2]</a></sup></td>
</tr>
<tr>
<td>2</td>
<td>

In [35]:
# iterate through each row, and each col/(elem of row)
# for each row, find all the <td>
# note that title column has tag <th> <- scrape 
movie_data = []

header = ['Rank','Peak','Worldwide Gross','Year','Reference(s)']
for row in movie_list.findAll("tr"):
    row_dict={}
    for i,cell in enumerate(row.findAll("td")):
        row_dict[header[i]] = cell.find(text=True)
    link = row.find("th").find("a")
    if link:
        row_dict['Title'] = link.text
    movie_data.append(row_dict)
    
movies_df = pd.DataFrame(movie_data)
movies_df.dropna()

Unnamed: 0,Peak,Rank,Reference(s),Title,Worldwide Gross,Year
1,1,1,[# 1],Avatar,"$2,787,965,087",2009
2,1,2,[# 3],Titanic,"$2,186,772,302",1997
3,3,3,[# 5],Star Wars: The Force Awakens,"$2,068,223,624",2015
4,3,4,[# 7],Jurassic World,"$1,670,400,637",2015
5,3,5,[# 9],The Avengers,"$1,519,557,910",2012
6,4,6,[# 11],Furious 7,"$1,516,045,911",2015
7,5,7,[# 13],Avengers: Age of Ultron,"$1,405,413,868",2015
8,3,8,[# 14],Harry Potter and the Deathly Hallows – Part 2,"$1,341,511,219",2011
9,5,9,[# 16],Frozen,"$1,287,000,000",2013
10,5,10,[# 18],Iron Man 3,"$1,215,439,994",2013


### Scraping all relevant movie links on a page

In [36]:
# all time domestic total gross adjusted (one of the indices in boxofficemojo)
url2 = 'http://www.boxofficemojo.com/alltime/adjusted.htm'
soup2 = BeautifulSoup(requests.get(url2).text,'html.parser')

In [37]:
# note that the format of all the movie links on this page:
# (http://www.boxofficemojo.com/alltime/adjusted.htm)
# is: http://www.boxofficemojo.com/movies/?id={}.htm 
all_links = soup2.findAll('a',href = re.compile('/movies/\?*id='))
for link in all_links:
    print (link['href'])

/movies/?id=themagnificentseven.htm
/movies/?id=gonewiththewind.htm
/movies/?id=starwars4.htm
/movies/?id=soundofmusic.htm
/movies/?id=et.htm
/movies/?id=titanic.htm
/movies/?id=tencommandments.htm
/movies/?id=jaws.htm
/movies/?id=doctorzhivago.htm
/movies/?id=exorcist.htm
/movies/?id=snowwhite.htm
/movies/?id=starwars7.htm
/movies/?id=101dalmations.htm
/movies/?id=starwars5.htm
/movies/?id=benhur.htm
/movies/?id=avatar.htm
/movies/?id=starwars6.htm
/movies/?id=jurassicpark.htm
/movies/?id=starwars.htm
/movies/?id=lionking.htm
/movies/?id=sting.htm
/movies/?id=raidersofthelostark.htm
/movies/?id=graduate.htm
/movies/?id=fantasia.htm
/movies/?id=jurassicpark4.htm
/movies/?id=godfather.htm
/movies/?id=forrestgump.htm
/movies/?id=marypoppins.htm
/movies/?id=grease.htm
/movies/?id=avengers11.htm
/movies/?id=thunderball.htm
/movies/?id=darkknight.htm
/movies/?id=junglebook.htm
/movies/?id=sleepingbeauty.htm
/movies/?id=ghostbusters.htm
/movies/?id=shrek2.htm
/movies/?id=butchcassidyandthesu

In [None]:
# Pandas 

In [None]:
tables = pd.read_html("some link with tables")
# table 
tables[2]

# Scraping the Unscrapable

### What happens if I try to parse my gmail with `requests` and `BeautifulSoup`?

In [38]:
import requests
from bs4 import BeautifulSoup

gmail_url="https://mail.google.com"
soup=BeautifulSoup(requests.get(gmail_url).text)
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=300, initial-scale=1" name="viewport"/>
  <meta content="Gmail is email that's intuitive, efficient, and useful. 15 GB of storage, less spam, and mobile access." name="description"/>
  <meta content="LrdTUW9psUAMbh4Ia074-BPEVmcpBxF6Gwf0MSgQXZs" name="google-site-verification"/>
  <title>
   Gmail
  </title>
  <style>
   @font-face {
  font-family: 'Open Sans';
  font-style: normal;
  font-weight: 300;
  src: local('Open Sans Light'), local('OpenSans-Light'), url(//fonts.gstatic.com/s/opensans/v13/DXI1ORHCpsQm3Vp6mXoaTYnF5uFdDttMLvmWuJdhhgs.ttf) format('truetype');
}
@font-face {
  font-family: 'Open Sans';
  font-style: normal;
  font-weight: 400;
  src: local('Open Sans'), local('OpenSans'), url(//fonts.gstatic.com/s/opensans/v13/cJZKeOuBrn4kERxqtaUH3aCWcynf_cDxXwCLxiixG1c.ttf) format('truetype');
}
  </style>
  <style>
   h1, h2 {
  -webkit-animation-duration: 0.1s;
  -webkit-animation-name: fon



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [39]:
new_url = "https://mail.google.com/mail"

# get method will navigate the requested url.. 
soup =BeautifulSoup(requests.get(new_url).text)
print(soup.prettify())

<!DOCTYPE html>
<html lang="en">
 <head>
  <meta charset="utf-8"/>
  <meta content="width=300, initial-scale=1" name="viewport"/>
  <meta content="Gmail is email that's intuitive, efficient, and useful. 15 GB of storage, less spam, and mobile access." name="description"/>
  <meta content="LrdTUW9psUAMbh4Ia074-BPEVmcpBxF6Gwf0MSgQXZs" name="google-site-verification"/>
  <title>
   Gmail
  </title>
  <style>
   @font-face {
  font-family: 'Open Sans';
  font-style: normal;
  font-weight: 300;
  src: local('Open Sans Light'), local('OpenSans-Light'), url(//fonts.gstatic.com/s/opensans/v13/DXI1ORHCpsQm3Vp6mXoaTYnF5uFdDttMLvmWuJdhhgs.ttf) format('truetype');
}
@font-face {
  font-family: 'Open Sans';
  font-style: normal;
  font-weight: 400;
  src: local('Open Sans'), local('OpenSans'), url(//fonts.gstatic.com/s/opensans/v13/cJZKeOuBrn4kERxqtaUH3aCWcynf_cDxXwCLxiixG1c.ttf) format('truetype');
}
  </style>
  <style>
   h1, h2 {
  -webkit-animation-duration: 0.1s;
  -webkit-animation-name: fon



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [40]:
print(soup.find(id='Email'))

<input id="Email" name="Email" placeholder="Enter your email" spellcheck="false" type="email" value=""/>


In [41]:
# pip install selenium 

#http://chromedriver.storage.googleapis.com/index.html?path=2.19/

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import time

import os
chromedriver = "/Users/username/Downloads/chromedriver"
os.environ["webdriver.chrome.driver"] = chromedriver


driver = webdriver.Chrome(chromedriver)
driver.get("https://mail.google.com")

# Alternatives to Chrome:
# Firefox, phantomjs

ImportError: No module named 'selenium'

#### Fill out username and password, hit enter to log in

In [None]:
username_form = driver.find_element_by_id("Email")
username_form.send_keys("username@gmail.com")

In [None]:
username_form.send_keys(Keys.RETURN)

In [None]:
password_form=driver.find_element_by_id('Passwd')
password_form.send_keys('########')

In [None]:
password_form.send_keys(Keys.RETURN)

#### Click compose button to start a new email draft!

In [None]:
compose_button=driver.find_element_by_xpath('//div[text()="COMPOSE"]')
compose_button.click()

In [None]:
to_field = driver.find_element_by_name("to")
to_field.send_keys("username@gmail.com")

In [None]:
subject = driver.find_element_by_name("subjectbox")
subject.send_keys("This is an alert!")

In [None]:
message_body = driver.find_element_by_xpath("//div[@aria-label='Message Body']")
message_body.send_keys("Hello,")
message_body.send_keys([Keys.RETURN, Keys.RETURN])
message_body.send_keys("I am a computer and I just became self aware.")

In [None]:
send_button = driver.find_element_by_xpath("//div[contains(@aria-label, 'Send')]")
send_button.click()

References: 
- Documentation on finding elements:
- http://selenium-python.readthedocs.org/en/latest/locating-elements.html
- Xpath tutorial:
-  http://www.w3schools.com/xpath/xpath_syntax.asp