Let's scrape some data about sharks from a webpage

In [2]:
import pandas as pd
import re ## added 
import bs4
import sqlite3
import requests
import textwrap

res = requests.get('http://www.sharkresearchcommittee.com/pacific_coast_shark_news.htm')
res.raise_for_status()
soup = bs4.BeautifulSoup(res.text, 'html.parser')

news = [p.text.strip() for p in soup.select('h1 ~ p') if p.find('font')]





Let's store the data we screaped into a database

In [7]:
c = sqlite3.connect('shark.db')
try:
    c.execute('''CREATE TABLE
                    mytable (Location        STRING,
                             Date            STRING,
                             Description     STRING)''')
except sqlite3.OperationalError: #i.e. table exists already
    pass

for n in news:
        groups = re.match(r'(.*?)\W+—?\W+On\W+(.*?\d{4})\W*(.*)', n, flags=re.DOTALL)
        if not groups:
            continue
        place, date, article = groups[1], groups[2], groups[3]

        c.execute('''INSERT INTO mytable(Location, Date, Description) VALUES(?,?,?)''',
            (place, date, article))
c.commit()


The data has been stored into a database, now let's turn it into a datafram and analyze it via Pandas

In [10]:
df = pd.read_sql_query("select * from mytable;",c)

In [11]:
df.head(2)

Unnamed: 0,Location,Date,Description
0,Shell Beach,"August 1, 2018",Kristen Sanchez was paddling an outrigger with...
1,Monterey Bay,"August 1, 2018",Eric Keener was spearfishing for California H...


In [13]:
df.set_index('Location', inplace=True)


In [14]:
df.head(3)

Unnamed: 0_level_0,Date,Description
Location,Unnamed: 1_level_1,Unnamed: 2_level_1
Shell Beach,"August 1, 2018",Kristen Sanchez was paddling an outrigger with...
Monterey Bay,"August 1, 2018",Eric Keener was spearfishing for California H...
Pacifica,"July 27, 2018",Kris Lopez was surfing with 4 unidentified su...


In [21]:
freq = df.groupby('Location').count()


In [27]:
freq

Unnamed: 0_level_0,Date,Description
Location,Unnamed: 1_level_1,Unnamed: 2_level_1
Bolinas,7,7
Carpinteria,35,35
Cataline Island,7,7
Goleta,14,14
Grover Beach,7,7
Manhattan Beach,7,7
Monterey Bay,7,7
New Brighton State Beach,7,7
Oceanside,7,7
Pacifica,7,7


As you can see, that Carpinteria has the most recorded shark sightings by the publich 