## Fetching Data

In [67]:
import bs4

In [9]:
from urllib.request import urlopen

In [10]:
android_url="https://en.wikipedia.org/wiki/Android_version_history"

In [11]:
android_data = urlopen(android_url)
print(type(android_data))

<class 'http.client.HTTPResponse'>


In [66]:
android_html=android_data.read()
android_data.close()

## Using BeautifulSoup

In [13]:
from bs4 import BeautifulSoup as soup

In [68]:
android_soup =  soup(android_html,'html.parser')

In [28]:
print(type(android_soup))

<class 'bs4.BeautifulSoup'>


In [29]:
android_soup.h1

<h1 class="firstHeading" id="firstHeading" lang="en">Android version history</h1>

In [30]:
android_soup.findAll('h1',{})

[<h1 class="firstHeading" id="firstHeading" lang="en">Android version history</h1>]

In [31]:
tables=android_soup.findAll('table',{'class' : 'wikitable'})

In [32]:
len(tables)

31

In [69]:
android_table = tables[0]

## Parsing HTML  Tables

In [34]:
headers = android_table.findAll('th')

In [35]:
headers

[<th>Name
 </th>, <th>Version number(s)
 </th>, <th>Initial stable<br/>release date
 </th>, <th>Supported (security fixes)
 </th>, <th>API level
 </th>, <th>References
 </th>]

In [39]:
column_titles = [ct.text[:-1] for ct in headers]

In [40]:
column_titles

['Name',
 'Version number(s)',
 'Initial stablerelease date',
 'Supported (security fixes)',
 'API level',
 'References']

In [41]:
rows_data = android_table.findAll('tr')[1:]

In [43]:
print(len(rows_data))

18


In [44]:
first_row = rows_data[0]
print(first_row)

<tr>
<td rowspan="2">No official codename
</td>
<td>1.0
</td>
<td>September 23, 2008
</td>
<td class="table-no" style="background:#F99;vertical-align:middle;text-align:center;">No
</td>
<td>1
</td>
<td><sup class="reference" id="cite_ref-unofficial_and_official_codenames_9-1"><a href="#cite_note-unofficial_and_official_codenames-9">[9]</a></sup>
</td></tr>


In [47]:
first_row = rows_data[0].findAll('td',{})
for d in first_row:
    print(d.text[:-1])

No official codename
1.0
September 23, 2008
No
1
[9]


In [53]:
table_rows=[]
for row in rows_data:
    current_row=[]
    row_data= row.findAll('td')
    for idx,data in enumerate(row_data):
        current_row.append(data.text[:-1])
    table_rows.append(current_row)

In [54]:
print(table_rows)

[['No official codename', '1.0', 'September 23, 2008', 'No', '1', '[9]'], ['1.1', 'February 9, 2009', 'No', '2', '[9][14]'], ['Cupcake', '1.5', 'April 27, 2009', 'No', '3', '[15]'], ['Donut', '1.6', 'September 15, 2009', 'No', '4', '[16]'], ['Eclair', '2.0 – 2.1', 'October 26, 2009', 'No', '5 – 7', '[17]'], ['Froyo', '2.2 – 2.2.3', 'May 20, 2010', 'No', '8', '[18]'], ['Gingerbread', '2.3 – 2.3.7', 'December 6, 2010', 'No', '9 – 10', '[19]'], ['Honeycomb', '3.0 – 3.2.6', 'February 22, 2011', 'No', '11 – 13', '[20]'], ['Ice Cream Sandwich', '4.0 – 4.0.4', 'October 18, 2011', 'No', '14 – 15', '[21]'], ['Jelly Bean', '4.1 – 4.3.1', 'July 9, 2012', 'No', '16 – 18', '[22]'], ['KitKat', '4.4 – 4.4.4', 'October 31, 2013', 'No', '19 – 20', '[23]'], ['Lollipop', '5.0 – 5.1.1', 'November 12, 2014', 'No', '21 – 22', '[24]'], ['Marshmallow', '6.0 – 6.0.1', 'October 5, 2015', 'No', '23', '[25]'], ['Nougat', '7.0 – 7.1.2', 'August 22, 2016', 'No', '24 – 25', '[26][27][28][29]'], ['Oreo', '8.0 – 8.1',

## Creating CSV 

In [63]:
filename = 'android_version_history.csv'
with open(filename,'w',encoding='utf-8') as f:
    #write the header
    header_string = ','.join(column_titles)
    header_string+='\n'
    f.write(header_string)
    for row in table_rows:
        row_string=""
        for w in row:
            w=w.replace(',','')
            row_string+=w+','
        row_string=row_string[:-1]
        row_string+='\n'
        f.write(row_string)

In [64]:
import pandas as pd
data=pd.read_csv('android_version_history.csv')

In [65]:
data.head(10)

Unnamed: 0,Name,Version number(s),Initial stablerelease date,Supported (security fixes),API level,References
0,No official codename,1.0,September 23 2008,No,1,[9]
1,1.1,February 9 2009,No,2,[9][14],
2,Cupcake,1.5,April 27 2009,No,3,[15]
3,Donut,1.6,September 15 2009,No,4,[16]
4,Eclair,2.0 – 2.1,October 26 2009,No,5 – 7,[17]
5,Froyo,2.2 – 2.2.3,May 20 2010,No,8,[18]
6,Gingerbread,2.3 – 2.3.7,December 6 2010,No,9 – 10,[19]
7,Honeycomb,3.0 – 3.2.6,February 22 2011,No,11 – 13,[20]
8,Ice Cream Sandwich,4.0 – 4.0.4,October 18 2011,No,14 – 15,[21]
9,Jelly Bean,4.1 – 4.3.1,July 9 2012,No,16 – 18,[22]
