In [3]:
from urllib.request import urlopen

In [4]:
android_url = "https://en.wikipedia.org/wiki/Android_version_history"
android_data = urlopen(android_url)
android_html = android_data.read()

In [5]:
from bs4 import BeautifulSoup as soup

In [6]:
android_soup = soup(android_html, "html.parser")

In [7]:
tables = android_soup.findAll('table', {"class":"wikitable"})
android_table = tables[0]

In [8]:
headers = android_table.findAll('th')
column_titles = [ct.text[:-1] for ct in headers]

In [9]:
rows_data = android_table.findAll('tr')[1:]

table_rows = []
for row in rows_data:
    current_row = []
    row_data = row.findAll('td')
    for idx, data in enumerate(row_data):
        current_row.append(data.text[:-1])
    table_rows.append(current_row)
print(table_rows)

[['No codename', '1.0', 'September 23, 2008', '1', '[9]'], ['1.1', 'February 9, 2009', '2', '[9][11]'], ['Cupcake', '1.5', 'April 27, 2009', '3', ''], ['Donut', '1.6', 'September 15, 2009', '4', '[12]'], ['Eclair', '2.0 – 2.1', 'October 26, 2009', '5 – 7', '[13]'], ['Froyo', '2.2 – 2.2.3', 'May 20, 2010', '8', '[14]'], ['Gingerbread', '2.3 – 2.3.7', 'December 6, 2010', '9 – 10', '[15]'], ['Honeycomb', '3.0 – 3.2.6', 'February 22, 2011', '11 – 13', '[16]'], ['Ice Cream Sandwich', '4.0 – 4.0.4', 'October 18, 2011', '14 – 15', '[17]'], ['Jelly Bean', '4.1 – 4.3.1', 'July 9, 2012', '16 – 18', '[18]'], ['KitKat', '4.4 – 4.4.4', 'October 31, 2013', '19 – 20', '[19]'], ['Lollipop', '5.0 – 5.1.1', 'November 12, 2014', '21 – 22', '[20]'], ['Marshmallow', '6.0 – 6.0.1', 'October 5, 2015', '23', '[21]'], ['Nougat', '7.0', 'August 22, 2016', '24', '[22]'], ['7.1.0 – 7.1.2', 'October 4, 2016', '25', '[23][24][25]'], ['Oreo', '8.0', 'August 21, 2017', '26', '[26]'], ['8.1', 'December 5, 2017', '27',

# Creating CSV

In [10]:
filename = 'android_version_history.csv'

In [17]:
with open(filename, 'w') as f:
    #write header
    header_string = ','.join(column_titles)
    header_string += '\n'
    f.write(header_string)
    
    for row in table_rows:
        row_string= ""
        for w in row:
            w = w.replace(',', '')
            row_string += w+','
        row_string = row_string[:-1]
        row_string += '\n'
        f.write(row_string)

# Cleaning Data

In [18]:
import pandas as pd

In [20]:
df = pd.read_csv('android_version_history.csv')
df.head(n=10)

Unnamed: 0,Code name,Version numbers,Initial release date,API level,References
0,No codename,1.0,September 23 2008,1,[9]
1,1.1,February 9 2009,2,[9][11],
2,Cupcake,1.5,April 27 2009,3,
3,Donut,1.6,September 15 2009,4,[12]
4,Eclair,2.0 – 2.1,October 26 2009,5 – 7,[13]
5,Froyo,2.2 – 2.2.3,May 20 2010,8,[14]
6,Gingerbread,2.3 – 2.3.7,December 6 2010,9 – 10,[15]
7,Honeycomb,3.0 – 3.2.6,February 22 2011,11 – 13,[16]
8,Ice Cream Sandwich,4.0 – 4.0.4,October 18 2011,14 – 15,[17]
9,Jelly Bean,4.1 – 4.3.1,July 9 2012,16 – 18,[18]


# Data Acquisition using API

In [21]:
from urllib.request import urlopen

## Open Weather Map API

In [22]:
api_url = "https://samples.openweathermap.org/data/2.5/weather?q=London,uk&appid=b6907d289e10d714a6e88b30761fae22"

In [23]:
url_result = urlopen(api_url)
data = url_result.read()

In [25]:
import json
json_data = json.loads(data)

In [26]:
print(json_data)

{'coord': {'lon': -0.13, 'lat': 51.51}, 'weather': [{'id': 300, 'main': 'Drizzle', 'description': 'light intensity drizzle', 'icon': '09d'}], 'base': 'stations', 'main': {'temp': 280.32, 'pressure': 1012, 'humidity': 81, 'temp_min': 279.15, 'temp_max': 281.15}, 'visibility': 10000, 'wind': {'speed': 4.1, 'deg': 80}, 'clouds': {'all': 90}, 'dt': 1485789600, 'sys': {'type': 1, 'id': 5091, 'message': 0.0103, 'country': 'GB', 'sunrise': 1485762037, 'sunset': 1485794875}, 'id': 2643743, 'name': 'London', 'cod': 200}


In [28]:
json_data['coord'], json_data['name']

({'lon': -0.13, 'lat': 51.51}, 'London')

In [29]:
json_string = json.dumps(json_data)

## Google API

In [31]:
import requests

In [32]:
url = "https://maps.googleapis.com/maps/api/geocode/json?"

In [36]:
params = {
    "address":"coding blocks pitampura",
    "key":"AIzaSyDxpzAOiOie21qiUfMhWegOvmbKH25TN1E"
}

In [37]:
r = requests.get(url, params = params)
r.url

'https://maps.googleapis.com/maps/api/geocode/json?address=coding+blocks+pitampura&key=AIzaSyDxpzAOiOie21qiUfMhWegOvmbKH25TN1E'

In [38]:
r.content

b'{\n   "error_message" : "The provided API key is invalid.",\n   "results" : [],\n   "status" : "REQUEST_DENIED"\n}\n'

## Facebook API

In [39]:
import requests

In [40]:
url = "https://graph.facebook.com/4/picture?type=large"

In [43]:
r = requests.get(url)
with open("samplePic.jpg", 'wb') as f:
    f.write(r.content)

# Image Scraping

In [46]:
import bs4, requests

In [47]:
url = "https://www.passiton.com/inspirational-quotes?page=2"

In [50]:
response = requests.get(url)

In [52]:
soup = bs4.BeautifulSoup(response.content, "html.parser")

In [59]:
article_element = soup.findAll('div')

In [78]:
article = article_element[1]

In [79]:
article.img.attrs['src']

'/assets/pofo/logo-6d680decaadef58e4fbb586e147bc135.png'

In [None]:
with open('inspiration.jpg', 'wb') as f:
    img_url = ""
    r = requests.get(img_url)
    f.write(r.content)

# Scrapy

In [83]:
!scrapy startproject myproject

New Scrapy project 'myproject', using template directory '/usr/local/lib/python2.7/dist-packages/scrapy/templates/project', created in:
    /home/orashar/ML/CodingBlocks/WebScrapping/myproject

You can start your first spider with:
    cd myproject
    scrapy genspider example example.com
