# Get Data from internet and using API

In [1]:
## import library
import numpy as np
import pandas as pd
import requests
from urllib.request import urlretrieve, urlopen, Request

In [2]:
url = 'https://s3.amazonaws.com/assets.datacamp.com/production/course_1606/datasets/winequality-red.csv'

In [3]:
# save file locally
urlretrieve(url, 'data/csv/winequality-red.csv')

('data/csv/winequality-red.csv', <http.client.HTTPMessage at 0x5901280>)

In [4]:
pd.read_csv(url, sep=';').head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


## HTTP GET request

In [5]:
# basic concept of GET requests
url = 'https://www.wikipedia.org'
req = Request(url)
response = urlopen(req)
html = response.read()
response.close()

In [6]:
# another basic GET requests with requests
url = 'https://www.python.org/~guido/'
req = requests.get(url)
text = req.text

## Scrapping the web in Python
using BeautifulSoup

In [7]:
# import library
from bs4 import BeautifulSoup

In [8]:
print(BeautifulSoup(text).prettify())

<html>
 <head>
  <title>
   Guido's Personal Home Page
  </title>
 </head>
 <body bgcolor="#FFFFFF" text="#000000">
  <h1>
   <a href="pics.html">
    <img border="0" src="images/IMG_2192.jpg"/>
   </a>
   Guido van Rossum - Personal Home Page
   <a href="pics.html">
    <img border="0" height="216" src="images/guido-headshot-2019.jpg" width="270"/>
   </a>
  </h1>
  <p>
   <a href="http://www.washingtonpost.com/wp-srv/business/longterm/microsoft/stories/1998/raymond120398.htm">
    <i>
     "Gawky and proud of it."
    </i>
   </a>
  </p>
  <h3>
   <a href="images/df20000406.jpg">
    Who I Am
   </a>
  </h3>
  <p>
   Read
my
   <a href="http://neopythonic.blogspot.com/2016/04/kings-day-speech.html">
    "King's
Day Speech"
   </a>
   for some inspiration.
  </p>
  <p>
   I am the author of the
   <a href="http://www.python.org">
    Python
   </a>
   programming language.  See also my
   <a href="Resume.html">
    resume
   </a>
   and my
   <a href="Publications.html">
    publicati

In [9]:
BeautifulSoup(text).title

<title>Guido's Personal Home Page</title>

In [10]:
BeautifulSoup(text).get_text()

'\n\nGuido\'s Personal Home Page\n\n\n\n\nGuido van Rossum - Personal Home Page\n\n\n"Gawky and proud of it."\nWho I Am\nRead\nmy "King\'s\nDay Speech" for some inspiration.\n\nI am the author of the Python\nprogramming language.  See also my resume\nand my publications list, a brief bio, assorted writings, presentations and interviews (all about Python), some\npictures of me,\nmy new blog, and\nmy old\nblog on Artima.com.  I am\n@gvanrossum on Twitter.\n\nI am retired, working on personal projects (and maybe a book).\nI have worked for Dropbox, Google, Elemental Security, Zope\nCorporation, BeOpen.com, CNRI, CWI, and SARA.  (See\nmy resume.)  I created Python while at CWI.\n\nHow to Reach Me\nYou can send email for me to guido (at) python.org.\nI read everything sent there, but I receive too much email to respond\nto everything.\n\nMy Name\nMy name often poses difficulties for Americans.\n\nPronunciation: in Dutch, the "G" in Guido is a hard G,\npronounced roughly like the "ch" in Sco

In [11]:
# Search all href inside a tags
for link in BeautifulSoup(text).find_all('a'):
    print(link.get('href'))

pics.html
pics.html
http://www.washingtonpost.com/wp-srv/business/longterm/microsoft/stories/1998/raymond120398.htm
images/df20000406.jpg
http://neopythonic.blogspot.com/2016/04/kings-day-speech.html
http://www.python.org
Resume.html
Publications.html
bio.html
http://legacy.python.org/doc/essays/
http://legacy.python.org/doc/essays/ppt/
interviews.html
pics.html
http://neopythonic.blogspot.com
http://www.artima.com/weblogs/index.jsp?blogger=12088
https://twitter.com/gvanrossum
Resume.html
guido.au
http://legacy.python.org/doc/essays/
images/license.jpg
http://www.cnpbagwell.com/audio-faq
http://sox.sourceforge.net/
images/internetdog.gif


## APIs and JSON

In [12]:
# import library
import json

In [13]:
api = 'http://www.omdbapi.com/?i=tt3896198&apikey=ead1eb11'
req = requests.get(api)
json_data = req.json()

In [14]:
omdb_api = pd.DataFrame.from_dict(json_data)

In [15]:
omdb_api.head()

Unnamed: 0,Title,Year,Rated,Released,Runtime,Genre,Director,Writer,Actors,Plot,...,Metascore,imdbRating,imdbVotes,imdbID,Type,DVD,BoxOffice,Production,Website,Response
0,Guardians of the Galaxy Vol. 2,2017,PG-13,05 May 2017,136 min,"Action, Adventure, Comedy, Sci-Fi",James Gunn,"James Gunn, Dan Abnett (based on the Marvel co...","Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...",The Guardians struggle to keep together as a t...,...,67,7.6,556178,tt3896198,movie,,,"Marvel Studios, Walt Disney Pictures",,True
1,Guardians of the Galaxy Vol. 2,2017,PG-13,05 May 2017,136 min,"Action, Adventure, Comedy, Sci-Fi",James Gunn,"James Gunn, Dan Abnett (based on the Marvel co...","Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...",The Guardians struggle to keep together as a t...,...,67,7.6,556178,tt3896198,movie,,,"Marvel Studios, Walt Disney Pictures",,True
2,Guardians of the Galaxy Vol. 2,2017,PG-13,05 May 2017,136 min,"Action, Adventure, Comedy, Sci-Fi",James Gunn,"James Gunn, Dan Abnett (based on the Marvel co...","Chris Pratt, Zoe Saldana, Dave Bautista, Vin D...",The Guardians struggle to keep together as a t...,...,67,7.6,556178,tt3896198,movie,,,"Marvel Studios, Walt Disney Pictures",,True


### Twitter API

In [16]:
# import library
import tweepy

In [17]:
access_token = 'secret'
access_token_secret = 'secret'
consumer_key = 'secret'
consumer_secret = 'secret'

In [18]:
# Authenticate to Twitter
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)

In [19]:
api = tweepy.API(auth)

In [20]:
try:
    api.verify_credentials()
    print("Authentication OK")
except:
    print("Error during authentication")

Authentication OK


In [28]:
trends_result = api.trends_place(1)
for trend in trends_result[0]["trends"]:
    print(trend["name"])

Michigan
#GetWellSoonJisung
jungkook
#フロイド・リーチ誕生祭2020
Nevada
#いい推しの日
#渡辺翔太誕生祭
#겨울의_기분좋은시작_큐_생일축하해
リーチ兄弟
Öykü Serter
VERY STRANGE
USPS
Democrats
Bernie
Jrue
投票率100%超え
Oregon
Dems
数学偏差値
WI and MI
理系学生
가스파드
Hititbet Yüzde100Bonus
Detroit
推しと結婚
Ben Shapiro
Gorosito
ゲームセンター運営
Arnab
Counting
しょっぴーお誕生日
Wayne County
#tite𓂺
#ATEEZ𓂸
#YGEntertainment𓆗
#اربح_مع_stc5g_الاسرع
#乃木坂46ANN
#USElection2020
#TREASURE_MMM_D1
#MichelTeloFeatChan
#مسابقه_عبدالصمد_القرشي
#WangYibo𓃮
#BlackDay4Press
#halktanyana
#メンズ校
#CountAllTheVotes
#SAPAMANTAN
#XboxWatchDogsLegion
#Aileye5000SosyalCalismaci
#WednesdayMegaWord
