# Python's requests library - Basic Usage and Examples

## Reference URL: 
### URL: 
#### https://blog.hartleybrody.com/web-scraping-cheat-sheet/#making-simple-requests
#### https://brennan.io/2016/03/02/logging-in-with-requests/
#### https://realpython.com/python-requests/

## Theory:
### requests API ensures that all forms of HTTP request are obvious. 
### Possible HTTP requests: GET/POST/PUT/PATCH/DELETE/HEAD/OPTIONS. 

## HTTP request methods:
  ### >>> r = requests.get('https://api.github.com/events')
  ### >>> r = requests.put('https://httpbin.org/put', data = {'key':'value'})
  ### >>> r = requests.delete('https://httpbin.org/delete')
  ### >>> r = requests.head('https://httpbin.org/get')
  ### >>> r = requests.options('https://httpbin.org/get')

## Status Codes:
    200/202 - Successful operation (OK / Accepted)
    204     - No Content to show
    400     - Bad request/ Request body incorrect
    401     - Unauthorized access/ Token invalid or expired
    403     - Forbidden
    404     - Not Found/ URL incorrect/ Page not found
    429     - Too many requests
    500     - Internal server error
    
 ### List of REST APIs (GitHub and others):
 #### https://api.github.com/
 #### https://apidocs.symantec.com/
 


## Import required module

In [1]:
import requests

## Submit a GET request & print the response

In [2]:
# requests.get() sends the GET request and returns a Response object.
r = requests.get("https://api.github.com/events")    # Response Object
# r.text is the response body as a string (the JSON shown in the output below).
print(r.text)


[{"id":"9229321868","type":"PushEvent","actor":{"id":17232997,"login":"akmaz","display_login":"akmaz","gravatar_id":"","url":"https://api.github.com/users/akmaz","avatar_url":"https://avatars.githubusercontent.com/u/17232997?"},"repo":{"id":175255662,"name":"akmaz/example_algorithms","url":"https://api.github.com/repos/akmaz/example_algorithms"},"payload":{"push_id":3393077361,"size":1,"distinct_size":1,"ref":"refs/heads/master","head":"82b0f43cb86394642d85850ba4868978ba55d72e","before":"09b579ff143b21ef574072746bce90953054ac52","commits":[{"sha":"82b0f43cb86394642d85850ba4868978ba55d72e","author":{"email":"aleksandra8mazur@gmail.com","name":"Aleksandra"},"message":"Added bubblesort algorithm.","distinct":true,"url":"https://api.github.com/repos/akmaz/example_algorithms/commits/82b0f43cb86394642d85850ba4868978ba55d72e"}]},"public":true,"created_at":"2019-03-12T19:04:24Z"},{"id":"9229321863","type":"PullRequestEvent","actor":{"id":31713290,"login":"Katzesama","display_login":"Katzesama"

## Print the default encoding

In [3]:
# Encoding that requests uses to decode the response bytes into r.text.
print(r.encoding)

utf-8


## Print and check the response status code

In [4]:
print(r.status_code)
# raise_for_status() returns None when the request succeeded, which is why "None" is printed.
print(r.raise_for_status())

# Explicit status comparison: only an exact 200 counts as success here.
if r.status_code == 200:
    print("Success")
else:
    print("Failure")

    
# NOTE: This check prints 'Success' only for a 200 response. For a 4xx/5xx response,
# raise_for_status() above raises HTTPError before this check is even reached.

200
None
Success


## Complete Code - Use the same skeleton always

In [5]:
# Complete Code:-----> Follow this skeleton everywhere
# Always pass a timeout (requests otherwise waits indefinitely for the server) and
# catch requests' own exception types instead of a blanket Exception, so that
# unrelated programming errors are not silently swallowed.

import requests
from requests.exceptions import HTTPError, RequestException

for url in ['https://api.github.com', 'https://api.github.com/junkURL']:
    try:
        resp = requests.get(url, timeout=10)
        resp.raise_for_status()            # Raises HTTPError for 4xx/5xx responses
    except HTTPError as err:
        print("HTTP Error occurred:", err)
    except RequestException as err:        # Connection failures, timeouts, invalid URLs, ...
        print("Some other error occurred:", err)
    else:
        print("Success")
        

Success
HTTP Error occurred: 404 Client Error: Not Found for url: https://api.github.com/junkURL


## Ways to print the responses

In [6]:
resp=requests.get("https://api.github.com")

# NOTE: a notebook cell only echoes the value of its LAST expression, so of the three
# methods below only resp.json() shows up in the output. Wrap the others in print() to see them.

# METHOD-1
resp.content                                      # The payload - In bytes (not displayed here)
print("+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_")

# METHOD-2
resp.text                                         # The payload - In text/ string (not displayed here)
'''
NOTE: Decoding bytes to string will require an encoding scheme. requests will try to guess the encoding based on the 
      responses header. In case you arent ok with the guess encoding, you can explicitly provide your encoding using the 
      response.encoding = "xxxxxxxxxx"
'''

# METHOD-3
resp.json()                                      # The payload - Parsed from JSON into Python objects (echoed as the cell output)


+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_+_


{'authorizations_url': 'https://api.github.com/authorizations',
 'code_search_url': 'https://api.github.com/search/code?q={query}{&page,per_page,sort,order}',
 'commit_search_url': 'https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}',
 'current_user_authorizations_html_url': 'https://github.com/settings/connections/applications{/client_id}',
 'current_user_repositories_url': 'https://api.github.com/user/repos{?type,page,per_page,sort}',
 'current_user_url': 'https://api.github.com/user',
 'emails_url': 'https://api.github.com/user/emails',
 'emojis_url': 'https://api.github.com/emojis',
 'events_url': 'https://api.github.com/events',
 'feeds_url': 'https://api.github.com/feeds',
 'followers_url': 'https://api.github.com/user/followers',
 'following_url': 'https://api.github.com/user/following{/target}',
 'gists_url': 'https://api.github.com/gists{/gist_id}',
 'hub_url': 'https://api.github.com/hub',
 'issue_search_url': 'https://api.github.com/search/issues?q={q

## Print Headers

In [7]:
# Response headers as a dictionary-like object (echoed because it is the cell's last expression).
resp.headers

{'Server': 'GitHub.com', 'Date': 'Tue, 12 Mar 2019 19:09:25 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Status': '200 OK', 'X-RateLimit-Limit': '60', 'X-RateLimit-Remaining': '56', 'X-RateLimit-Reset': '1552421364', 'Cache-Control': 'public, max-age=60, s-maxage=60', 'Vary': 'Accept', 'ETag': 'W/"7dc470913f1fe9bb6c7355b50a0737bc"', 'X-GitHub-Media-Type': 'github.v3; format=json', 'Access-Control-Expose-Headers': 'ETag, Link, Location, Retry-After, X-GitHub-OTP, X-RateLimit-Limit, X-RateLimit-Remaining, X-RateLimit-Reset, X-OAuth-Scopes, X-Accepted-OAuth-Scopes, X-Poll-Interval, X-GitHub-Media-Type', 'Access-Control-Allow-Origin': '*', 'Strict-Transport-Security': 'max-age=31536000; includeSubdomains; preload', 'X-Frame-Options': 'deny', 'X-Content-Type-Options': 'nosniff', 'X-XSS-Protection': '1; mode=block', 'Referrer-Policy': 'origin-when-cross-origin, strict-origin-when-cross-origin', 'Content-Security-Policy': "default-src 'none'", 'Con

## Using the response.headers to get required information

In [8]:
# Look up individual response headers by name.
print(resp.headers['Server'])
print(resp.headers['Content-Type'])    

## Plus point: header names aren't case sensitive, so any capitalisation returns the same value
print(resp.headers['server'])
print(resp.headers['conTENT-TYpe'])

GitHub.com
application/json; charset=utf-8
GitHub.com
application/json; charset=utf-8


# Parameterizing GET requests

## Concept of query strings - [I] Passing params in the form of a dictionary

In [9]:
import requests
# requests turns the params dictionary into the URL's query string for us.
res = requests.get('https://api.github.com/search/repositories', 
                   params={'q' : 'requests+language:python'})        # passed params to get() in the form of a dictionary

# Parse the JSON body into Python objects (a dict here, as the output below shows).
json_resp = res.json()
print(json_resp)

{'total_count': 7106, 'incomplete_results': False, 'items': [{'id': 1362490, 'node_id': 'MDEwOlJlcG9zaXRvcnkxMzYyNDkw', 'name': 'requests', 'full_name': 'kennethreitz/requests', 'private': False, 'owner': {'login': 'kennethreitz', 'id': 47496974, 'node_id': 'MDEyOk9yZ2FuaXphdGlvbjQ3NDk2OTc0', 'avatar_url': 'https://avatars1.githubusercontent.com/u/47496974?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/kennethreitz', 'html_url': 'https://github.com/kennethreitz', 'followers_url': 'https://api.github.com/users/kennethreitz/followers', 'following_url': 'https://api.github.com/users/kennethreitz/following{/other_user}', 'gists_url': 'https://api.github.com/users/kennethreitz/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/kennethreitz/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/kennethreitz/subscriptions', 'organizations_url': 'https://api.github.com/users/kennethreitz/orgs', 'repos_url': 'https://api.github.com/users/kennethre

## To print the json in readable format

In [10]:
import json
# Re-serialise the parsed response with 4-space indentation and alphabetically sorted keys.
print(json.dumps(json_resp, indent=4, sort_keys=True))

{
    "incomplete_results": false,
    "items": [
        {
            "archive_url": "https://api.github.com/repos/kennethreitz/requests/{archive_format}{/ref}",
            "archived": false,
            "assignees_url": "https://api.github.com/repos/kennethreitz/requests/assignees{/user}",
            "blobs_url": "https://api.github.com/repos/kennethreitz/requests/git/blobs{/sha}",
            "branches_url": "https://api.github.com/repos/kennethreitz/requests/branches{/branch}",
            "clone_url": "https://github.com/kennethreitz/requests.git",
            "collaborators_url": "https://api.github.com/repos/kennethreitz/requests/collaborators{/collaborator}",
            "comments_url": "https://api.github.com/repos/kennethreitz/requests/comments{/number}",
            "commits_url": "https://api.github.com/repos/kennethreitz/requests/commits{/sha}",
            "compare_url": "https://api.github.com/repos/kennethreitz/requests/compare/{base}...{head}",
            "contents

## Fetch a specific key's value 

In [11]:
# 'items' holds the list of matching repositories; take the first search result.
repo = json_resp['items'][0]
print(repo["name"])
print(repo["description"])

requests
Python HTTP Requests for Humans™ ✨🍰✨


## Concept of query strings - [II] Passing params in the form of a list of tuples

In [12]:
import json

# Query-string parameters supplied as a list of (key, value) tuples instead of a dict.
resp = requests.get(
    "https://api.github.com/search/repositories",
    params=[('q', 'requests+language:r')],
)
json_resp = resp.json()

# Pretty-print the parsed response: 4-space indent, keys in alphabetical order.
print(json.dumps(json_resp, indent=4, sort_keys=True))

{
    "incomplete_results": false,
    "items": [
        {
            "archive_url": "https://api.github.com/repos/r-lib/async/{archive_format}{/ref}",
            "archived": false,
            "assignees_url": "https://api.github.com/repos/r-lib/async/assignees{/user}",
            "blobs_url": "https://api.github.com/repos/r-lib/async/git/blobs{/sha}",
            "branches_url": "https://api.github.com/repos/r-lib/async/branches{/branch}",
            "clone_url": "https://github.com/r-lib/async.git",
            "collaborators_url": "https://api.github.com/repos/r-lib/async/collaborators{/collaborator}",
            "comments_url": "https://api.github.com/repos/r-lib/async/comments{/number}",
            "commits_url": "https://api.github.com/repos/r-lib/async/commits{/sha}",
            "compare_url": "https://api.github.com/repos/r-lib/async/compare/{base}...{head}",
            "contents_url": "https://api.github.com/repos/r-lib/async/contents/{+path}",
            "contribut

## Customize requests headers

In [13]:
import requests

# Send a custom 'accept' header together with the search query.
resp = requests.get('https://api.github.com/search/repositories',
                    params={'q': 'requests+language:python'},
                    headers={'accept': 'application/vnd.github.v3.text-match+json'})

# Parse THIS response (resp). The earlier version parsed the stale `res` object from a
# previous cell, so the effect of the custom header was never actually inspected.
json_resp = resp.json()
repo = json_resp['items'][0]
print(repo)
print(repo['keys_url'])

# NOTE: The Accept header tells the server what content types your application can handle. 
# The header value 'application/vnd.github.v3.text-match+json' is a proprietary Github header

{'id': 1362490, 'node_id': 'MDEwOlJlcG9zaXRvcnkxMzYyNDkw', 'name': 'requests', 'full_name': 'kennethreitz/requests', 'private': False, 'owner': {'login': 'kennethreitz', 'id': 47496974, 'node_id': 'MDEyOk9yZ2FuaXphdGlvbjQ3NDk2OTc0', 'avatar_url': 'https://avatars1.githubusercontent.com/u/47496974?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/kennethreitz', 'html_url': 'https://github.com/kennethreitz', 'followers_url': 'https://api.github.com/users/kennethreitz/followers', 'following_url': 'https://api.github.com/users/kennethreitz/following{/other_user}', 'gists_url': 'https://api.github.com/users/kennethreitz/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/kennethreitz/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/kennethreitz/subscriptions', 'organizations_url': 'https://api.github.com/users/kennethreitz/orgs', 'repos_url': 'https://api.github.com/users/kennethreitz/repos', 'events_url': 'https://api.github.com/users/kenne

## Requests- to make HTTP HEAD request

In [14]:
# NOTE(review): httpbin does not appear to expose a "/head" endpoint, so this request most
# likely receives an error page -- which would explain the "text/html" content type printed
# below. Try "https://httpbin.org/get" to inspect the headers of a JSON endpoint -- TODO confirm.
resp = requests.head("https://httpbin.org/head")
# Only the response headers are inspected for a HEAD request.
print(resp.headers["content-type"])


text/html


## Requests- to make HTTP DELETE request

In [15]:
# Send an HTTP DELETE request and parse httpbin's JSON reply.
resp = requests.delete("https://httpbin.org/delete")
json_resp = resp.json()
# "args" is one key of that reply; it is an empty dict here (see output). Presumably it
# mirrors the query-string arguments, of which this request has none -- verify.
json_resp["args"]


{}

## Concept of The Message Body:
### Unlike GET, wherein the request parameters are passed in the query string, in HTTP POST, PUT and PATCH the data is passed through the message body. 

### Data could be a dictionary/ a list of tuples, bytes or a file object. 

In [16]:
# For example:
resp = requests.post('https://httpbin.org/post', data={'key':'value'})     # Pass as a dictionary
print(resp)


resp = requests.post('https://httpbin.org/post', data=[('key', 'value')])  # Pass as a list of tuples
print(resp)

# NOTE: httpbin.org is a great resource created by the author of requests, Kenneth Reitz. 
# It’s a service that accepts test requests and responds with data about the requests

<Response [200]>
<Response [200]>


## Inspecting the requests - 
### When you make a request, the requests library prepares the request, before actually sending it to the destination server. Request preparation includes things like:
### - Validating headers
### - Serializing JSON Content etc...

### You have to view the "PreparedRequest" by accessing .request of the response

In [17]:
# For e.g. 
resp = requests.post("https://httpbin.org/post", data=[('key','value')])
# resp.request is the PreparedRequest that was actually sent to the server.
print(resp.request.headers['Content-Type'])   # Content type chosen for the form data
print(resp.request.url)                       # Final URL of the request
print(resp.request.body)                      # Form-encoded body built from `data`


application/x-www-form-urlencoded
https://httpbin.org/post
key=value


## Request Authentication:
### Few web services require you to authenticate yourself, before the service is rendered [Basic/OAuth/ custom Authentication defined by you...]
### The service needs to know, your identity. 
### This needs you to include the 'Authorization' header or any custom header defined by the service. 

### All the requests methods provide a parameter called "auth", which allows you to pass your credentials. 

In [18]:
from getpass import getpass               # Prompt the user for a password without echoing
# auth takes a (username, password) tuple; the password is read interactively, never hard-coded.
requests.get("https://api.github.com/user", auth=('silver1icicle', getpass()))

# NOTE: 
# If the username and password you supplied were authenticated, you get the response <200>, else you get the response <401>.
# When you supply your username and password, requests applies the credentials using HTTP's Basic Authentication (BA) scheme.
# The BA scheme doesn't protect your credentials while transmitting. The creds are merely encoded with Base64 but not encrypted 
# or hashed in any way. Thus BA is usually used along with HTTPS to ensure confidentiality. 

# BA is the simplest technique for enforcing access controls, to web resources, as it does not require cookies, 
# session identifiers or login pages. It uses standard fields in the HTTP headers, removing the need for handshakes. 


········


<Response [200]>

In [19]:
# Alternative way for basic authentication
from requests.auth import HTTPBasicAuth
from getpass import getpass
# Pass an explicit HTTPBasicAuth object instead of a plain (username, password) tuple.
requests.get('https://api.github.com/user', auth=HTTPBasicAuth('silver1icicle', getpass()))

········


<Response [200]>

In [20]:
# Various authentication methods offered by requests:
# 1. HTTPBasicAuth
# 2. HTTPDigestAuth
# 3. HTTPProxyAuth

## SSL Certification Verification
### If the data shared by you is sensitive, security is important. Communication with secure websites over HTTPS involves establishing an encrypted connection using SSL, i.e. verifying the target server's SSL Certificate. 
### requests does this by default. In case you wish to change this behavior (not recommended) i.e. disable SSL Certification Verification, pass 'False', to the 'verify' parameter of the request function. However it will issue you a critical warning, regarding your data being unsafe. 

### requests internally makes use of the package 'certifi'. 

In [21]:
# verify=False switches off SSL certificate verification -- for demonstration only,
# not recommended; a warning about the unverified request is issued.
resp = requests.get("https://api.github.com", verify=False)




## Achieving performance with requests [For production environments]
### Using:
### 1. Timeout control: 
### When you make a request, you have to wait for the external server to respond. If the server takes forever to respond, this could frustrate the user or hang any of the other background services. By default, requests waits indefinitely for the response. We can however specify a "timeout" duration, to prevent these things from happening. Timeout can be specified as an 'Integer' or a 'float' == Number of seconds to wait before timing out. In the event that the timeout occurs, a Timeout exception is raised. 

### 2. Sessions: 
### The requests get() and post() are high level API's which abstract majority of the internal operations, such as sessions. In case you need to control, how requests are managed, you may need to use a Session instance directly. Sessions are used to persist parameters across requests. For e.g. if you need to use the same authentication across multiple requests, use session

### 3. Retry limits: 
### When a request fails, you may want your application to retry the same request. requests doesn't do that by default. To apply this functionality, you need to implement a custom Transport Adapter. These adapters let you define a set of custom configurations per service you are interacting with. For e.g. retry all requests to "https://api.github.com" up to 3 times, before finally raising a ConnectionError. Steps:

### 3.1] Build a transport adapter
### 3.2] Set the max_retries parameter
### 3.3] Mount to an existing session


## Summary: 
### All these three strategies viz...Timeout, Sessions and Retry Limits are used to keep your code efficient + your application resilient. 


In [22]:
# Demo of timeout
resp = requests.get("https://api.github.com", timeout=1)     # 1 sec timeout duration...success
print(resp)

resp = requests.get("https://api.github.com", timeout=0.5)   # 0.5 sec timeout duration...success.
print(resp)

# A tuple sets the two phases separately: (connect timeout, read timeout).
resp = requests.get("https://api.github.com", timeout=(2, 5))  # Wait 2 sec...for establishing connection with the server
print(resp)                                                    # Wait 5 sec...for the server to respond.


# Timeout exception
from requests.exceptions import Timeout
try:
    # Deliberately tiny timeout so that the Timeout branch is exercised.
    resp = requests.get('https://api.github.com', timeout=0.05)
except Timeout:
    print("The request timed out")
else:
    # Print the response just received (the earlier version printed the stale `res`
    # object left over from a previous cell).
    print(resp)
    print("The server responded timely")
    

<Response [200]>
<Response [200]>
<Response [200]>
The request timed out


In [23]:
# Demo of sessions
import requests
from getpass import getpass

# Here we use the context manager 'with', to ensure resources used by the session will be released after use
with requests.Session() as ss:                         
    ss.auth=('silver1icicle', getpass())               # Credentials set once on the session, reused by requests made through it
    resp = ss.get('https://api.github.com/user')     # Instead of requests.get() use the sessions instance. 
    
# The response object is still readable after the session block has ended.
print(resp.json())
print(resp.headers)
    

········
{'login': 'silver1icicle', 'id': 45722967, 'node_id': 'MDQ6VXNlcjQ1NzIyOTY3', 'avatar_url': 'https://avatars1.githubusercontent.com/u/45722967?v=4', 'gravatar_id': '', 'url': 'https://api.github.com/users/silver1icicle', 'html_url': 'https://github.com/silver1icicle', 'followers_url': 'https://api.github.com/users/silver1icicle/followers', 'following_url': 'https://api.github.com/users/silver1icicle/following{/other_user}', 'gists_url': 'https://api.github.com/users/silver1icicle/gists{/gist_id}', 'starred_url': 'https://api.github.com/users/silver1icicle/starred{/owner}{/repo}', 'subscriptions_url': 'https://api.github.com/users/silver1icicle/subscriptions', 'organizations_url': 'https://api.github.com/users/silver1icicle/orgs', 'repos_url': 'https://api.github.com/users/silver1icicle/repos', 'events_url': 'https://api.github.com/users/silver1icicle/events{/privacy}', 'received_events_url': 'https://api.github.com/users/silver1icicle/received_events', 'type': 'User', 'site_ad

In [24]:
# Demo of max retries
import requests
from requests.adapters import HTTPAdapter
from requests.exceptions import ConnectionError

githubCustomAdapter = HTTPAdapter(max_retries=3)                # Custom adapter with configuration setting

# Use the session as a context manager so its connections are released when done
# (the earlier version created the session and never closed it).
with requests.Session() as ss:
    # Only URLs starting with this prefix go through the retrying adapter.
    ss.mount("https://api.github.com", githubCustomAdapter)     # Mount the custom adapter to the session

    try:
        resp = ss.get("https://api.github.com")
        print(resp.json())
    except ConnectionError as ce:                               # Raised once the retries are exhausted
        print(ce)


{'current_user_url': 'https://api.github.com/user', 'current_user_authorizations_html_url': 'https://github.com/settings/connections/applications{/client_id}', 'authorizations_url': 'https://api.github.com/authorizations', 'code_search_url': 'https://api.github.com/search/code?q={query}{&page,per_page,sort,order}', 'commit_search_url': 'https://api.github.com/search/commits?q={query}{&page,per_page,sort,order}', 'emails_url': 'https://api.github.com/user/emails', 'emojis_url': 'https://api.github.com/emojis', 'events_url': 'https://api.github.com/events', 'feeds_url': 'https://api.github.com/feeds', 'followers_url': 'https://api.github.com/user/followers', 'following_url': 'https://api.github.com/user/following{/target}', 'gists_url': 'https://api.github.com/gists{/gist_id}', 'hub_url': 'https://api.github.com/hub', 'issue_search_url': 'https://api.github.com/search/issues?q={query}{&page,per_page,sort,order}', 'issues_url': 'https://api.github.com/issues', 'keys_url': 'https://api.git