# 网易云课堂：看文档学爬虫(python3.6)

## 6.requests

### 基本get请求

In [1]:
import requests
r = requests.get('http://httpbin.org/get')
print(r.url)

http://httpbin.org/get


In [2]:
payload = {'key1': 'value1','key2': 'value2'}
r = requests.get('http://httpbin.org/get', params = payload)
print(r.url)

http://httpbin.org/get?key1=value1&key2=value2


In [3]:
headers = {'hello': 'world'}
r = requests.get('http://httpbin.org/get', headers = headers)
print(r.text)

{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Hello": "world", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "origin": "111.203.22.227", 
  "url": "http://httpbin.org/get"
}



### 基本post请求

In [4]:
payload = {'hello': 'world'}
r = requests.post('http://httpbin.org/post', data = payload)
print(r.text)

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "hello": "world"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Content-Length": "11", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "json": null, 
  "origin": "111.203.22.227", 
  "url": "http://httpbin.org/post"
}



#### 发送json格式数据

In [5]:
import json
payload = {'hello': 'world'}
r = requests.post('http://httpbin.org/post', data = json.dumps(payload))
print(r.text)

{
  "args": {}, 
  "data": "{\"hello\": \"world\"}", 
  "files": {}, 
  "form": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Content-Length": "18", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "json": {
    "hello": "world"
  }, 
  "origin": "111.203.22.227", 
  "url": "http://httpbin.org/post"
}



#### 上传文件

In [6]:
url = 'http://httpbin.org/post'
files = {'file': open('baidu.txt', 'rb')}
r = requests.post(url, files = files)
print(r.text)

{
  "args": {}, 
  "data": "", 
  "files": {
  }, 
  "form": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Content-Length": "118531", 
    "Content-Type": "multipart/form-data; boundary=72e1270cf3c040c1e6c18dd2acf7ed26", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.19.1"
  }, 
  "json": null, 
  "origin": "111.203.22.227", 
  "url": "http://httpbin.org/post"
}



#### Cookies

In [7]:
url = 'http://httpbin.org/cookies'
cookies = dict(cookies_are = 'working')
r = requests.get(url, cookies = cookies)
print(r.text)

{
  "cookies": {
    "cookies_are": "working"
  }
}



### 请求超时配置

In [8]:
url = 'http://github.com'
r = requests.get(url, timeout = 100)
print(r.text)







<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
  <link rel="dns-prefetch" href="https://assets-cdn.github.com">
  <link rel="dns-prefetch" href="https://avatars0.githubusercontent.com">
  <link rel="dns-prefetch" href="https://avatars1.githubusercontent.com">
  <link rel="dns-prefetch" href="https://avatars2.githubusercontent.com">
  <link rel="dns-prefetch" href="https://avatars3.githubusercontent.com">
  <link rel="dns-prefetch" href="https://github-cloud.s3.amazonaws.com">
  <link rel="dns-prefetch" href="https://user-images.githubusercontent.com/">



  <link crossorigin="anonymous" media="all" integrity="sha512-FCg44VGg5ax/5MpZ8otwiPE+/tG1/Sq67mKkl6agbqgoScZtJyXhQSFQMIJfOHMZZ+yXDINb8nEiws60SiLohg==" rel="stylesheet" href="https://assets-cdn.github.com/assets/frameworks-5aa6d9885579bb2359f66266aee26f3b.css" />
  <link crossorigin="anonymous" media="all" integrity="sha512-7j4LVYeYSjq7Il+lAz2YmgIdh1yZseGZInIyLEgtZiWTttnprn5+JEKrgObqOcqvW5+OCSbnA8NpZI/qDa3Z

### 持久会话
#### 没有持久会话的情况

In [9]:
r = requests.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
print(r.text)

{
  "cookies": {
    "sessioncookie": "123456789"
  }
}



In [10]:
r = requests.get('http://httpbin.org/cookies')
print(r.text)

{
  "cookies": {}
}



#### 设置持久会话的情况

In [11]:
s = requests.Session()
s.get('http://httpbin.org/cookies/set/sessioncookie/123456789')
r = s.get('http://httpbin.org/cookies')
print(r.text)

{
  "cookies": {
    "sessioncookie": "123456789"
  }
}



### 代理

In [12]:
proxies = {'https': 'http//41.118.132.69:4433'}
r = requests.post('https://baidu.com', proxies = proxies)
print(r.status_code)

ProxyError: HTTPSConnectionPool(host='baidu.com', port=443): Max retries exceeded with url: / (Caused by ProxyError('Cannot connect to proxy.', NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x0000020D8D5BA7B8>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed',)))

In [13]:
r = requests.post('https://baidu.com')
print(r.status_code)

200


## 7.BeautifulSoup
## 8.Python与MongoDB