# 引入套件(套件需安裝到 base 環境才可以在這裡使用)

## HTTP request

In [2]:
import requests

# Fetch a demo page and print its HTML body when the server answers 200 OK.
url = 'http://www.ehappy.tw/demo.htm'
# requests applies no timeout by default; bound the wait so a stalled
# server cannot hang this cell forever.
r = requests.get(url, timeout=10)
if r.status_code == requests.codes.ok:
    print(r.text)
else:
    # Report the failure instead of finishing silently with no output.
    print('Request failed with HTTP status', r.status_code)

<!doctype html>
<html>
  <head>
    <meta charset="UTF-8">
    <title>Hello</title>
  </head>
  <body>
    <p>Hello World!</p>
  </body>
</html>


## HTTP request
### GET and POST with parameters

In [7]:
import requests

# Key/value pairs passed via `params` are appended to the URL as a query string.
payload = {'key1':'value1','key2':'value2'}
# requests applies no timeout by default; bound the wait so the cell cannot hang.
r = requests.get("http://httpbin.org/get", params=payload, timeout=10)
print(r.text)

{
  "args": {
    "key1": "value1", 
    "key2": "value2"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.24.0", 
    "X-Amzn-Trace-Id": "Root=1-5f337ae2-29ef8bcf003f86b797fa32ba"
  }, 
  "origin": "59.127.49.239", 
  "url": "http://httpbin.org/get?key1=value1&key2=value2"
}



In [9]:
import requests

# Key/value pairs passed via `data` are sent form-encoded in the request body.
payload = {'key1':'value1','key2':'value2'}
# requests applies no timeout by default; bound the wait so the cell cannot hang.
r = requests.post("http://httpbin.org/post", data=payload, timeout=10)
print(r.text)

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "key1": "value1", 
    "key2": "value2"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "23", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.24.0", 
    "X-Amzn-Trace-Id": "Root=1-5f337c25-db71f68998db9547de20fe6c"
  }, 
  "json": null, 
  "origin": "59.127.49.239", 
  "url": "http://httpbin.org/post"
}



## Parsing
### Use BeautifulSoup to extract the target elements

In [13]:
import requests
from bs4 import BeautifulSoup

# Download a demo page and read a few tags straight off the parsed tree.
url = 'http://ehappy.tw/bsdemo1.htm'
# requests applies no timeout by default; bound the wait so the cell cannot hang.
# Named `response` because this is the Response object, not the HTML text.
response = requests.get(url, timeout=10)
# Force UTF-8 decoding before reading .text so the Chinese content is not garbled.
response.encoding = 'UTF-8'
sp = BeautifulSoup(response.text,'lxml')
print(sp.title)       # the whole <title> tag
print(sp.title.text)  # only the text inside <title>
print(sp.h1)          # first <h1> tag in the document
print(sp.p)           # first <p> tag in the document

<title>我是網頁標題</title>
我是網頁標題
<h1 class="large">我是標題</h1>
<p>我是段落</p>


In [15]:
import requests
from bs4 import BeautifulSoup

# Inline sample document used to demonstrate find() and find_all().
html = '''
    <html>
        <head>
            <meta charset = "UTF-8">
            <title>我是標題網頁</title>
        </head>
        <body>
            <div>
                <p id='p1'>我是第一段</p>
                <p id='p2' class='red'>我是第二段</p>
            </div>
        </body>
    </html>
'''

soup = BeautifulSoup(html, 'lxml')

# First matching <p> only.
first_paragraph = soup.find('p')
# Every matching <p>, returned as a list.
all_paragraphs = soup.find_all('p')
# Filter on several attributes at once using a dict.
by_attr_dict = soup.find('p', {'id': 'p2', 'class': 'red'})
# Same query with keyword arguments; `class` is a Python keyword, so the
# argument is spelled `class_` with a trailing underscore.
by_keywords = soup.find('p', id='p2', class_='red')

for match in (first_paragraph, all_paragraphs, by_attr_dict, by_keywords):
    print(match)

<p id="p1">我是第一段</p>
[<p id="p1">我是第一段</p>, <p class="red" id="p2">我是第二段</p>]
<p class="red" id="p2">我是第二段</p>
<p class="red" id="p2">我是第二段</p>


In [19]:
import requests
from bs4 import BeautifulSoup

# Inline sample document used to demonstrate CSS-selector lookups with select().
html = '''
    <html>
        <head>
            <meta charset = "UTF-8">
            <title>我是標題網頁</title>
        </head>
        <body>
            <div>
                <p id='p1'>我是第一段</p>
                <p id='p2' class='red'>我是第二段</p>
            </div>
        </body>
    </html>
'''

soup = BeautifulSoup(html, 'lxml')

# Tag name, tag name, id selector, class selector; each call returns a list.
for css_selector in ('title', 'p', '#p1', '.red'):
    print(soup.select(css_selector))

[<title>我是標題網頁</title>]
[<p id="p1">我是第一段</p>, <p class="red" id="p2">我是第二段</p>]
[<p id="p1">我是第一段</p>]
[<p class="red" id="p2">我是第二段</p>]


In [20]:
import requests
from bs4 import BeautifulSoup

# Inline sample document used to demonstrate reading tag attributes.
html = '''
<html>
<head>
<meta charset="UTF-8">
<title>我是網頁標題</title>
</head>
<body>
<img src="http://www.ehappy.tw/python.png">
<a href="http://www.e-happy.com.tw">超連結</a>
</body>
</html>
'''

soup = BeautifulSoup(html, 'lxml')
image_tag = soup.select('img')[0]
link_tag = soup.select('a')[0]

# Attribute access through .get(name) ...
print(image_tag.get('src'))
print(link_tag.get('href'))
# ... and through dictionary-style indexing; both print the same values here.
print(image_tag['src'])
print(link_tag['href'])

http://www.ehappy.tw/python.png
http://www.e-happy.com.tw
http://www.ehappy.tw/python.png
http://www.e-happy.com.tw


## 實際爬蟲範例
### 爬威力彩號碼

In [65]:
import requests
from bs4 import BeautifulSoup

# Scrape the latest 威力彩 draw shown on the Taiwan Lottery home page.
url = 'http://www.taiwanlottery.com.tw'
# requests applies no timeout by default; bound the wait so a stalled
# server cannot hang this cell forever.
html = requests.get(url, timeout=10)
html.encoding = 'UTF-8'
sp = BeautifulSoup(html.text,'lxml')

# Draw date / issue number.
print( '威力彩林俊佑期數:' +sp.select('.contents_mine_tx02')[0].select('span')[0].text)

# Run the CSS queries once rather than on every loop iteration; the parsed
# tree does not change between reads.
divs = sp.select('.contents_box02')[0].select('div')

# Explicit indexing (not slicing) so that a changed page layout still raises
# IndexError instead of quietly printing a shortened row.
# divs[3..8] are printed under the "drawn order" label.
s = ''.join(' ' + divs[i].text for i in range(3,9))
print( '開出順序:' + s)
# divs[9..14] are printed under the "sorted order" label.
s = ''.join(' ' + divs[i].text for i in range(9,15))
print( '大小順序:' + s)
# divs[15] is printed under the "second zone" label.
print('第二區:' + divs[15].text)

威力彩林俊佑期數:109/8/10 第109000064期 
開出順序: 06  15  22  04  19  05 
大小順序: 04  05  06  15  19  22 
第二區:03 


# Python 下載 YouTube 影片的套件範例

In [2]:
from pytube import YouTube

# Download the first stream pytube lists for this video.
video_url = 'https://www.youtube.com/watch?v=pkSjkse6j-o'
video = YouTube(video_url)
first_stream = video.streams.first()
# Left as the final bare expression so the notebook displays the value
# returned by download().
first_stream.download()

'C:\\Users\\a0970\\Desktop\\python_exercise\\climb\\麋先生MIXER【某某某 Someone】Official Music Video.mp4'