# 反爬：代理伺服器/IP

* 了解「IP 黑/白名單」的反爬蟲機制
* 「IP 黑/白名單」反爬蟲的因應策略

## 作業目標

* 目前程式中的 proxy_ips 是手動輸入的；請改寫程式，自動從 https://free-proxy-list.net/ 抓取可用的 Proxy 清單來產生 proxy_ips。




In [1]:
import requests
import random
from bs4 import BeautifulSoup

In [2]:
# Download the free proxy listing page and parse it with the lxml parser.
url = 'https://free-proxy-list.net'
# Without a timeout, requests may wait forever on an unresponsive server.
res = requests.get(url, timeout=10)
# Fail here on an HTTP error status instead of parsing an error page.
res.raise_for_status()
soup = BeautifulSoup(res.text, 'lxml')

In [3]:
# Collect "ip:port" strings from the rows of the first <tbody> in the page.
# Assumes the first two cells of each row are the IP address and the port
# -- TODO confirm against the current page layout.
proxy_ips = []
table_body = soup.find('tbody')
# find() returns None when the page has no <tbody>; treat that as "no rows"
# rather than failing with an AttributeError.
rows = table_body.find_all('tr') if table_body is not None else []
for row in rows:
    cells = row.find_all('td')
    if len(cells) < 2:
        # Skip rows that do not carry both an address and a port cell.
        continue
    proxy_ips.append(cells[0].text.strip() + ':' + cells[1].text.strip())

proxy_ips

['119.11.240.78:34997',
 '161.35.130.184:8080',
 '139.180.188.35:3128',
 '75.150.251.146:3128',
 '34.91.56.14:8080',
 '178.134.71.138:35942',
 '118.97.53.99:31439',
 '197.210.187.46:45753',
 '51.254.237.77:3129',
 '145.239.121.218:3129',
 '182.253.31.82:8080',
 '176.28.75.230:54657',
 '87.255.27.163:3128',
 '42.3.51.114:80',
 '118.172.201.59:46447',
 '41.188.164.222:80',
 '89.34.208.223:50478',
 '36.89.181.161:50204',
 '150.129.56.138:31111',
 '212.42.113.240:49297',
 '104.154.143.77:3128',
 '138.122.140.67:3128',
 '94.177.227.138:80',
 '91.67.240.45:3128',
 '202.166.205.78:58431',
 '193.110.115.220:3128',
 '191.5.0.79:53281',
 '85.128.16.61:56452',
 '103.78.75.165:8080',
 '223.25.97.210:52905',
 '142.44.148.56:8080',
 '5.22.154.106:37555',
 '110.74.221.18:53348',
 '103.141.180.130:8080',
 '78.8.45.68:8080',
 '1.20.99.89:32963',
 '90.151.249.204:3128',
 '41.65.168.89:3128',
 '103.216.82.22:6666',
 '103.69.125.151:80',
 '200.236.221.242:55263',
 '179.108.123.210:58107',
 '103.6.104.105:

In [6]:
# Send requests through the free proxies to http://ip.filefab.com/index.php
# to find out which of them actually work.

available_proxies = []

# Sample without replacement so the same proxy is never tested (or recorded)
# twice; min() also copes with fewer than 10 candidates, including none.
for ip in random.sample(proxy_ips, min(10, len(proxy_ips))):
    print('Use', ip)
    # The Requests documentation states that proxy URLs must include a scheme.
    proxy_url = 'http://' + ip
    try:
        resp = requests.get('http://ip.filefab.com/index.php',
                            proxies={'http': proxy_url, 'https': proxy_url},
                            timeout=10)
        # Parsed into its own name so the notebook-level `soup` is not
        # overwritten by this loop.
        page = BeautifulSoup(resp.text, 'html5lib')
        print(page.find('h1', id='ipd').text.strip())
    except (requests.RequestException, AttributeError):
        # RequestException: the request through the proxy failed or timed out.
        # AttributeError: the reply has no <h1 id="ipd"> element, so find()
        # returned None and the proxy did not deliver the expected page.
        print('Fail')
    else:
        available_proxies.append(ip)

print(available_proxies)

Use 159.138.20.247:80
Your IP address: 159.138.20.247
Use 138.122.140.67:3128
Your IP address: 138.122.140.67
Use 182.53.206.26:45336
Fail
Use 104.148.76.156:3128
Fail
Use 95.38.214.130:51805
Your IP address: 95.38.214.130
Use 187.45.123.137:36559
Fail
Use 79.137.44.85:3129
Your IP address: 79.137.44.85
Use 188.170.52.188:58928
Your IP address: 188.170.52.188
Use 112.133.214.242:80
Your IP address: 112.133.248.189
Use 88.99.10.251:1080
Fail
['159.138.20.247:80', '138.122.140.67:3128', '95.38.214.130:51805', '79.137.44.85:3129', '188.170.52.188:58928', '112.133.214.242:80']


In [7]:
# Send requests through the proxies found above to https://httpbin.org/ip
# to check whether the proxying (IP swap) really succeeds.

for ip in available_proxies:
    print('Use', ip)
    # The Requests documentation states that proxy URLs must include a scheme.
    proxy_url = 'http://' + ip
    try:
        resp = requests.get('https://httpbin.org/ip',
                            proxies={'http': proxy_url, 'https': proxy_url},
                            timeout=10)
        print(resp.json())
    except (requests.RequestException, ValueError):
        # RequestException: the request through the proxy failed or timed out.
        # ValueError: the body was not valid JSON (e.g. a proxy error page).
        print('Fail')

Use 159.138.20.247:80
Fail
Use 138.122.140.67:3128
{'origin': '138.122.140.67'}
Use 95.38.214.130:51805
Fail
Use 79.137.44.85:3129
{'origin': '79.137.44.85'}
Use 188.170.52.188:58928
Fail
Use 112.133.214.242:80
Fail
