In [9]:
import requests
from bs4 import BeautifulSoup
import concurrent.futures

def getProxies():
    """
    Fetch proxies from free-proxy-list.net and return a list of elite proxies.
    """
    r = requests.get('https://free-proxy-list.net/')
    soup = BeautifulSoup(r.content, 'html.parser')
    table = soup.find('tbody')
    proxies = []
    for row in table.find_all('tr'):
        if row.find_all('td')[4].text == 'elite proxy':
            proxy = ":".join([row.find_all('td')[0].text, row.find_all('td')[1].text])
            proxies.append(proxy)
    return proxies

def proxy_from_txt(filename):
    """
    Read proxies from a given file and return a list of proxies.
    If the file is not found, return an empty list.
    """
    try:
        with open(filename, 'r') as f:
            txt_proxies = [line.strip() for line in f]
        return txt_proxies
    except FileNotFoundError:
        print(f"File {filename} not found.")
        return []

def extract(proxy):
    """
    Test a proxy by making a request to a specified URL and return the result.
    """
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.162 Safari/537.36'} 
    try:
        r = requests.get('https://minimalistbaker.com/recipe-index/?fwp_special-diet=vegan&fwp_paged=1', headers=headers, proxies={'http': proxy, 'https': proxy}, timeout=5)
        if r.status_code == 200:
            working = {
                'proxy': proxy,
                'statuscode': r.status_code,
                'data': r.text[:200],
            }
            print(proxy)
            return working
    except requests.ConnectionError:
        pass
    return None

def main():
    """
    Main function to orchestrate fetching and testing of proxies.
    """
    txt_prox = proxy_from_txt('proxy-list.txt')
    proxylist = getProxies()
    
    proxylist.extend(txt_prox)
    
    with concurrent.futures.ThreadPoolExecutor() as executor:
        results = list(executor.map(extract, proxylist))
    
    # Filter out None results
    results = [result for result in results if result is not None]
    return results

if __name__ == '__main__':
    results = main()
    print(results)


File proxy-list.txt not found.
172.183.241.1:8080
188.247.194.210:3128
189.240.60.171:9090


ReadTimeout: HTTPSConnectionPool(host='minimalistbaker.com', port=443): Read timed out. (read timeout=5)