In [1]:
import os
import time
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs

In [2]:
# The pictures will be stored here.
# HOME is used when it is set (this keeps the location used so far); when it
# is missing -- common on Windows, where os.environ['HOME'] raises KeyError --
# fall back to the platform's notion of the user's home directory.
rootDir = os.path.join(
    os.environ.get('HOME') or os.path.expanduser('~'), 'Desktop', 'pic')


class AutoPath(object):
    '''Automatic Path Manager

    Owns a root directory containing two sub-directories, `_thumb` and
    `_fullSize`, and builds file paths inside them.
    '''

    # Sub-directories that check() guarantees to exist under the root.
    _SUBDIRS = ('_thumb', '_fullSize')

    def __init__(self, root=rootDir):
        '''Initial with root dir

        root: directory that will hold the `_thumb` and `_fullSize` folders.
              It is created (parents included) when it does not exist.

        Raises ValueError when the root directory can not be created.
        '''
        self.root = root
        self.check()
        print(f'D: Initialize AutoPath with root: {root}')

    def check(self):
        '''Guarantee the root dir exists,
        and _thumb and _fullSize exist.

        Raises ValueError (chained to the underlying error) when the root
        directory can not be created.
        '''
        root = self.root
        try:
            # makedirs also creates missing parents (e.g. no Desktop folder yet)
            # and does not fail if the directory appears between check and create.
            os.makedirs(root, exist_ok=True)
        except Exception as err:
            print(f'E: Can not work with the given root: {root}')
            # Keep the original error attached as the cause for debugging.
            raise ValueError(err) from err

        for sub in self._SUBDIRS:
            d = os.path.join(root, sub)
            # Only report directories that were actually missing.
            if not os.path.isdir(d):
                os.makedirs(d, exist_ok=True)
                print(f'D: Made new dir: {d}')

        return

    def _make_full_path(self, sub1, name):
        '''Make full path with given sub1 and name

        Returns a two-item list [path, state] where state is 'conflict'
        when a file already exists at path and 'clear' otherwise.
        '''
        p = os.path.join(self.root, sub1, name)
        if os.path.isfile(p):
            print(f'W: The file name exists: {sub1}, {name}.')
            return [p, 'conflict']
        return [p, 'clear']

    def _checked_path(self, sub1, name, force):
        '''Shared body of thumb() and fullSize(): build the path under sub1
        and refuse an existing file unless force is set.'''
        p, state = self._make_full_path(sub1, name)
        if state == 'conflict' and not force:
            raise ValueError(f'{p} exists')
        print(f'D: Made path: {p}')
        return p

    def thumb(self, name, force=True):
        '''Return the path of `name` inside the `_thumb` folder.

        force: when False, raise ValueError if a file already exists at
               that path; when True (default) the path is returned anyway.
        '''
        return self._checked_path('_thumb', name, force)

    def fullSize(self, name, force=True):
        '''Return the path of `name` inside the `_fullSize` folder.

        force: when False, raise ValueError if a file already exists at
               that path; when True (default) the path is returned anyway.
        '''
        return self._checked_path('_fullSize', name, force)
        

In [3]:
# HTTP headers for page requests: a desktop Chrome user-agent string.
# Adjacent string literals are joined by the parser into one value.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/81.0.4044.138 Safari/537.36'
    ),
}

# Search page to scrape, written one query parameter per line.
url = (
    'https://wallhaven.cc/search'
    '?categories=111'
    '&purity=100'
    '&resolutions=2560x1440'
    '&sorting=random'
    '&order=desc'
)


autoPath = AutoPath()


def _download(src, dst, headers, timeout):
    '''Fetch `src` and write the response body to the file `dst`.

    The request is made before the file is opened, so a failed request
    does not leave an empty file behind, and an HTTP error page is never
    saved as if it were a picture.

    Returns True when the file was written, False on an HTTP error status.
    '''
    resp = requests.get(src, headers=headers, timeout=timeout)
    if not resp.ok:
        print(f'W: Failed to download {src}: HTTP {resp.status_code}')
        return False
    with open(dst, 'wb') as fp:
        fp.write(resp.content)
    return True


def rndSrc(url=url, headers=headers, autoPath=autoPath, timeout=30):
    '''Require the random pictures from the resource website

    Fetches the search page at `url`, follows every preview link to find
    the full-size picture, and saves thumbnail and full-size files to the
    paths handed out by `autoPath`.

    url:      search page to scrape.
    headers:  HTTP headers sent with every request.
    autoPath: object providing thumb(name) and fullSize(name) file paths.
    timeout:  value passed as `timeout` to every requests.get call, so a
              stalled connection can not hang the cell forever.

    Returns a dict with keys:
        url             - the search page url
        preview, thumb  - the matched <a class="preview"> / <img alt="loading"> tags
        num             - number of preview links found
        fullSize        - per picture: full-size url, or '--' + detail page
                          url when no full-size image was found on it
        thumbPicPath    - per picture: saved thumbnail path, '--' if not saved
        fullSizePicPath - per picture: saved full-size path, '--' if not saved
    '''
    t0 = time.time()

    resp = requests.get(url=url, headers=headers, timeout=timeout)
    # Name the parser explicitly: otherwise Beautiful Soup picks whichever
    # parser is installed (and warns), so results can differ per machine.
    soup = bs(resp.text, 'html.parser')

    found = dict(url=url,
                 preview=soup.find_all('a', {'class': 'preview'}),
                 thumb=soup.find_all('img', {'alt': 'loading'}))

    n = len(found['preview'])
    found['num'] = n
    # '--' marks "nothing recorded"; entries are overwritten in the loop below.
    found['fullSize'] = ['--'] * n
    found['thumbPicPath'] = ['--'] * n
    found['fullSizePicPath'] = ['--'] * n

    for j, (p, t) in enumerate(zip(found['preview'], found['thumb'])):
        tUrl = t['data-src']

        # One lookup is enough: searching the same parsed page repeatedly
        # can not give a different answer.
        detail = requests.get(url=p['href'], headers=headers, timeout=timeout)
        img = bs(detail.text, 'html.parser').find('img', {'id': 'wallpaper'})

        # A leading '--' flags a detail page without a full-size image.
        fUrl = img['src'] if img else '--' + p['href']
        found['fullSize'][j] = fUrl

        print(f'D: Parsed new Pic: {tUrl}, {fUrl}')

        if fUrl.startswith('--'):
            continue

        thumbPath = autoPath.thumb(os.path.basename(tUrl))
        if _download(tUrl, thumbPath, headers, timeout):
            found['thumbPicPath'][j] = thumbPath

        fullPath = autoPath.fullSize(os.path.basename(fUrl))
        if _download(fUrl, fullPath, headers, timeout):
            found['fullSizePicPath'][j] = fullPath

    print(f'D: Found {found["num"]} sources, costs {time.time() - t0} seconds.')

    return found

D: Made new dir: C:\Users\zcc\Desktop\pic\_thumb
D: Made new dir: C:\Users\zcc\Desktop\pic\_fullSize
D: Initialize AutoPath with root: C:\Users\zcc\Desktop\pic


In [4]:
found = rndSrc()

D: Parsed new Pic: https://th.wallhaven.cc/small/md/mdv9k1.jpg, https://w.wallhaven.cc/full/md/wallhaven-mdv9k1.jpg
D: Made path: C:\Users\zcc\Desktop\pic\_thumb\mdv9k1.jpg
D: Made path: C:\Users\zcc\Desktop\pic\_fullSize\wallhaven-mdv9k1.jpg
D: Parsed new Pic: https://th.wallhaven.cc/small/p2/p2go19.jpg, https://w.wallhaven.cc/full/p2/wallhaven-p2go19.jpg
D: Made path: C:\Users\zcc\Desktop\pic\_thumb\p2go19.jpg
D: Made path: C:\Users\zcc\Desktop\pic\_fullSize\wallhaven-p2go19.jpg
D: Parsed new Pic: https://th.wallhaven.cc/small/g7/g79roq.jpg, https://w.wallhaven.cc/full/g7/wallhaven-g79roq.png
D: Made path: C:\Users\zcc\Desktop\pic\_thumb\g79roq.jpg
D: Made path: C:\Users\zcc\Desktop\pic\_fullSize\wallhaven-g79roq.png
D: Parsed new Pic: https://th.wallhaven.cc/small/yj/yjmmzd.jpg, https://w.wallhaven.cc/full/yj/wallhaven-yjmmzd.png
D: Made path: C:\Users\zcc\Desktop\pic\_thumb\yjmmzd.jpg
D: Made path: C:\Users\zcc\Desktop\pic\_fullSize\wallhaven-yjmmzd.png
D: Parsed new Pic: https://t