In [72]:
import cloudscraper
from bs4 import BeautifulSoup
import math
import pprint


# Compares two public Discogs lists (each one either a Collection or a Wantlist) and prints the releases that appear in both.


# Shared HTTP client used by count_pages() and parse_list() for every request;
# it is configured with a desktop (non-mobile) Firefox-on-Windows browser profile.
scraper = cloudscraper.create_scraper(
    browser={
        'browser': 'firefox',
        'platform': 'windows',
        'mobile': False,
    },
)


# Helper Functions

def get_collection(username):
    """Return the releases listed in a user's Discogs collection.

    Builds the collection URL for *username*, asks count_pages() how many
    pages it spans, and hands both to parse_list().

    Args:
        username: Discogs username, inserted into the URL as-is.

    Returns:
        The list of release strings produced by parse_list().
    """
    collection_url = "https://www.discogs.com/user/{0}/collection?header=1".format(username)

    # count_pages() runs first (one request), then parse_list() walks every page.
    return parse_list(collection_url, count_pages(collection_url))

def get_wantlist(username):
    """Return the releases listed in a user's Discogs wantlist.

    Builds the wantlist URL for *username*, asks count_pages() how many
    pages it spans, and hands both to parse_list().

    Args:
        username: Discogs username, inserted into the URL as-is.

    Returns:
        The list of release strings produced by parse_list().
    """
    wantlist_url = "https://www.discogs.com/wantlist?user={0}".format(username)

    # count_pages() runs first (one request), then parse_list() walks every page.
    return parse_list(wantlist_url, count_pages(wantlist_url))
    
def parse_list(URL, pages):
    """Scrape every page of a collection or wantlist into a list of strings.

    Args:
        URL: Base URL of the list. It must already contain a query string,
            because the paging parameters are appended with a leading "&".
        pages: Number of pages to fetch; pages 1 through ``pages`` are
            requested in order, 250 rows per page, sorted by artist ascending.

    Returns:
        A list with one entry per ``tr.shortcut_navigable`` row, formatted as
        "<first link text> - <second link text> (<fourth cell text>)".
        Duplicate entries are kept; no de-duplication is performed.

    Raises:
        AttributeError: if a row has no ``span.release_title`` element.
        IndexError: if a row has fewer than two links in its title span or
            fewer than four ``td`` cells.
    """
    releases = []

    for page in range(1, pages + 1):
        page_url = URL + "&limit=250&sort=artist&sort_order=asc&page={0}".format(page)
        html = scraper.get(page_url).content
        soup = BeautifulSoup(html, 'html.parser')

        for row in soup.find_all("tr", class_="shortcut_navigable"):
            # NOTE(review): the first two links are presumably artist and
            # title; a row with several artist links would put the second
            # artist in the title slot - confirm against the page markup.
            links = row.find("span", class_="release_title").find_all("a")

            # NOTE(review): assumes the fourth cell holds the format on both
            # collection and wantlist pages - confirm. The text is used
            # verbatim, including any surrounding whitespace.
            # Named release_format so the builtin format() is not shadowed.
            release_format = row.find_all("td")[3].text

            releases.append(
                "{0} - {1} ({2})".format(links[0].text, links[1].text, release_format)
            )

    return releases

def count_pages(URL):
    """Return how many 250-item pages a collection or wantlist spans.

    Fetches *URL*, reads the item count shown in the active header section
    (``li.active_header_section small.facet_count``) and divides it by the
    page size, rounding up.

    Args:
        URL: URL of a Discogs collection or wantlist page.

    Returns:
        The number of pages as an int (0 when the count is 0).

    Raises:
        ValueError: if the count element cannot be found in the page, or if
            its text is not an integer.
    """
    # Must match the "limit=250" that parse_list() puts in each page request.
    page_size = 250

    html = scraper.get(URL).content
    soup = BeautifulSoup(html, 'html.parser')

    # find() returns None when nothing matches, so check each step instead of
    # failing with an opaque AttributeError on None.
    active_section = soup.find("li", class_="active_header_section")
    count_tag = None
    if active_section is not None:
        count_tag = active_section.find("small", class_="facet_count")
    if count_tag is None:
        raise ValueError("Could not find the list size on page: {0}".format(URL))

    # Drop thousands separators (e.g. "1,234") in case the count is rendered
    # with them; int() already ignores surrounding whitespace.
    collection_size = int(count_tag.text.replace(",", ""))

    return math.ceil(collection_size / page_size)


# Main

# The two Discogs users to compare: user1's collection against user2's wantlist.
user1 = "curefortheitch"
user2 = "AustinSimard"

# Scrape user1's collection and report how many entries were found.
collection = get_collection(user1)
print("Collection: {0}".format(len(collection)))

# Scrape user2's wantlist and report how many entries were found.
wantlist = get_wantlist(user2)
print("Wantlist: {0}".format(len(wantlist)))

# Entries match only when their formatted strings are exactly equal.
matches = set(collection).intersection(set(wantlist))
print("Matches: {0}".format(len(matches)))
pprint.pprint(matches)


Collection: 431
Wantlist: 98
Matches: 0
set()
