In [4]:
from bs4 import BeautifulSoup as bs
from pymongo import MongoClient
from slugify import slugify
import datetime
import requests
import json
import time
import os

## Get all quests

In [5]:
# Index page listing every quest grouped by success, and the site root used to
# resolve the relative links found on that page.
URL = "https://www.dofuspourlesnoobs.com/classeacutees-par-succegraves.html"
base_URL = "https://www.dofuspourlesnoobs.com"

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here instead of silently parsing an HTTP error page.
success_index_page = requests.get(URL, timeout=30)
success_index_page.raise_for_status()
success_index_soup = bs(success_index_page.content, "html.parser")

### Parse "Listing des quêtes par succès" page

In [6]:
# Empty mapping; not used by any of the cells shown in this notebook section.
success_info_list = dict()

# All multi-column layout tables of the index page, in document order.
success_table = success_index_soup.find_all("table", class_="wsite-multicol-table")
# For each table, the list of its column cells (one inner list per table).
success_table_list = list(
    map(lambda table: table.find_all("td", class_="wsite-multicol-col"), success_table)
)

#### Contents of `success_table_list`, by index:
- [0] I- Quêtes par succès
- [1] II- Quêtes événementielles par succès (part 1)
- [2] II- Quêtes événementielles par succès (part 2)
- [3] III- Quêtes non disponibles dans les succès
- [4] Anciennes quêtes de Dofus
- [5+] Nothing

In [7]:
def get_success_list(bs_table_list):
    """Build one record per success link found in the given index tables.

    The tables are walked in order. A ``div.paragraph`` that contains no link
    (or the very first one encountered) is read as a group title; every link in
    the following paragraphs is recorded as a success of that group.

    Args:
        bs_table_list: iterable of groups, each group being an iterable of
            parsed table cells exposing ``find_all``.

    Returns:
        A list of dicts with keys ``succes_group``, ``success_name``, ``link``,
        ``quests`` (an empty list to be filled later) and ``sid`` (the creation
        timestamp as a float).
        NOTE(review): timestamps are not guaranteed unique on every platform.
    """
    # Named `successes` rather than `list` so the builtin is not shadowed.
    successes = []
    succes_group = ""
    for bs_table_group in bs_table_list:
        for bs_table in bs_table_group:
            for div in bs_table.find_all("div", class_="paragraph"):
                if div.find("a") and succes_group != "":
                    for link in div.find_all("a"):
                        href = link.get("href")
                        if not href:
                            # Anchors without a target (e.g. <a name=...>) are
                            # not successes; indexing them would raise KeyError.
                            continue
                        successes.append({
                            "succes_group": succes_group,
                            "success_name": link.text.strip(),
                            "link": href,
                            "quests": [],
                            "sid": datetime.datetime.now().timestamp(),
                        })
                else:
                    # Group title: drop byte-order-mark style characters the
                    # site embeds before trimming whitespace.
                    succes_group = div.text.encode('utf-8', 'ignore').decode('utf-8').replace('\ufeff', '').replace('\ufefb', '').strip()
    return successes

def get_direct_quests(bs_table_list):
    """Collect the quests that are listed directly, outside of any success.

    Args:
        bs_table_list: iterable of groups, each group being an iterable of
            parsed table cells exposing ``find_all``.

    Returns:
        A single success-shaped dict named ``NOSUCCESS`` whose ``quests`` list
        holds one ``{"name", "link", "qid"}`` dict per link found; ``qid`` is
        the creation timestamp as a float.
    """
    # Named `no_success` rather than `list` so the builtin is not shadowed.
    no_success = {"succes_group": "NOSUCCESS", "success_name": "NOSUCCESS", "link": "https://www.dofuspourlesnoobs.com/classeacutees-par-succegraves.html#quetesnonsucces", "quests" :[]}
    for bs_table_group in bs_table_list:
        for bs_table in bs_table_group:
            for link in bs_table.find_all("a"):
                href = link.get("href")
                if not href:
                    # Anchors without a target cannot be downloaded later and
                    # would raise KeyError if indexed directly.
                    continue
                no_success["quests"].append({"name": link.text.strip(), "link": href, "qid": datetime.datetime.now().timestamp()})

    return no_success

In [8]:
# Tables 0-2 are the success listings (regular successes plus both parts of the
# event successes), per the index description above.
success_index_list = get_success_list(success_table_list[:3])
# Table 3 lists the quests that belong to no success; it is wrapped in a list
# because get_direct_quests iterates over groups of tables.
nosuccess_quests = get_direct_quests([success_table_list[3]])

### Get all quests for each success

In [None]:
def get_quests_from_succes_page(bs_page):
    """Extract the quest links from the content block of a success page.

    A link is kept when it has visible text and its target contains ``.html``.
    Links that have text but no ``href`` are counted and reported once at the
    end instead of being silently dropped.

    Args:
        bs_page: parsed content element exposing ``find_all``, or ``None`` when
            the page had no content block.

    Returns:
        A list of ``{"name", "link", "qid"}`` dicts (``qid`` is the creation
        timestamp as a float); empty when ``bs_page`` is ``None``.
    """
    if bs_page is None:
        # A failed lookup of the content block yields None; there is nothing
        # to scan, so report no quests rather than raising AttributeError.
        return []

    quests = []
    no_quest_count = 0
    for link in bs_page.find_all("a"):
        name = link.text.strip()
        if not name:
            continue
        # Explicit lookup instead of a blanket try/except, so only the
        # "anchor without href" case is counted and real errors still surface.
        href = link.get("href")
        if href is None:
            no_quest_count += 1
            continue
        if ".html" in href:
            quests.append({"name": name, "link": href, "qid": datetime.datetime.now().timestamp()})
    if no_quest_count > 0:
        print(f"Found {no_quest_count} other than quest with link items.")
    return quests

def add_quests_to_success(succes_list):
    """Fetch each success page and store its quests on the record, in place.

    Args:
        succes_list: iterable of success dicts carrying a ``link`` key; each
            dict gets its ``quests`` key replaced with the quests found on its
            page. Records whose page has no ``wsite-content`` block are left
            untouched and reported.

    Returns:
        None. The dicts in ``succes_list`` are mutated.
    """
    for success in succes_list:
        link = success["link"]
        # Only site-relative links need the base prepended; an absolute link
        # would otherwise produce a malformed URL.
        if link.startswith(("http://", "https://")):
            page_url = link
        else:
            page_url = f"{base_URL}{link}"
        # Timeout so one stalled request cannot hang the whole scrape.
        response = requests.get(page_url, timeout=30)
        # Explicit parser: without it BeautifulSoup picks whichever parser is
        # installed, so results would vary between environments.
        page_soup = bs(response.content, "html.parser").find("div", id="wsite-content")
        if page_soup is None:
            print(f"No 'wsite-content' block found at {page_url}; quests left unchanged.")
            continue
        success.update({"quests": get_quests_from_succes_page(page_soup)})

In [54]:
# Only real successes are scraped. The NOSUCCESS record already carries its
# quests and its link is an absolute URL to the index page, so it must not be
# handed to add_quests_to_success. Filtering by identity and appending only
# once keeps this cell safe to re-run.
add_quests_to_success([s for s in success_index_list if s is not nosuccess_quests])
if not any(s is nosuccess_quests for s in success_index_list):
    success_index_list.append(nosuccess_quests)

Found 1 other than quest with link items.


In [56]:
# Sanity check: one line per success with the number of quests scraped for it.
for suc in success_index_list:
    print(f"{suc['success_name']}: {len(suc['quests'])} quests")

En route pour l'aventure.: 9 quests
Un citoyen modèle.: 6 quests
Mais où sont les Dofus ?: 5 quests
Vert émeraude.: 9 quests
Pourpre profond.: 8 quests
Bleu turquoise.: 11 quests
Ocre d'ambre.: 9 quests
La fin de l'éternité.: 1 quests
Quatre sur six.: 10 quests
Blanc Ivoire.: 11 quests
Noir d'ébène.: 11 quests
Six sur six.: 13 quests
Fri carré.: 3 quests
Halte au péage.: 8 quests
La maire dénie.: 7 quests
L'hiver arrive.: 6 quests
L'âme de glace.: 7 quests
Fraîchement pondu.: 1 quests
Épilogue hivernal.: 1 quests
Le tour du monde en 27 donjons.: 8 quests
Première édition de donjons.: 6 quests
Donjons avancés.: 6 quests
Donjons trois point cinq.: 5 quests
Le siège des donjons.: 6 quests
La tornade des donjons.: 6 quests
D'un monde à l'autre.: 3 quests
Aux portes de la nuit.: 3 quests
Générations futures.: 3 quests
Par-delà les apparences.: 6 quests
Errances félines: 3 quests
La Fratrie des Oubliés.: 32 quests
Rêves de dragons.: 10 quests
L'avenir du futur.: 5 quests
Eliocalypse : Résona

## Save the success dicts in MongoDB

In [10]:
def connect_to_local_mongodb_db():
    """Return a handle on the local ``DPLN`` database, or ``None`` on failure.

    Returns:
        The ``DPLN`` database object when the server at
        ``mongodb://localhost:27017/`` answers, otherwise ``None`` (the error
        is printed, not raised).
    """
    db = None
    try:
        # Short server-selection timeout so a missing server is reported in
        # seconds rather than after the driver's much longer default.
        client = MongoClient("mongodb://localhost:27017/", serverSelectionTimeoutMS=5000)
        # MongoClient connects lazily: constructing it succeeds even when no
        # server is running. The ping forces a round trip so the message below
        # is only printed for a server that actually answers.
        client.admin.command("ping")
        db = client["DPLN"]
        print("Connected to local MongoDB")
    except Exception as e:
        print(f"Error connecting to local MongoDB: {e}")

    return db

In [11]:
db = connect_to_local_mongodb_db()
# The helper returns None when the connection fails; stop here with a clear
# message instead of an opaque "'NoneType' object is not subscriptable".
if db is None:
    raise RuntimeError("Local MongoDB is unavailable; start it on localhost:27017 and re-run this cell.")
success_collection = db["Success"]
quests_collection = db["Quests"]

Connected to local MongoDB


In [None]:
# Stores every success record (with its quest list embedded) as one document.
# NOTE(review): pymongo's insert_many is expected to add an "_id" to each dict
# in place and to reject an empty list — confirm before re-running this cell,
# since a second run would try to insert the same records again.
success_collection.insert_many(success_index_list)

In [None]:
# Quests are additionally stored flat in their own collection, one batch per
# success; successes without quests are skipped so no empty batch is sent.
for success in success_index_list:
    if not success["quests"]:
        continue
    quests_collection.insert_many(success["quests"])

## Download all quest pages to the file system

In [21]:
# exist_ok avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs("Quests", exist_ok=True)

In [None]:
def add_folder(path):
    """Create ``path`` (and any missing parent folders) if it is not there yet.

    Args:
        path: directory path to create; an already existing directory is fine.
    """
    # exist_ok replaces the separate exists() check, which could race with
    # another process creating the folder between the check and the call.
    os.makedirs(path, exist_ok=True)
        
def download_all_html():
    """Download every quest page referenced in the ``Success`` collection.

    For each success document a folder ``Quests/<slugified success name>/`` is
    created. For each of its quests two files are written: the HTML of the
    page's ``wsite-content`` block with advertisement blocks removed, and under
    ``txt/`` the plain-text rendering of that block. A quest whose page cannot
    be fetched or has no content block is reported and skipped.

    Returns:
        None. Progress is printed and files are written to disk.
    """
    for success in success_collection.find():
        count = 0
        path = "Quests/" + slugify(success["success_name"]) + "/"
        print("Saving quests in:", path)
        add_folder(path)
        add_folder(path + "txt/")
        for quest in success["quests"]:
            link = quest["link"]
            # Prefix test instead of a substring test, so a relative link that
            # merely contains "http" is still resolved against the site root.
            if link.startswith(("http://", "https://")):
                page_url = link
            else:
                page_url = f"{base_URL}{link}"
            try:
                # Timeout: one stalled request must not hang the whole run.
                response = requests.get(page_url, timeout=30)
                # Do not save the site's error page as if it were a quest.
                response.raise_for_status()
                # Explicit parser keeps results identical across environments.
                page_soup = bs(response.content, "html.parser").find("div", id="wsite-content")
            except Exception as x:
                print(f"Quest : {quest['name']} error : {x}")
                continue
            if page_soup is None:
                print(f"Quest : {quest['name']} error : no 'wsite-content' block at {page_url}")
                continue
            # Iterate over a snapshot: removing nodes while walking the live
            # child list skips siblings. extract() works on both tags and bare
            # text nodes.
            for child in list(page_soup.children):
                markup = str(child)
                if "PUBLICITE" in markup or "data-ad-text" in markup:
                    child.extract()
            file_name = link.split("/")[-1]
            if not file_name:
                # Link ends with "/": fall back to the quest name so we never
                # try to open the folder itself as a file.
                file_name = slugify(quest["name"]) + ".html"
            html_file_path = path + file_name
            # Explicit UTF-8: the pages contain accented characters and the
            # platform default encoding may not be able to represent them.
            with open(html_file_path, "w", encoding="utf-8") as file:
                file.write(str(page_soup))
            txt_file_path = path + "txt/" + file_name.replace(".html", ".txt")
            with open(txt_file_path, "w", encoding="utf-8") as file:
                file.write(page_soup.get_text())
            # Small pause between requests to stay polite with the site.
            time.sleep(0.05)
            count += 1
        print("^- Saved", count, "quests.")


In [23]:
# Full run: reads the Success collection and writes every quest page under Quests/.
download_all_html()

Saving quests in: Quests/en-route-pour-l-aventure/
saving: Quests/en-route-pour-l-aventure/bien-debuter.html
saving: Quests/en-route-pour-l-aventure/vaincre-les-monstres.html
saving: Quests/en-route-pour-l-aventure/l-anneau-de-tous-les-dangers.html
saving: Quests/en-route-pour-l-aventure/sous-le-regard-des-dieux.html
saving: Quests/en-route-pour-l-aventure/reacuteponses-agrave-tout.html
saving: Quests/en-route-pour-l-aventure/le-village-dans-les-nuages.html
saving: Quests/en-route-pour-l-aventure/espoirs-et-trageacutedies.html
saving: Quests/en-route-pour-l-aventure/dans-la-gueule-du-milimilou.html
saving: Quests/en-route-pour-l-aventure/destination-astrub.html
^- Saved 9 quests.
Saving quests in: Quests/un-citoyen-modele/
saving: Quests/un-citoyen-modele/les-principes-d-archie-m-aident.html
saving: Quests/un-citoyen-modele/on-marche-sur-des-oeufs.html
saving: Quests/un-citoyen-modele/conseil-de-classe.html
saving: Quests/un-citoyen-modele/quete-de-classe.html
saving: Quests/un-citoyen