In [10]:
import json
import os
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


In [11]:
# Entry point of the scrape: this page is fetched first and the category
# links are collected from it.
BASE_URL = "https://support.highrise.game/en/"


In [12]:
# Download the landing page that lists every category.
# timeout: requests waits indefinitely by default, so a stalled connection
# would otherwise hang this cell forever.
response = requests.get(BASE_URL, timeout=30)
if response.status_code != 200:
    # Stop here instead of only printing: without this page `main_page_html`
    # would be undefined (or stale from an earlier run) when the next cell
    # tries to parse it.
    raise RuntimeError(
        f"Failed to retrieve the main page. Status code: {response.status_code}"
    )
main_page_html = response.text


In [13]:
# Parse the landing page and build `categories`: one {'name', 'url'} dict per
# category card found on it.
soup = BeautifulSoup(main_page_html, 'lxml')

# Find all category links
category_links = soup.find_all('a', {'data-testid': 'collection-card-compact'})
categories = []

for link in category_links:
    # .get() rather than link['href']: an anchor without an href would raise
    # KeyError and abort the whole cell; such a card cannot be crawled anyway.
    category_href = link.get('href')
    if not category_href:
        continue

    category_name_div = link.find('div', {'data-testid': 'collection-name'})
    if category_name_div:
        category_name = category_name_div.get_text(strip=True)
    else:
        category_name = 'Unknown Category'

    # urljoin returns absolute links unchanged and resolves relative ones
    # against the page they were found on.
    category_url = urljoin(BASE_URL, category_href)
    categories.append({
        'name': category_name,
        'url': category_url
    })

# Check the categories extracted
print(f"Found {len(categories)} categories.")
for cat in categories:
    print(cat['name'], cat['url'])


Found 20 categories.
4.0 Features https://support.highrise.game/en/collections/8183635-4-0-features
What's Highrise about? https://support.highrise.game/en/collections/4710659-what-s-highrise-about
Gold, Bubbles and Payments https://support.highrise.game/en/collections/4710692-gold-bubbles-and-payments
Newsfeed and Posting https://support.highrise.game/en/collections/4711623-newsfeed-and-posting
Rooms and Building https://support.highrise.game/en/collections/4711832-rooms-and-building
Rewards and Gifts https://support.highrise.game/en/collections/4712644-rewards-and-gifts
Grabs and Shop Items https://support.highrise.game/en/collections/4712950-grabs-and-shop-items
Reporting https://support.highrise.game/en/collections/6037541-reporting
Your Crew https://support.highrise.game/en/collections/6121827-your-crew
Trading and Selling Items https://support.highrise.game/en/collections/6121878-trading-and-selling-items
Bugs and Troubleshooting https://support.highrise.game/en/collections/61219

In [None]:
# Visit every category page and build `all_articles`: one
# {'title', 'url', 'category'} dict per article link found.
all_articles = []

for category in categories:
    print(f"Processing category: {category['name']}")
    category_url = category['url']

    # Fetch category page. The timeout keeps a stalled connection from hanging
    # the crawl, and a network error is treated like a bad status: report it
    # and move on to the next category instead of aborting the whole loop.
    try:
        response = requests.get(category_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve category page: {category_url} ({exc})")
        time.sleep(1)  # keep the pause between requests on failure paths too
        continue
    if response.status_code != 200:
        print(f"Failed to retrieve category page: {category_url}")
        time.sleep(1)
        continue
    soup = BeautifulSoup(response.text, 'lxml')

    # Find all article links
    article_links = soup.find_all('a', {'data-testid': 'article-link'})
    articles = []

    for link in article_links:
        # An anchor without an href has nothing to fetch later; skip it rather
        # than raising KeyError on link['href'].
        article_href = link.get('href')
        if not article_href:
            continue

        article_title_span = link.find('span', {'class': 'm-0 text-md text-body-primary-color group-hover/article:text-primary font-regular'})
        if article_title_span:
            article_title = article_title_span.get_text(strip=True)
        else:
            article_title = 'Unknown Article'

        # Absolute links pass through unchanged; relative ones are resolved
        # against the category page they came from.
        article_url = urljoin(category_url, article_href)
        articles.append({
            'title': article_title,
            'url': article_url,
            'category': category['name']
        })

    print(f"Found {len(articles)} articles in category '{category['name']}'.")
    all_articles.extend(articles)
    time.sleep(1)  # pause between category requests

print(f"Total articles found: {len(all_articles)}")


Processing category: 4.0 Features
Found 14 articles in category '4.0 Features'.
Processing category: What's Highrise about?
Found 3 articles in category 'What's Highrise about?'.
Processing category: Gold, Bubbles and Payments
Found 12 articles in category 'Gold, Bubbles and Payments'.
Processing category: Newsfeed and Posting
Found 5 articles in category 'Newsfeed and Posting'.
Processing category: Rooms and Building
Found 15 articles in category 'Rooms and Building'.
Processing category: Rewards and Gifts
Found 5 articles in category 'Rewards and Gifts'.
Processing category: Grabs and Shop Items
Found 9 articles in category 'Grabs and Shop Items'.
Processing category: Reporting
Found 4 articles in category 'Reporting'.
Processing category: Your Crew
Found 8 articles in category 'Your Crew'.
Processing category: Trading and Selling Items
Found 4 articles in category 'Trading and Selling Items'.
Processing category: Bugs and Troubleshooting
Found 3 articles in category 'Bugs and Troubl

In [None]:
# Extract content from each article.
# Adds two keys to every record in `all_articles`, in place:
#   'content'      - text of the article body ('' when it cannot be obtained)
#   'last_updated' - the "Updated ..." label text ('' when absent)
for idx, article in enumerate(all_articles, start=1):
    print(f"Extracting article {idx}/{len(all_articles)}: {article['title']}")
    article_url = article['url']

    # Make sure both keys exist before anything can fail, so a skipped page
    # still yields a complete record for the cleaning and JSON steps.
    # setdefault keeps values from an earlier successful run of this cell.
    article.setdefault('content', '')
    article.setdefault('last_updated', '')

    # Fetch article page. A timeout or network error is handled the same way
    # as a bad status: report it and continue with the next article.
    try:
        response = requests.get(article_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to retrieve article page: {article_url} ({exc})")
        time.sleep(1)  # keep the pause between requests on failure paths too
        continue
    if response.status_code != 200:
        print(f"Failed to retrieve article page: {article_url}")
        time.sleep(1)
        continue
    soup = BeautifulSoup(response.text, 'lxml')

    # Extract the article content
    content_div = soup.find('div', class_='article_body')
    if content_div:
        # Remove the 'Related Articles' section
        related_section = content_div.find('section', class_='related_articles')
        if related_section:
            related_section.decompose()
        article['content'] = content_div.get_text(separator='\n', strip=True)
    else:
        article['content'] = ''
        print(f"Content not found for article: {article['title']}")

    # Extract the last updated date; it stays '' unless the label is found
    # and actually contains the word 'Updated'.
    last_updated = ''
    updated_div = soup.find('div', class_='avatar__info -mt-0.5 text-base')
    if updated_div:
        date_span = updated_div.find('span', class_='text-body-secondary-color')
        if date_span and 'Updated' in date_span.get_text():
            last_updated = date_span.get_text(strip=True)
    article['last_updated'] = last_updated

    time.sleep(1)  # pause between article requests


Extracting article 1/135: 📌Pinning
Extracting article 2/135: Deleting Items
Extracting article 3/135: Profile 4.0
Extracting article 4/135: How Do I Change My Background?
Extracting article 5/135: What is Grab Swapping?
Extracting article 6/135: Outfit of the Day
Extracting article 7/135: Directory 4.0
Extracting article 8/135: Social Media Links
Extracting article 9/135: Highrise+
Extracting article 10/135: Inbox Calls
Extracting article 11/135: Inbox Optimizations
Extracting article 12/135: Inbox Media Messages
Extracting article 13/135: Engagement-Based Payouts
Extracting article 14/135: Display Names
Extracting article 15/135: What are the Highrise Community Guidelines?
Extracting article 16/135: How do I play Highrise? What is this game about?
Extracting article 17/135: How does the Highrise chat filter work?
Extracting article 18/135: What are Bubbles?
Extracting article 19/135: What is Gold and how do I get it?
Extracting article 20/135: Why didn't I receive the gold that I purc

In [None]:
def clean_text(text):
    """Flatten `text` onto a single line.

    Carriage returns are dropped, each newline becomes one space, and
    leading/trailing whitespace is stripped from the result.
    """
    # One pass over the string: delete '\r', map '\n' to a space.
    line_break_map = {ord('\r'): None, ord('\n'): ' '}
    return text.translate(line_break_map).strip()

# Apply cleaning to article content.
# A record can lack 'content' (for example when its page was skipped or the
# extraction cell was interrupted), so default to '' instead of raising
# KeyError partway through the list.
for article in all_articles:
    article['content'] = clean_text(article.get('content', ''))


In [17]:
# Persist the scraped records as pretty-printed UTF-8 JSON under ./data.
output_dir = 'data'
output_path = 'data/faq_data.json'

# Create the target directory on first run; no error if it already exists.
os.makedirs(output_dir, exist_ok=True)

# ensure_ascii=False writes non-ASCII characters as-is rather than as \u escapes.
with open(output_path, mode='w', encoding='utf-8') as out_file:
    json.dump(all_articles, out_file, ensure_ascii=False, indent=4)

print("Data extraction complete. Saved to data/faq_data.json")


Data extraction complete. Saved to data/faq_data.json
