In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from bs4 import BeautifulSoup

## Parameters of speedrun.com website

In [2]:
# Listing endpoint, ordered by most active; the value appended after `start=`
# selects which slice of the listing is returned.
base_url = 'https://www.speedrun.com/ajax_games.php?game=&platform=&unofficial=off&orderby=mostactive&title=&series=&start='

# Pagination: the `start=` value advances by this step, for at most this many pages.
games_per_page = 50
max_pages = 1

# HTML element holding each game's name.
game_tag = 'div'
game_class = 'game-name'

# HTML element holding each game's active-player count.
players_tag = 'p'
players_class = 'small text-muted'

# Fragments stripped (in this order) from the player-count text before it is
# converted to an int. The plural must come before the singular, otherwise
# removing 'active player' first would leave a stray 's' behind.
delete_words = [
    'active players',
    'active player',
    '\t',
    '\n',
    ',',
    ' ',
]

## Helper Functions

In [3]:
def get_parsed_url(base_url, page, timeout=30):
    """Download one slice of the games listing and parse it as HTML.

    Parameters
    ----------
    base_url : str
        URL prefix; ``page`` is appended to it verbatim.
    page : int
        Value appended to ``base_url`` (converted with ``str``).
    timeout : float, optional
        Seconds to wait for the server before giving up. Defaults to 30.

    Returns
    -------
    BeautifulSoup
        The response body parsed with the ``html.parser`` backend.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    url = base_url + str(page)
    # requests has no default timeout: without one, an unresponsive server
    # would block the notebook cell indefinitely.
    website = requests.get(url, timeout=timeout)
    return BeautifulSoup(website.content, 'html.parser')

def add_games_in_page(games, soup):
    """Append the game names found in ``soup`` to ``games`` and return it.

    Names are the text of every ``game_tag`` element whose class is
    ``game_class``, in document order. ``games`` is extended in place.
    """
    name_nodes = soup.find_all(game_tag, class_=game_class)
    games += [node.text for node in name_nodes]
    return games

def add_players_in_page(players, soup):
    """Append the active-player counts found in ``soup`` to ``players``.

    Each ``players_tag`` element with class ``players_class`` is reduced to a
    bare number by removing every entry of ``delete_words`` from its text, then
    converted with ``int`` and appended. ``players`` is modified in place and
    returned. ``int`` raises ``ValueError`` if the cleaned text is not a number.
    """
    def _to_count(raw):
        # Strip the fragments one after another, in the order they are listed.
        for fragment in delete_words:
            raw = raw.replace(fragment, '')
        return int(raw)

    for node in soup.find_all(players_tag, class_=players_class):
        players.append(_to_count(node.text))
    return players

## Scraping Speedrun.com

In [22]:
games = []
players = []

# `page` is the offset sent as the `start=` parameter: 0, games_per_page, ...
# Each page is fetched only when it is about to be processed, so no request is
# made beyond the max_pages limit.
for page in range(0, max_pages * games_per_page, games_per_page):
    soup = get_parsed_url(base_url, page)
    # A parsed document with at most one top-level node is treated as the end
    # of the listing (presumably an empty response; confirm against the site).
    if len(soup) <= 1:
        break
    games = add_games_in_page(games, soup)
    players = add_players_in_page(players, soup)

### Building a pandas DataFrame

In [23]:
# One row per game, in scraped order. Rank is the 1-based row position, and the
# prediction follows Zipf's law: the top game's player count divided by rank,
# rounded up.
games_df = (
    pd.DataFrame({'Game': games, 'Active Players': players})
    .assign(Rank=lambda df: df.index + 1)
    .assign(**{
        'Predicted Active Players':
            lambda df: np.ceil(df['Active Players'][0] / df['Rank']),
    })
)

### Plotting

In [24]:
# Observed player counts as a scatter; hovering a point shows the game's name.
fig_data = px.scatter(games_df, x="Rank", y="Active Players", hover_data=["Game"])
# Name each trace where it is created rather than by its position in the
# combined figure, so the legend stays correct if traces are added or reordered.
fig_data.update_traces(marker=dict(color="#40B0A6"),
                       name='Actual Active Players',
                       showlegend=True)

# Zipf's-law prediction as a line over the same ranks.
fig_prediction = px.line(games_df, x="Rank", y='Predicted Active Players')
fig_prediction.update_traces(line_color='#D35FB7',
                             name="Zipf's Law",
                             showlegend=True)

# Overlay both sets of traces in a single figure.
fig = go.Figure(data=fig_data.data + fig_prediction.data)
fig.update_layout(title_text="Most active speedrunning games on Speedrun.com",
                  xaxis_title='Rank',
                  yaxis_title='Number of Active Players')
fig.show()