In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta

In [2]:
def _badge_date(badge_text):
    """Turn a badge's text into a 'day-month' string.

    All whitespace runs (including newlines) are collapsed, then the last
    two words are joined with a hyphen, e.g. 'Accepted on\\n 7 Apr' -> '7-Apr'.
    Text with fewer than two words is returned as whatever words it has.
    """
    return '-'.join(badge_text.split()[-2:])


def get_data(URL, timeout=30):
    """Download a results page and extract the decision badges from it.

    Every ``<div class="row mb-2">`` on the page is inspected for a green
    (accepted) badge first and, failing that, a red (rejected) badge. Rows
    carrying neither badge are skipped.

    Parameters
    ----------
    URL : str
        Address of the page to download.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block forever.

    Returns
    -------
    list of dict
        One ``{'date': <str>, 'status': 'accepted' | 'rejected'}`` entry per
        row that carried a badge, in page order.

    Raises
    ------
    requests.RequestException
        If the request fails, times out, or returns an HTTP error status.
    """
    res = requests.get(URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty list.
    res.raise_for_status()
    soup = BeautifulSoup(res.content, 'html.parser')

    # Checked in order: a row with a green badge is reported as accepted
    # even if it also contains a red one.
    badge_statuses = (
        ('badge badge-green', 'accepted'),
        ('badge badge-red', 'rejected'),
    )

    result_list = []
    for row in soup.find_all('div', attrs={'class': 'row mb-2'}):
        for badge_class, status in badge_statuses:
            badge = row.find('span', attrs={'class': badge_class})
            if badge is not None:
                result_list.append({
                    'date': _badge_date(badge.text),
                    'status': status
                })
                break

    return result_list

In [3]:
# Fetch the survey results, retrying a few times on network errors.
# NOTE: the URL carries no page parameter, so only a single page of results
# is ever requested; each pass of the loop is a retry, not a new page.
URL = "https://www.thegradcafe.com/survey/?per_page=40&q=&institution=University+of+California%2C+Los+Angeles&program=Computer+Science&degree=Masters&season=F22"
MAX_ATTEMPTS = 4

all_result_list = []
for attempt in range(1, MAX_ATTEMPTS + 1):
    try:
        all_result_list.extend(get_data(URL))
        break
    except requests.RequestException as exc:
        # Only request-level failures are retried; anything else (a parsing
        # bug, a keyboard interrupt) is allowed to surface.
        print(f"Attempt {attempt}/{MAX_ATTEMPTS} failed: {exc}")

In [4]:
# Build the results table. Naming the columns explicitly keeps 'date' and
# 'status' present even when nothing was scraped, so later cells that index
# those columns do not fail with a KeyError.
df = pd.DataFrame(all_result_list, columns=['date', 'status'])

In [5]:
# Display the scraped results table
df

Unnamed: 0,date,status
0,7-Apr,accepted
1,6-Apr,rejected
2,7-Apr,rejected
3,7-Apr,rejected
4,7-Apr,rejected
5,7-Apr,rejected
6,7-Apr,rejected
7,7-Apr,rejected
8,7-Apr,rejected
9,7-Apr,accepted


In [6]:
# Convert the 'date' column to real datetimes.
# Scraped dates are in the form '9-Apr' or '17-Feb' and carry no year, so the
# year of the decisions is appended before parsing.
DECISION_YEAR = 2022

# Guard so the cell can be re-run safely: once the column is already
# datetime, appending the year string again would raise a TypeError.
if not pd.api.types.is_datetime64_any_dtype(df['date']):
    df['date'] = pd.to_datetime(df['date'] + f'-{DECISION_YEAR}', format='%d-%b-%Y')

In [7]:
import plotly.graph_objects as go

In [23]:
# Bar chart of decisions per day: x is the date, y is how many results with
# that status were reported on that date. One trace per status.
fig = go.Figure()

for status, label in (('accepted', 'Accepted'), ('rejected', 'Rejected')):
    # Count once per status and reuse the result for both axes.
    counts = df.loc[df['status'] == status, 'date'].value_counts()
    fig.add_trace(go.Bar(x=counts.index, y=counts.values, name=label))

# Vertical dotted line marking today's calendar day on the data's timeline.
# The year is taken from the data rather than "now minus 365 days", which is
# only in the right year when the notebook is run exactly one year later.
if not df.empty:
    data_year = int(df['date'].dt.year.max())
    today = datetime.now()
    try:
        marker_date = today.replace(year=data_year)
    except ValueError:
        # Today is 29 Feb and the data's year is not a leap year.
        marker_date = today.replace(year=data_year, day=28)

    fig.add_shape(type="line", x0=marker_date, y0=0, x1=marker_date,
                  y1=int(df['date'].value_counts().max()),
                  line=dict(color="LightSeaGreen", width=4, dash="dot"))

fig.update_layout(title='Admission decisions per day',
                  xaxis_title='Decision date',
                  yaxis_title='Number of decisions')

fig.show()