# Store information about a competition in one table: Competition

<table>
  <thead>
    <tr>
      <th>Datafield</th>
      <th>Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>id</td>
      <td>unique identifier for competition</td>
    </tr>
    <tr>
      <td>url</td>
      <td>competition webpage</td>
    </tr>
    <tr>
      <td>name</td>
      <td>competition name</td>
    </tr>
    <tr>
      <td>start_time</td>
      <td>start time of competition</td>
    </tr>
    <tr>
      <td>seeker</td>
      <td>competition host</td>
    </tr>
    <tr>
      <td>prize</td>
      <td>prize for winners</td>
    </tr>
    <tr>
      <td>entries</td>
      <td>[{entry_id: xx, participant_id: xx, time: xxx, url: xxx}, {entry_id: xx, participant_id: xx, time: xxx, url: xxx}, ...]</td>
    </tr>
    <tr>
      <td>deleted_entries</td>
      <td>[{entry_id: xx, participant_id: xx, status: xxx}, {entry_id: xx, participant_id: xx, status: xxx}, ...]</td>
    </tr>
    <tr>
      <td>winners</td>
      <td>[entry_id, entry_id]</td>
    </tr>

  </tbody>
</table>


In [1]:
import re
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Base URL of the contest to scrape; later cells append "/entries" and query parameters to it.
competition_url = "https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555"


## 1. get competition information

In [3]:
# Fetch the contest page itself, sending a browser-like User-Agent header.
kv = {'user-agent': 'Mozilla/5.0'}
r = requests.get(competition_url, headers=kv, timeout=30)
r.raise_for_status()  # stop on a 4xx/5xx answer instead of parsing an error page
# Decode the body with the encoding detected from its content.
r.encoding = r.apparent_encoding

In [4]:
# Parse the downloaded contest page with Python's built-in HTML parser.
soup = BeautifulSoup(r.text, 'html.parser')

In [5]:
## title
# A string given as `attrs` is matched against the tag's CSS class.
title_tag = soup.find(name='h1', attrs="heading heading--h1 heading--no-margin")
# Fall back to None when the heading is not on the page.
title = title_tag.text if title_tag else None

title

'Exotic Rides Ultimate Logo Contest'

In [6]:
# Locate the "inline-page" div; the next cells search inside it for headings and paragraphs.
results = soup.find(name='div', attrs='inline-page')

In [7]:
# Section headings found inside the container (e.g. "Company name", "Overview").
headers = results.findAll("p", "heading heading--size4 heading--no-margin")
headers

[<p class="heading heading--size4 heading--no-margin">Company name</p>,
 <p class="heading heading--size4 heading--no-margin">Overview</p>,
 <p class="heading heading--size4 heading--no-margin">About us</p>]

In [8]:
# Paragraphs carrying the "paragraph" class; a later cell pairs them positionally with `headers`.
descriptions = results.findAll("p", "paragraph")
descriptions

[<p class="paragraph paragraph--no-margin">Exotic Rides</p>,
 <p class="paragraph"><span>We are based in Bali, Indonesia. We offer clients the opportunity to hire and experience amazing vehicles: Hummer Limos, Helicopters, Luxury Boats, and Super Cars. 
 More than just a vehicle hire company, we aim to give our clients a once in a lifetime luxury experience.
 We also specialize in making sure the clients receive exceptional service to ensure their experience with us is a truly memorable one.</span></p>,
 <p class="paragraph"><span>We cater for several different markets; Weddings, Hotels (mostly 5 Star), Corporate Clients, Clubbers, Surfers and party goers, as well as tourists to Bali. 
 Our target audience ranges from 18-80 years old, is male and female, would like to (or already do) experience a luxury/celebrity lifestyle, and have disposable income which they want to have a bit of fun with.</span></p>]

In [9]:
# Defaults kept when the matching heading is not present on the page.
summary = company_name = vision = ""

In [10]:
# Index each paragraph's text by the heading at the same position. A repeated
# heading keeps its last paragraph, and a heading that is absent leaves the
# current value untouched.
# NOTE(review): the pairing is purely positional - confirm the page always
# yields one "paragraph" element per heading.
text_by_header = {
    header.text: description.text
    for header, description in zip(headers, descriptions)
}
summary = text_by_header.get("Summary", summary)
company_name = text_by_header.get("Company name", company_name)
vision = text_by_header.get("What's your vision?", vision)


In [11]:
# Show each extracted field under its banner line.
for banner, value in (
    ("**********summary*************", summary),
    ("**********company name**********", company_name),
    ("**********vision**********", vision),
):
    print(banner)
    print(value)


**********summary*************

**********company name**********
Exotic Rides
**********vision**********



In [12]:
# Fetch the contest's "/entries" page; the start date is read from it below.
kv = {'user-agent': 'Mozilla/5.0'}
url = competition_url + "/entries"

r = requests.get(url, headers=kv, timeout=30)
r.raise_for_status()  # stop on a 4xx/5xx answer instead of parsing an error page
# Decode the body with the encoding detected from its content.
r.encoding = r.apparent_encoding

In [13]:
# Re-parse: `soup` now refers to the "/entries" page fetched just above.
soup = BeautifulSoup(r.text, 'html.parser')

In [14]:
## competition start date
text = str(soup.find(name='div', attrs="contest-header__price"))
# The date itself contains a comma ("Sun, 11 Jul ..."), so capture what sits
# between the quotes rather than matching greedily up to a trailing comma.
start_match = re.search(r'"startDate":\s*"([^"]+)"', text)
# None when the page carries no start date, instead of an AttributeError.
start_time = start_match.group(1) if start_match else None
print(start_time)

Sun, 11 Jul 2010 10:37:24 +0000


## 2. get participants and entry info

### winner id and entry id

In [15]:
def make_soup(competition_url, page_number, active=True):
    """Download one page of a contest's entry listing and parse it.

    Args:
        competition_url: Base contest URL (without the "/entries" suffix).
        page_number: Listing page to request.
        active: When true the "active" filter is requested, otherwise
            "non_active".

    Returns:
        A BeautifulSoup object built from the response body.

    Raises:
        requests.HTTPError: raised by ``raise_for_status()`` on a 4xx/5xx
            response.
    """
    kv = {'user-agent': 'Mozilla/5.0'}
    entry_filter = "active" if active else "non_active"
    url = competition_url + "/entries?filter=" + entry_filter + "&page=" + str(page_number)
    print(url)  # progress trace: shows which page is being fetched

    r = requests.get(url, headers=kv, timeout=30)
    r.raise_for_status()
    # Decode the body with the encoding detected from its content.
    r.encoding = r.apparent_encoding
    return BeautifulSoup(r.text, 'html.parser')

In [16]:
def get_winner_info(soup):
    """Collect the element ids of the winning entries on a listing page.

    Args:
        soup: Parsed listing page.

    Returns:
        A list with the ``id`` attribute of one entry div per winner block,
        in page order; empty when the page shows no winner block.
    """
    linked_class = "entry entry--linked entry--zoom-linked"
    winner_blocks = soup.findAll(name='div', attrs='entry-matrix__item matrix__item entry-winners')

    winner_entry_ids = []
    for block in winner_blocks:
        # Prefer the linked entry; otherwise fall back to any "entry" div,
        # which covers a winning entry that has since been deleted.
        tag = block.find(name='div', attrs=linked_class) or block.find(name='div', attrs="entry")
        if tag:
            winner_entry_ids.append(tag["id"])
    return winner_entry_ids


In [17]:
# Fetch the first page of the active listing and read the winner ids from it.
page_number = 1
soup = make_soup(competition_url, page_number)
winner_entry_ids = get_winner_info(soup) 
winner_entry_ids

https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=active&page=1


[]

In [18]:
# Read the "display-name" span from the listing page. A missing span yields
# None (as `title` does) instead of an AttributeError.
seeker_tag = soup.find(name='span', attrs="display-name")
seeker = seeker_tag.text if seeker_tag is not None else None
seeker

'Exotic Rides'

In [19]:
# Text of the filter drop-down flattened to a single line; it carries the
# per-filter entry counts parsed in the next cell.
filter_select = soup.find(name='select', attrs="styled-select__select")
entry_summary = filter_select.text.replace('\n', "")
entry_summary

'            All (54)                    Unrated (10)                    1–2 stars (22)                    3–5 stars (22)                    Declined and withdrawn (292)        '

In [20]:
# Raw-string patterns avoid the invalid "\(" escape, and capture groups
# replace the hand-counted slice offsets. Both counts are now integers.
entry_count = int(re.search(r'All \((\d+)\)', entry_summary).group(1))
deleted_entry_count = int(re.search(r'Declined and withdrawn \((\d+)\)', entry_summary).group(1))
print(entry_count, deleted_entry_count)

54 292


In [21]:
def get_participant_and_entry_info(soup):
    """Append the user id, entry id and entry link of every listed entry.

    Results are appended to the module-level lists
    ``participants_user_ids``, ``participants_entry_ids`` and
    ``participants_entry_image_urls``; nothing is returned.

    An entry is recorded only when all three values are present, so the
    three lists always stay the same length.

    Args:
        soup: Parsed listing page.
    """
    entry_matrix = soup.findAll(name='div', attrs='entry-matrix__item matrix__item')
    for entry in entry_matrix:
        result = entry.find(name='div', attrs='entry entry--linked entry--zoom-linked')
        if result is None:
            # Matrix item without a linked entry: nothing to record.
            continue
        try:
            # Read every field before appending any of them.
            user_id = result["data-user-id"]
            entry_id = result["id"]
            image_url = result.find(name='a')["href"]
        except (KeyError, TypeError):
            # Missing attribute, or no <a> inside the entry: skip it whole.
            continue
        participants_user_ids.append(user_id)
        participants_entry_ids.append(entry_id)
        participants_entry_image_urls.append(image_url)

In [22]:
def get_time(soup):
    """Extract the submission timestamps embedded in a listing page.

    The "entry-pane__results" div is converted to text and scanned for
    ``"timeCreatedString":"<value>"`` pairs.

    Args:
        soup: Parsed listing page.

    Returns:
        The timestamp strings in page order; an empty list when none are
        found (including when the div is missing).
    """
    text = str(soup.find(name='div', attrs='entry-pane__results'))
    # [^"] keeps the match inside the quoted value; a greedy "." could run
    # past the closing quote into the next JSON key.
    return re.findall(r'"timeCreatedString":"([^"]{20,30})"', text)

    

In [23]:
def get_prize(soup):
    """Extract the prize amount embedded in the contest header.

    The contents of the "contest-header contest-header--with-breadcrumbs"
    div are converted to text and scanned for ``"prizeMoney": "<value>"``.

    Args:
        soup: Parsed listing page.

    Returns:
        The prize string (2 to 10 characters), or None when the header or
        the prize field is not present.
    """
    header = soup.find(name='div', attrs='contest-header contest-header--with-breadcrumbs')
    if header is None:
        return None
    # [^"] keeps the match inside the quoted value; a greedy "." could run
    # past the closing quote into the following JSON key.
    match = re.search(r'"prizeMoney": "([^"]{2,10})"', str(header.contents))
    return match.group(1) if match else None


In [24]:
# A page returning fewer timestamps than this is treated as the last page.
ENTRIES_PER_PAGE = 36

participants_user_ids = []
participants_entry_ids = []
participants_entry_image_urls = []
participants_submission_time = []
page_number = 1

while True:
    print(page_number)
    soup = make_soup(competition_url, page_number)
    if page_number == 1:
        # Prize and winners are read once, from the first listing page.
        prize = get_prize(soup)
        winner_entry_ids = get_winner_info(soup)
    get_participant_and_entry_info(soup)
    page_times = get_time(soup)
    participants_submission_time += page_times
    # Stop once "entry-1" has been collected or the page came back short.
    if "entry-1" in participants_entry_ids or len(page_times) < ENTRIES_PER_PAGE:
        break

    page_number += 1

1
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=active&page=1
2
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=active&page=2


In [25]:
# Preview the first five collected submission timestamps.
participants_submission_time[:5]

['2010-07-17T03:58:13+00:00',
 '2010-07-12T10:35:05+00:00',
 '2010-07-17T07:29:38+00:00',
 '2010-07-17T04:02:15+00:00',
 '2010-07-17T03:36:56+00:00']

In [26]:
# Combine the four parallel lists into one record per entry; zip() stops at
# the shortest list.
entry_fields = ('entry_id', 'participant_id', 'time', 'url')
entries = [
    dict(zip(entry_fields, row))
    for row in zip(participants_entry_ids,
                   participants_user_ids,
                   participants_submission_time,
                   participants_entry_image_urls)
]


In [27]:
# Preview the first five entry records.
entries[:5]

[{'entry_id': 'entry-286',
  'participant_id': '380660',
  'time': '2010-07-17T03:58:13+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/286'},
 {'entry_id': 'entry-82',
  'participant_id': '380660',
  'time': '2010-07-12T10:35:05+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/82'},
 {'entry_id': 'entry-297',
  'participant_id': '439574',
  'time': '2010-07-17T07:29:38+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/297'},
 {'entry_id': 'entry-287',
  'participant_id': '252077',
  'time': '2010-07-17T04:02:15+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/287'},
 {'entry_id': 'entry-284',
  'participant_id': '252077',
  'time': '2010-07-17T03:36:56+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/284'}]

In [28]:
# Preview the last five entry records.
entries[-5:]

[{'entry_id': 'entry-329',
  'participant_id': '399217',
  'time': '2010-07-18T07:11:02+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/329'},
 {'entry_id': 'entry-185',
  'participant_id': '380660',
  'time': '2010-07-13T19:55:29+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/185'},
 {'entry_id': 'entry-77',
  'participant_id': '424912',
  'time': '2010-07-12T08:34:01+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/77'},
 {'entry_id': 'entry-18',
  'participant_id': '303477',
  'time': '2010-07-11T16:40:55+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/18'},
 {'entry_id': 'entry-1',
  'participant_id': '304907',
  'time': '2010-07-11T11:11:28+00:00',
  'url': '/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries/1'}]

## 3. get participants and entry info for deleted entries

In [29]:
def get_participant_and_entry_info_deleted_page(soup):
    """Collect user id, entry id and status for each entry on a listing page.

    The status of an entry is taken from its first "entry-status-overlay"
    div that is not marked ``data-hidden`` and whose ``data-entry-status``
    is "deleted", "withdrawn" or "declined"; it is '' when no such overlay
    exists.

    An entry is recorded only when both ids are present, and always together
    with its status, so the three returned lists stay the same length.

    Args:
        soup: Parsed listing page.

    Returns:
        A tuple ``(user_ids, entry_ids, status, deleted_count,
        withdrawn_count, declined_count)``: three parallel lists followed by
        the number of entries found with each status on this page.
    """
    user_ids = []
    entry_ids = []
    status = []
    counts = {'deleted': 0, 'withdrawn': 0, 'declined': 0}

    entry_matrix = soup.findAll(name='div', attrs='entry-matrix__item matrix__item')

    for entry in entry_matrix:
        result = entry.find(name='div', attrs='entry')
        if result is None:
            # Matrix item without an entry div: nothing to record.
            continue
        try:
            # Read both ids before appending either of them.
            user_id = result["data-user-id"]
            entry_id = result["id"]
        except KeyError:
            continue

        s = ''
        for tag in entry.findAll('div', "entry-status-overlay"):
            attributes = tag.attrs
            # .get(): an overlay without a status attribute is simply ignored.
            overlay_status = attributes.get('data-entry-status')
            if overlay_status in counts and 'data-hidden' not in attributes:
                s = overlay_status
                counts[s] += 1
                break

        user_ids.append(user_id)
        entry_ids.append(entry_id)
        status.append(s)

    return (user_ids, entry_ids, status,
            counts['deleted'], counts['withdrawn'], counts['declined'])

In [30]:
# A page returning fewer entries than this is treated as the last page.
ENTRIES_PER_PAGE = 36

deleted_participants_user_ids = []
deleted_participants_entry_ids = []
status_all = []
total_deleted_count = 0
total_withdrawn_count = 0
total_declined_count = 0

page_number = 1
winner_number = len(winner_entry_ids)

while True:
    print(page_number)
    soup = make_soup(competition_url, page_number, active=False)
    if page_number == 1:
        # NOTE(review): this replaces the winner ids gathered from the active
        # listing with those found on the non-active one - confirm intended.
        winner_entry_ids = get_winner_info(soup)
    (user_ids, entry_ids, status,
     deleted_count, withdrawn_count, declined_count) = get_participant_and_entry_info_deleted_page(soup)
    deleted_participants_user_ids += user_ids
    deleted_participants_entry_ids += entry_ids
    status_all += status
    total_deleted_count += deleted_count
    total_withdrawn_count += withdrawn_count
    total_declined_count += declined_count
    # Stop once this page contains "entry-1" or came back short.
    if "entry-1" in entry_ids or len(user_ids) < ENTRIES_PER_PAGE:
        break

    page_number += 1

1
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=1
2
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=2
3
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=3
4
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=4
5
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=5
6
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=6
7
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=7
8
https://99designs.hk/logo-design/contests/exotic-rides-ultimate-logo-contest-48555/entries?filter=non_active&page=8
9
https://99designs.hk/logo-design/contests/exotic-rides

In [31]:
# Dump the complete id lists collected from the non-active listing (no truncation).
print(deleted_participants_user_ids)
print(deleted_participants_entry_ids)

['271748', '271748', '400717', '400717', '400717', '385721', '416828', '416828', '271748', '385721', '271748', '414952', '400717', '416828', '414952', '400717', '271748', '271748', '271748', '414952', '414952', '414952', '416828', '416828', '416828', '416828', '416828', '416828', '228439', '416828', '416828', '271748', '271748', '264209', '424561', '252077', '400717', '385721', '385721', '385721', '385721', '436438', '400717', '271748', '414952', '252077', '414952', '436438', '400717', '400717', '437154', '252077', '252077', '400717', '393993', '252077', '252077', '252077', '400717', '400717', '400717', '282258', '437154', '393993', '282258', '400717', '400717', '416828', '416828', '416828', '416828', '271748', '304907', '402479', '424561', '437154', '414952', '437154', '252077', '414952', '437154', '264925', '271748', '424561', '424561', '424561', '414952', '414952', '414952', '307703', '324234', '324234', '424561', '424561', '324234', '424561', '193802', '424561', '367939', '435981',

In [32]:
# One record per non-active entry, built from the three parallel lists;
# zip() stops at the shortest list.
deleted_entries = [
    {'entry_id': entry_id, 'participant_id': participant_id, 'status': entry_status}
    for entry_id, participant_id, entry_status in zip(deleted_participants_entry_ids,
                                                      deleted_participants_user_ids,
                                                      status_all)
]

## output to file

In [33]:
# Build a one-row frame describing the contest. The list-wrapped values fix the
# row count at 1 and the bare scalars are broadcast to that length.
# `entries`, `deleted_entries` and `winner_entry_ids` are wrapped in a list so
# each whole collection is stored in a single cell.
df_competition_description = pd.DataFrame({'title': [title], 'url': competition_url, 'seeker': seeker, 'summary': [summary], 
                                            'company_name': [company_name], 
                                            'vision': [vision],  'start_time': start_time, 
                                            'entry_count': entry_count, 'deleted_entry_count': deleted_entry_count,
                                            'deleted': total_deleted_count, 'withdrawn': total_withdrawn_count, 
                                            'declined': total_declined_count, 
                                            'prize': prize, 'entries': [entries],
                                            'deleted_entries': [deleted_entries], 'winners': [winner_entry_ids]})

In [34]:
# Name the file after the last URL segment; a trailing "/" is stripped first
# so the name is never empty.
competition_name = competition_url.rstrip("/").split("/")[-1]
# Create the output folder on demand - to_csv() does not create missing directories.
output_dir = Path('data_20220301')
output_dir.mkdir(parents=True, exist_ok=True)
df_competition_description.to_csv(output_dir / (competition_name + '.csv'), index=False)