# Store information about a competition in one table: Competition

<table>
  <thead>
    <tr>
      <th>Datafield</th>
      <th>Description</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>id</td>
      <td>unique identifier for competition</td>
    </tr>
    <tr>
      <td>url</td>
      <td>competition webpage</td>
    </tr>
    <tr>
      <td>name</td>
      <td>competition name</td>
    </tr>
    <tr>
      <td>start_time</td>
      <td>start time of competition</td>
    </tr>
    <tr>
      <td>seeker</td>
      <td>competition host</td>
    </tr>
    <tr>
      <td>prize</td>
      <td>prize for winners</td>
    </tr>
    <tr>
      <td>entries</td>
      <td>[{entry_id: xx, participant_id: xx, time: xxx, url: xxx, rating: xx}, {entry_id: xx, participant_id: xx, time: xxx, url: xxx, rating: xx}, ...]</td>
    </tr>
    <tr>
      <td>deleted_entries</td>
      <td>[{entry_id: xx, participant_id: xx, status: xx, time: xxx, rating: xx}, {entry_id: xx, participant_id: xx, status: xx, time: xxx, rating: xx}, ...]</td>
    </tr>
    <tr>
      <td>winners</td>
      <td>[entry_id, entry_id]</td>
    </tr>

  </tbody>
</table>


In [1]:
import os
import re

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException

In [2]:
# URL of the contest page to scrape. Every other page fetched below is built from it by
# appending a suffix ('/brief', '/entries', '/entries/<n>'). The commented-out line is an
# alternative contest URL.
# competition_url = "https://99designs.hk/logo-design/contests/logo-design-wanted-pure-water-technology-63431"
competition_url = "https://99designs.hk/logo-design/contests/logo-design-appointmentpost-63470"

## 1. get competition information

In [3]:
# Download the contest landing page, sending a browser-like User-Agent header.
kv = {'user-agent': 'Mozilla/5.0'}
r = requests.get(competition_url, headers=kv, timeout=30)
# Raise immediately on an HTTP error status instead of parsing an error page.
r.raise_for_status()
# Decode the body with the encoding detected from its content.
r.encoding = r.apparent_encoding

In [4]:
# Parse the downloaded landing-page HTML (r.text) into a BeautifulSoup tree.
soup = BeautifulSoup(r.text, 'html.parser')

In [5]:
## title
result = soup.find(name='h1', attrs="heading heading--h1 heading--no-margin")
if result:
    title = result.text
else:
    title = None

title

'Logo Design for AppointmentPost.com'

In [6]:
# First <div> selected by 'inline-page' (the string attrs value is presumably matched
# against the CSS class — confirm). find() gives None when nothing matches, in which
# case the next cell would fail.
results = soup.find(name='div', attrs='inline-page')

In [7]:
# Collect the <p> section headings found inside the 'inline-page' block and display them.
headers = results.findAll("p", "heading heading--size4 heading--no-margin")
headers

[<p class="heading heading--size4 heading--no-margin">About us</p>,
 <p class="heading heading--size4 heading--no-margin">Logo types to explore</p>]

## 2. get brief page

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By


In [9]:
# Location of the ChromeDriver executable handed to Selenium. Written as a raw string so the
# Windows backslashes are taken literally rather than parsed as (invalid) escape sequences.
PATH = r"C:\Program Files (x86)\chromedriver.exe"

In [10]:
# Start a Chrome session via the driver executable at PATH and load the contest's '/brief' page.
# The cells below read from this live session until it is closed.
driver = webdriver.Chrome(PATH)
driver.get(competition_url + '/brief')

  driver = webdriver.Chrome(PATH)


In [11]:
# "Background information" section of the brief: locate the section container, then read the
# text of individual fields by their element ids.
# NOTE(review): the field XPaths start with '//', which presumably makes them document-wide
# lookups rather than lookups relative to background_information — confirm.
background_information = driver.find_element(By.XPATH, '//div[@id="section-backgroundInformation"]/div')
name_to_incorporate = background_information.find_element(By.XPATH, '//div[@id="element-backgroundInformation-logoBusinessName"]/div/div/div').text
slogan_to_incorporate = background_information.find_element(By.XPATH, '//div[@id="element-backgroundInformation-slogan"]/div/div/div/div').text
# Note: 'description' is read from the element whose id ends in 'targetAudience'.
description = background_information.find_element(By.XPATH, '//div[@id="element-backgroundInformation-targetAudience"]/div/div/div').text
industry = background_information.find_element(By.XPATH, '//div[@id="element-backgroundInformation-industry"]/div/div/div').text



In [12]:
# "Visual style" section of the brief.
visual_style = driver.find_element(By.XPATH, '//div[@id="section-visualStyle"]/div')
# These two fields are addressed by absolute XPaths from the document root, so they depend on
# the exact page layout.
logo_types = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[3]/div/div/div[2]/div/div[1]/div/div/div/div/div[2]/div/div/div[1]/div/div/div/div').text
logo_to_be_used = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[3]/div/div/div[2]/div/div[1]/div/div/div/div/div[2]/div/div/div[2]/div/div/div').text
# One list item per line of text in the colour-preferences element.
colors_to_explore = visual_style.find_element(By.XPATH, '//*[@id="element-visualStyle-colorPreferences"]/div/div/div').text.split('\n')
other_color_requirements = visual_style.find_element(By.XPATH, '//div[@id="element-visualStyle-otherColorRequirements"]/div/div/div').text
# Read the "aria-valuenow" attribute of the 1st..7th child under the styleAttributes element.
style_attributes = [
    visual_style.find_element(
        By.XPATH,
        '//*[@id="element-visualStyle-styleAttributes"]/div/div/div/div/div[' + str(i + 1) + ']/div[2]/div/div',
    ).get_attribute("aria-valuenow")
    for i in range(7)
]

# The design-examples element is optional: when it is absent the count is 0. Only the
# "element not found" case is treated that way; other failures (and Ctrl-C) now surface
# instead of being silently swallowed.
try:
    design_inspiration = visual_style.find_element(By.XPATH, '//*[@id="element-visualStyle-designExamples"]/div/div/div/div/div')
    design_inspiration_count = len(design_inspiration.find_elements(By.CLASS_NAME, "matrix__item"))
except NoSuchElementException:
    design_inspiration_count = 0



In [13]:
# "References" section of the brief: attachment count and free-text notes.
references = visual_style.find_element(By.XPATH, '//div[@id="section-references"]')
attachments = references.find_element(By.XPATH, '//*[@id="element-references-attachments"]/div/div/div').text
# The attachments element reads 'No files' when nothing is attached; otherwise count the
# 'matrix__item' elements under the references section.
if attachments == 'No files':
    attachments_count = 0
else:
    attachments_count = len(references.find_elements(By.CLASS_NAME, "matrix__item"))
other_notes = references.find_element(By.XPATH, '//*[@id="element-references-notes"]/div/div/div').text

In [14]:
# The brief page is no longer needed: end the whole WebDriver session (browser and driver
# process), not just the current window.
driver.quit()

## 3. get participants and entry info

In [15]:
# Download the contest's '/entries' page, sending a browser-like User-Agent header.
kv = {'user-agent': 'Mozilla/5.0'}
url = competition_url + "/entries"

r = requests.get(url, headers=kv, timeout=30)
# Raise immediately on an HTTP error status instead of parsing an error page.
r.raise_for_status()
# Decode the body with the encoding detected from its content.
r.encoding = r.apparent_encoding

In [16]:
# Parse the downloaded '/entries' HTML (r.text); this rebinds `soup` from the landing page.
soup = BeautifulSoup(r.text, 'html.parser')

In [17]:
## competition start date
text = str(soup.find(name='div', attrs="contest-header__price"))
start_time = re.search('"startDate": .+,', text).group(0).replace('"startDate": "', '')[:-2]
print(start_time)

Tue, 25 Jan 2011 16:46:09 +0000


### winner id and entry id

In [18]:
def make_soup(competition_url, page_number, active=True):
    """Download one page of a contest's entry listing and return it parsed.

    Args:
        competition_url: base URL of the contest page.
        page_number: value placed in the ``page`` query parameter.
        active: truthy requests ``filter=active``; falsy requests ``filter=non_active``.

    Returns:
        The BeautifulSoup tree of the response body.

    Raises:
        Whatever ``requests`` raises for connection problems or, via
        ``raise_for_status``, for HTTP error statuses.
    """
    entry_filter = "active" if active else "non_active"
    url = competition_url + "/entries?filter=" + entry_filter + "&page=" + str(page_number)
    print(url)  # progress trace: shows which listing page is being fetched

    response = requests.get(url, headers={'user-agent': 'Mozilla/5.0'}, timeout=30)
    response.raise_for_status()
    # Decode with the encoding detected from the content.
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, 'html.parser')

In [19]:
def get_winner_info(soup):
    """Return the ``id`` attribute of the entry inside each winner tile on a listing page.

    For every 'entry-winners' tile the linked-entry <div> is preferred; if the tile has none
    (the case where the winning entry was deleted), any <div> selected by 'entry' is used
    instead. Tiles with neither contribute nothing.

    Args:
        soup: parsed listing page.

    Returns:
        List of entry id strings, in page order.
    """
    winner_tiles = soup.findAll(name='div', attrs='entry-matrix__item matrix__item entry-winners')
    winner_entries = (
        tile.find(name='div', attrs="entry entry--linked entry--zoom-linked")
        or tile.find(name='div', attrs="entry")  # case if winner entry is deleted
        for tile in winner_tiles
    )
    return [winner_entry["id"] for winner_entry in winner_entries if winner_entry]


In [20]:
# Fetch page 1 of the active entries and read the winner entry ids from it.
page_number = 1
soup = make_soup(competition_url, page_number)
winner_entry_ids = get_winner_info(soup) 
winner_entry_ids

https://99designs.hk/logo-design/contests/logo-design-appointmentpost-63470/entries?filter=active&page=1


['entry-44']

In [21]:
# Contest host: text of the first <span> selected by 'display-name' on the listing page.
# find() gives None when nothing matches, in which case `.text` fails here.
seeker = soup.find(name='span', attrs="display-name").text
seeker

'Dan.lausted'

In [22]:
# Text of the entry-filter <select>, with newlines removed. The option labels carry the
# per-filter counts that the next cell parses.
entry_summary = soup.find(name='select', attrs="styled-select__select")
entry_summary = entry_summary.text.replace('\n', "")
entry_summary

'            All (12)                    Unrated (2)                    1–2 stars (4)                    3–5 stars (6)                    Declined and withdrawn (48)        '

In [23]:
# Pull the numbers out of the "All (N)" and "Declined and withdrawn (N)" option labels.
# Capture groups replace hand-counted slice offsets, raw strings keep the regex escapes
# valid, and both counts are stored as integers.
entry_count = int(re.search(r'All \((\d+)\)', entry_summary).group(1))
deleted_entry_count = int(re.search(r'Declined and withdrawn \((\d+)\)', entry_summary).group(1))
print(entry_count, deleted_entry_count)

12 48


In [24]:
def get_participant_and_entry_info(soup):
    """Append user id, entry id and entry link of each listed entry to the global lists.

    Results go to the module-level lists ``participants_user_ids``,
    ``participants_entry_ids`` and ``participants_entry_image_urls``, which must already
    exist. A tile is skipped entirely when it has no linked-entry <div>, no <a> inside it,
    or is missing one of the attributes. All three values are read before anything is
    appended, so the lists always stay the same length.

    Args:
        soup: parsed listing page.

    Returns:
        None.
    """
    for entry in soup.findAll(name='div', attrs='entry-matrix__item matrix__item'):
        result = entry.find(name='div', attrs='entry entry--linked entry--zoom-linked')
        if result is None:
            continue
        link = result.find(name='a')
        if link is None:
            continue
        try:
            user_id = result["data-user-id"]
            entry_id = result["id"]
            href = link["href"]
        except KeyError:
            # An expected attribute is missing: skip the tile rather than record a partial row.
            continue
        participants_user_ids.append(user_id)
        participants_entry_ids.append(entry_id)
        participants_entry_image_urls.append(href)

In [25]:
def get_time(soup):
    """Extract every ``timeCreatedString`` value from the entry pane of a listing page.

    The markup of the 'entry-pane__results' <div> is searched first for the plain JSON form
    (``"timeCreatedString":"<value>"``) and, if that yields nothing, for the HTML-escaped
    form (``timeCreatedString&quot;:&quot;<value>&quot;``). Values must be 20-30 characters.

    Args:
        soup: parsed listing page.

    Returns:
        List of timestamp strings in page order; an empty list when none are found, so
        callers can always extend a list with the result.
    """
    text = str(soup.find(name='div', attrs='entry-pane__results'))

    # Plain JSON: the value cannot contain a double quote, so stop at the first one.
    plain = re.findall(r'"timeCreatedString":"([^"]{20,30})"', text)
    if plain:
        return plain

    # HTML-escaped JSON: take the shortest 20-30 character run that is followed by &quot;.
    return re.findall(r'timeCreatedString&quot;:&quot;(.{20,30}?)&quot;', text)

        
    

In [26]:
def get_prize(soup):
    """Extract the ``prizeMoney`` value from the contest header of a listing page.

    The children of the 'contest-header contest-header--with-breadcrumbs' <div> are
    rendered to text and searched for ``"prizeMoney": "<value>"`` with a value of 2-10
    characters.

    Args:
        soup: parsed listing page.

    Returns:
        The prize string exactly as it appears between the quotes, or None when the header
        or the field is not present.
    """
    header = soup.find(name='div', attrs='contest-header contest-header--with-breadcrumbs')
    if header is None:
        return None
    # [^"] stops at the closing quote, so text following the value is never captured.
    match = re.search(r'"prizeMoney": "([^"]{2,10})"', str(header.contents))
    return match.group(1) if match else None


In [27]:
## get ratings


In [28]:
def get_ratings(competition_url, page_number, active=True):
    """Read the rating of every entry on one listing page using a Chrome session.

    The page is opened in a new browser, the ``data-design-collection`` attribute of the
    entry container is read, and each ``"rating":<digit>`` or ``"rating":false``
    occurrence in it is collected.

    Args:
        competition_url: base URL of the contest page.
        page_number: value placed in the ``page`` query parameter.
        active: truthy requests ``filter=active``; falsy requests ``filter=non_active``.

    Returns:
        List of strings, each a single digit or ``'false'``, in page order.

    Raises:
        Whatever Selenium raises when the page or the container element cannot be
        reached; the browser session is shut down in that case too.
    """
    entry_filter = "active" if active else "non_active"
    url = competition_url + "/entries?filter=" + entry_filter + "&page=" + str(page_number)

    driver = webdriver.Chrome(PATH)
    try:
        driver.get(url)
        # Absolute XPath to the entry container; depends on the exact page layout.
        entry_information = driver.find_element(By.XPATH, '/html/body/div[1]/div[1]/div[3]/div/div/div[2]/div[5]/div[1]/div/div').get_attribute('data-design-collection')
    finally:
        # Always end the session so a failed lookup does not leave a browser running.
        driver.quit()

    return re.findall(r'"rating":(\d|false)', entry_information)

In [29]:
# Accumulators for the active entries, filled page by page.
participants_user_ids = []
participants_entry_ids = []
participants_entry_image_urls = []
participants_submission_time = []
ratings = []
page_number = 1

while True:
    print(page_number)
    soup = make_soup(competition_url, page_number)
    if page_number == 1:
        # Prize and winners are read once, from the first page only.
        prize = get_prize(soup)
        winner_entry_ids = get_winner_info(soup)

    get_participant_and_entry_info(soup)
    rating = get_ratings(competition_url, page_number)
    ratings += rating

    # Read the submission times of *this* page. Previously pages after the first re-appended
    # the first page's times, pairing later entries with the wrong timestamps.
    time = get_time(soup)
    participants_submission_time += time or []

    # A page with fewer than 36 ratings is treated as the last page.
    if len(rating) < 36:
        break

    page_number += 1

1
https://99designs.hk/logo-design/contests/logo-design-appointmentpost-63470/entries?filter=active&page=1


  driver = webdriver.Chrome(PATH)


In [30]:
# Sanity check: number of collected submission times and the first five values.
len(participants_submission_time), participants_submission_time[:5]

(12,
 ['2011-01-31T01:21:02+00:00',
  '2011-01-31T00:54:05+00:00',
  '2011-01-28T18:26:42+00:00',
  '2011-01-28T18:29:00+00:00',
  '2011-01-28T16:01:20+00:00'])

In [31]:
# Combine the parallel per-entry lists into one dict per entry. zip() stops at the shortest
# list, so entries beyond that length are not included.
entry_fields = ('entry_id', 'participant_id', 'time', 'url', 'rating')
entries = []
for entry_id, participant_id, time, url, rating in zip(
        participants_entry_ids,
        participants_user_ids,
        participants_submission_time,
        participants_entry_image_urls,
        ratings):
    entries.append(dict(zip(entry_fields, (entry_id, participant_id, time, url, rating))))


In [32]:
# Inspect the first five entry records.
entries[:5]

[{'entry_id': 'entry-45',
  'participant_id': '266667',
  'time': '2011-01-31T01:21:02+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/45',
  'rating': '4'},
 {'entry_id': 'entry-44',
  'participant_id': '266667',
  'time': '2011-01-31T00:54:05+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/44',
  'rating': '4'},
 {'entry_id': 'entry-35',
  'participant_id': '266667',
  'time': '2011-01-28T18:26:42+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/35',
  'rating': '4'},
 {'entry_id': 'entry-36',
  'participant_id': '266667',
  'time': '2011-01-28T18:29:00+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/36',
  'rating': '3'},
 {'entry_id': 'entry-34',
  'participant_id': '266667',
  'time': '2011-01-28T16:01:20+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/34',
  'rating': '3'}]

In [33]:
# Inspect the last five entry records.
entries[-5:]

[{'entry_id': 'entry-46',
  'participant_id': '482288',
  'time': '2011-01-31T04:10:24+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/46',
  'rating': '2'},
 {'entry_id': 'entry-48',
  'participant_id': '289217',
  'time': '2011-01-31T13:54:26+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/48',
  'rating': '1'},
 {'entry_id': 'entry-30',
  'participant_id': '266667',
  'time': '2011-01-28T14:10:50+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/30',
  'rating': '1'},
 {'entry_id': 'entry-60',
  'participant_id': '511441',
  'time': '2011-02-01T03:26:36+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/60',
  'rating': 'false'},
 {'entry_id': 'entry-50',
  'participant_id': '509620',
  'time': '2011-01-31T20:59:48+00:00',
  'url': '/logo-design/contests/logo-design-appointmentpost-63470/entries/50',
  'rating': 'false'}]

## 4. get participants and entry info for deleted entries

In [35]:
def get_time_and_status_for_deleted_entry(entry_id):
    """Fetch one entry's own page and read its creation time and removal status.

    The page URL is built from the module-level ``competition_url`` and the part of
    ``entry_id`` after its last '-'. Both values are read from the markup of the
    <script id="standalone-design-details-app-data"> element.

    Args:
        entry_id: entry identifier such as ``'entry-32'``.

    Returns:
        ``(time, status)``. ``time`` is the ``timeCreatedString`` value, or None when the
        page does not contain one. ``status`` is ``'withdrawn'`` when the script contains
        ``"status":"withdrawn"``, ``'declined'`` when it contains ``"status":"eliminated"``,
        and ``'deleted'`` otherwise.

    Raises:
        Whatever ``requests`` raises for connection problems or, via
        ``raise_for_status``, for HTTP error statuses.
    """
    entry_number = entry_id.split('-')[-1]
    entry_url = competition_url + '/entries/' + entry_number

    ## make soup for entry page
    response = requests.get(entry_url, headers={'user-agent': 'Mozilla/5.0'}, timeout=30)
    response.raise_for_status()
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, 'html.parser')

    script = str(soup.find('script', id="standalone-design-details-app-data"))

    # [^"] stops at the value's closing quote, so the following JSON key is never captured.
    time_match = re.search(r'"timeCreatedString":"([^"]{20,30})"', script)
    time = time_match.group(1) if time_match else None

    if '"status":"withdrawn"' in script:
        status = 'withdrawn'
    elif '"status":"eliminated"' in script:
        status = 'declined'
    else:
        status = 'deleted'

    return time, status
    


In [36]:
def get_participant_and_entry_info_deleted_page(soup):
    """Collect user id, entry id, status and submission time for each entry on a
    non-active listing page.

    For every tile, the entry's own page is fetched through
    ``get_time_and_status_for_deleted_entry``. A tile is skipped as a whole (with a printed
    note) when it has no 'entry' <div>, lacks an attribute, or its page cannot be read.
    Nothing is appended until all values for the tile are known, so the four returned
    lists always have equal length.

    Args:
        soup: parsed listing page.

    Returns:
        Tuple ``(user_ids, entry_ids, status, submission_time, deleted_count,
        withdrawn_count, declined_count)``: four parallel lists followed by the number of
        collected entries with each status.
    """
    user_ids = []
    entry_ids = []
    status = []
    submission_time = []
    counts = {'deleted': 0, 'withdrawn': 0, 'declined': 0}

    for entry in soup.findAll(name='div', attrs='entry-matrix__item matrix__item'):
        result = entry.find(name='div', attrs='entry')
        if result is None:
            continue
        try:
            user_id = result["data-user-id"]
            entry_id = result["id"]
            time, s = get_time_and_status_for_deleted_entry(entry_id)
        except Exception as exc:
            # Best effort: one unreadable entry must not abort the page, but say so
            # instead of failing silently. Ctrl-C is no longer swallowed.
            print('skipping entry:', repr(exc))
            continue

        user_ids.append(user_id)
        entry_ids.append(entry_id)
        submission_time.append(time)
        status.append(s)
        if s in counts:
            counts[s] += 1

    return (user_ids, entry_ids, status, submission_time,
            counts['deleted'], counts['withdrawn'], counts['declined'])

In [37]:
# Accumulators for the non-active (declined / withdrawn / deleted) entries.
deleted_participants_user_ids = []
deleted_participants_entry_ids = []
status_all = []
submission_time_all = []
total_deleted_count = 0
total_withdrawn_count = 0
total_declined_count = 0
ratings = []  # reset: from here on this holds the ratings of the non-active entries

page_number = 1
winner_number = len(winner_entry_ids)


while True:
    print(page_number)
    soup = make_soup(competition_url, page_number, active=False)
    if page_number == 1:
        # NOTE(review): this replaces the winners read from the active listing with those
        # found on the first non-active page — confirm that is intended.
        winner_entry_ids = get_winner_info(soup)

    (user_ids, entry_ids, status, submission_time,
     deleted_count, withdrawn_count, declined_count) = get_participant_and_entry_info_deleted_page(soup)
    deleted_participants_user_ids.extend(user_ids)
    deleted_participants_entry_ids.extend(entry_ids)
    rating = get_ratings(competition_url, page_number, False)
    ratings.extend(rating)
    status_all.extend(status)
    submission_time_all.extend(submission_time)
    total_deleted_count += deleted_count
    total_withdrawn_count += withdrawn_count
    total_declined_count += declined_count

    # A page that yielded fewer than 36 entries is treated as the last page.
    if len(user_ids) < 36:
        break

    page_number += 1

1
https://99designs.hk/logo-design/contests/logo-design-appointmentpost-63470/entries?filter=non_active&page=1


  driver = webdriver.Chrome(PATH)


2
https://99designs.hk/logo-design/contests/logo-design-appointmentpost-63470/entries?filter=non_active&page=2


In [38]:
# Print the collected user ids and entry ids of the non-active entries (parallel lists).
print(deleted_participants_user_ids)
print(deleted_participants_entry_ids)

['300080', '396217', '332573', '300080', '300080', '332573', '332573', '332573', '384234', '491986', '410426', '410426', '410426', '509385', '332573', '332573', '332573', '332573', '332573', '367806', '367806', '367806', '367806', '332573', '497832', '332573', '332573', '332573', '495898', '495898', '495898', '495898', '495898', '495898', '495898', '509587', '332573', '501828', '501828', '501828', '501828', '501828', '495898', '495898', '495898', '495898', '495898', '509385']
['entry-32', 'entry-49', 'entry-40', 'entry-31', 'entry-29', 'entry-37', 'entry-28', 'entry-27', 'entry-26', 'entry-20', 'entry-19', 'entry-18', 'entry-17', 'entry-1', 'entry-59', 'entry-58', 'entry-57', 'entry-56', 'entry-55', 'entry-54', 'entry-53', 'entry-52', 'entry-51', 'entry-43', 'entry-42', 'entry-41', 'entry-39', 'entry-38', 'entry-25', 'entry-24', 'entry-23', 'entry-22', 'entry-21', 'entry-16', 'entry-15', 'entry-14', 'entry-13', 'entry-12', 'entry-11', 'entry-10', 'entry-9', 'entry-8', 'entry-7', 'entry

In [39]:
# Combine the parallel lists of the non-active entries into one dict per entry. zip() stops
# at the shortest list, so entries beyond that length are not included.
deleted_entry_fields = ('entry_id', 'participant_id', 'status', 'time', 'rating')
deleted_entries = []
for entry_id, participant_id, status, submission_time, rating in zip(
        deleted_participants_entry_ids,
        deleted_participants_user_ids,
        status_all,
        submission_time_all,
        ratings):
    deleted_entries.append(
        dict(zip(deleted_entry_fields, (entry_id, participant_id, status, submission_time, rating)))
    )

## output to file

In [40]:
# One-row table describing the whole contest. List-valued fields are wrapped in a
# one-element list so each is stored as a single cell; plain scalars fill the same single row.
competition_record = {
    'title': [title],
    'url': competition_url,
    'seeker': seeker,
    'start_time': start_time,
    'name_to_incorporate': [name_to_incorporate],
    'slogan_to_incorporate': [slogan_to_incorporate],
    'description': [description],
    'industry': [industry],
    'logo_types': [logo_types],
    'logo_to_be_used': [logo_to_be_used],
    'colors_to_explore': [colors_to_explore],
    'other_color_requirements': [other_color_requirements],
    'style_attributes': [style_attributes],
    'design_inspiration_count': [design_inspiration_count],
    'attachments_count': [attachments_count],
    'other_notes': [other_notes],
    'entry_count': entry_count,
    'deleted_entry_count': deleted_entry_count,
    'deleted': total_deleted_count,
    'withdrawn': total_withdrawn_count,
    'declined': total_declined_count,
    'prize': prize,
    'entries': [entries],
    'deleted_entries': [deleted_entries],
    'winners': [winner_entry_ids],
}
df_competition_description = pd.DataFrame(competition_record)

In [41]:
# Name the CSV after the last path segment of the contest URL (ignoring a trailing slash)
# and make sure the output directory exists before writing into it.
competition_name = competition_url.rstrip("/").split("/")[-1]
os.makedirs('data_20220301', exist_ok=True)
df_competition_description.to_csv('data_20220301/' + competition_name+'.csv', index=False)