# Installation and Set-Up

In [1]:
import os

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import *
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from fake_useragent import UserAgent
import pandas as pd
import datetime
import time

# Notebook-wide pandas display settings:
# no limit on the number of columns shown, at most 100 rows.
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

## Import and Install Chromedriver

Running the cell below installs chromedriver so that it is available to the current session.

In [8]:
# Install chromedriver for this session. The call is left as the last bare
# expression so that its return value (the path of the chromedriver binary)
# is displayed as the cell output.
# NOTE(review): presumably this also makes the binary discoverable by
# webdriver.Chrome() without an explicit path -- confirm against the
# chromedriver_autoinstaller documentation.
import chromedriver_autoinstaller
chromedriver_autoinstaller.install()

'/opt/anaconda3/envs/nft/lib/python3.8/site-packages/chromedriver_autoinstaller/99/chromedriver'

In [9]:
def web_driver(headless=False):
    """Create and return a Chrome WebDriver with this notebook's options.

    Parameters
    ----------
    headless : bool, default False
        When truthy, the ``--headless`` argument is added to the Chrome
        options before the browser is started.

    Returns
    -------
    The object returned by ``webdriver.Chrome(options=...)``. This function
    does not close it; the caller must call ``quit()`` when finished.
    """
    option = webdriver.ChromeOptions()
    if headless:
        option.add_argument('--headless')
    # Arguments applied to every session, in the same order as before.
    for argument in ('start-maximized', 'disable-infobars', '--disable-extensions'):
        option.add_argument(argument)
    return webdriver.Chrome(options=option)

Once we have defined the web_driver function, we will be able to run the Chromedriver

# Web Scraping

As an exercise, we will be scraping the Bored Ape Yacht Club collection on OpenSea.

In [65]:
# Put the URL of your desired website here
COLLECTION_URL = 'https://opensea.io/collection/boredapeyachtclub'

# Start a browser session and navigate to the collection page.
driver = web_driver()
driver.get(COLLECTION_URL)

# Fixed pause after navigation (presumably to let the page finish rendering
# before any scraping starts).
time.sleep(5)

## Scrape through Bored Ape NFTs

In [84]:
# Scraping parameters, named so they can be tuned in one place
N_SECTIONS = 40          # number of PAGE DOWN steps ("sections") to scrape
SECTION_LOAD_WAIT = 10   # seconds to sleep at the start of each section
SECTION_END_WAIT = 2     # seconds to sleep before paging down again

# Print time started to track time duration of scraping
print(f'Time started: {datetime.datetime.now()}')

# Perform PAGE DOWN action to load NFTs
webdriver.ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()

# hrefs that were scraped successfully; used to record each NFT only once
record_link = set()

# Create empty lists to store data (one list per output column)
series_df = []
nft_number_df = []
best_offer_df = []
likes_df = []
timestamp_df = []

# Number of anchors that could not be scraped
error = 0

# Loop through the sections of NFTs
for section in range(N_SECTIONS):
    print(f'Scraping Section {section + 1}')
    time.sleep(SECTION_LOAD_WAIT)
    # By-based locators work on both Selenium 3 and 4; the
    # find_elements_by_* helpers were removed in Selenium 4.3.
    collection_nft = driver.find_elements(By.CLASS_NAME, "Asset--anchor")
    for anchor in collection_nft:
        hyperlink = None
        try:
            hyperlink = anchor.get_attribute("href")
            if hyperlink in record_link:
                continue
            # Sometimes, last panel info does not load despite href being there
            # Also, hyperlink is 'None' (then .split raises and the anchor is
            # counted as an error below)
            hyperlink_element = "/" + hyperlink.split('/', maxsplit=3)[-1]
            nft_grid_elements = driver.find_element(
                By.XPATH, "//a[@href='" + hyperlink_element + "']")
            # Read the text once so all fields come from the same snapshot
            fields = nft_grid_elements.text.split('\n')
            series = fields[0]
            nft_number = int(fields[1])
            best_offer = fields[3]
            likes = fields[-1]
            timestamp = datetime.datetime.now()
        except Exception as exc:
            # Best effort: count and report the failure, then keep scraping.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # stop this long-running cell.
            error += 1
            print(f"Error Count: {error} ({type(exc).__name__})")
            continue

        # Append only after every field parsed, so the lists stay aligned
        series_df.append(series)
        nft_number_df.append(nft_number)
        best_offer_df.append(best_offer)
        likes_df.append(likes)
        timestamp_df.append(timestamp)
        record_link.add(hyperlink)

    time.sleep(SECTION_END_WAIT)
    webdriver.ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()

nft_df3 = pd.DataFrame(
        {'series': series_df,
         'nft_number': nft_number_df,
         'best_offer': best_offer_df,
         'likes': likes_df,
         'timestamp': timestamp_df
        })

print(f'Time finished: {datetime.datetime.now()}')
print(f'Number of nft scraped: {len(nft_df3)}')

Time started: 2022-03-26 13:37:59.755477
Scraping Section 1
Scraping Section 2
Scraping Section 3
Scraping Section 4
Scraping Section 5
Scraping Section 6
Scraping Section 7
Scraping Section 8
Scraping Section 9
Scraping Section 10
Scraping Section 11
Scraping Section 12
Scraping Section 13
Scraping Section 14
Scraping Section 15
Scraping Section 16
Scraping Section 17
Scraping Section 18
Scraping Section 19
Scraping Section 20
Scraping Section 21
Scraping Section 22
Scraping Section 23
Scraping Section 24
Scraping Section 25
Scraping Section 26
Scraping Section 27
Scraping Section 28
Scraping Section 29
Scraping Section 30
Scraping Section 31
Scraping Section 32
Scraping Section 33
Scraping Section 34
Scraping Section 35
Scraping Section 36
Scraping Section 37
Scraping Section 38
Scraping Section 39
Scraping Section 40
Error Count: 1
Error Count: 2
Time finished: 2022-03-26 13:46:53.788091
Number of nft scraped: 418


In [None]:
# Shut down the browser session held in `driver` now that scraping is done.
driver.quit()

In [87]:
# Display the scraped rows ordered by NFT number; the result is not assigned,
# so nft_df3 itself keeps its scrape order.
nft_df3.sort_values(by="nft_number")

Unnamed: 0,series,nft_number,best_offer,likes,timestamp
15,Bored Ape Yacht Club,30,59.2,35,2022-03-26 13:38:11.712210
16,Bored Ape Yacht Club,31,97.01,48,2022-03-26 13:38:11.812955
17,Bored Ape Yacht Club,32,102.1111,75,2022-03-26 13:38:11.915217
18,Bored Ape Yacht Club,33,85.54,63,2022-03-26 13:38:12.017859
19,Bored Ape Yacht Club,34,92.01,50,2022-03-26 13:38:12.110783
...,...,...,...,...,...
416,Bored Ape Yacht Club,444,30,13,2022-03-26 13:46:51.646331
417,Bored Ape Yacht Club,445,129,343,2022-03-26 13:46:51.751778
410,Bored Ape Yacht Club,446,102.1111,72,2022-03-26 13:46:51.028621
411,Bored Ape Yacht Club,447,30,134,2022-03-26 13:46:51.135806


In [96]:
# Preview the first ten rows in the order they were scraped.
nft_df3.head(n=10)

Unnamed: 0,series,nft_number,best_offer,likes,timestamp
0,Bored Ape Yacht Club,35,335.0,50,2022-03-26 13:38:10.216100
1,Bored Ape Yacht Club,36,52.585,68,2022-03-26 13:38:10.335123
2,Bored Ape Yacht Club,37,59.2,65,2022-03-26 13:38:10.441677
3,Bored Ape Yacht Club,38,59.3,63,2022-03-26 13:38:10.549043
4,Bored Ape Yacht Club,39,52.585,658,2022-03-26 13:38:10.642763
5,Bored Ape Yacht Club,45,58.626,80,2022-03-26 13:38:10.738568
6,Bored Ape Yacht Club,46,59.2,17,2022-03-26 13:38:10.826591
7,Bored Ape Yacht Club,47,52.585,22,2022-03-26 13:38:10.902031
8,Bored Ape Yacht Club,48,52.558,26,2022-03-26 13:38:11.005019
9,Bored Ape Yacht Club,49,52.585,35,2022-03-26 13:38:11.110458


In [92]:
# Save the scraped data to a CSV file in the current working directory.
# NOTE(review): index=False is not passed, so pandas' default of writing the
# row index as an extra unnamed first column applies -- confirm this is wanted.
nft_df3.to_csv("boredape_df.csv")

# Reflection

For 40 iterations:

Number scraped: 418

Time spent: 8 minutes 55 seconds

≈ 46 NFTs per minute

Assuming that there are 10,000 NFTs listed on OpenSea:
10,000 / 46 ≈ 217 minutes,
i.e. just under 4 hours

# What improvements can be made?
1. Account for the anti-bot system that OpenSea has started to implement to prevent web scraping

2. Build a faster web crawling system

3. Try web crawlers other than Selenium

4. Extend the scraper to also capture the features of each NFT


