# Mise à jour de la BDD de NFT
Source: [binance.com](https://www.binance.com/en/nft/market?currency=&mediaType=&tradeType=&amountFrom=&amountTo=&categorys=&keyword=&page=1&rows=16&productIds=&order=list_time%40-1)

In [1]:
import os
import time
from urllib.parse import parse_qs, urlparse

import requests
import numpy as np
# NOTE: the star import is kept at its original position relative to the
# imports below, so names it may export cannot start shadowing them.
from utils import *
from tqdm import tqdm
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait



## 1. Ouverture du marketplace

- Paramétrage du navigateur distant
- Chargement de la page d'accueil
- Activation des cookies 

In [3]:
%%time
# Marketplace listing used as the entry point (page=1, rows=16 per the query string).
start_url = 'https://www.binance.com/en/nft/market?currency=&mediaType=&tradeType=&amountFrom=&amountTo=&categorys=&keyword=&page=1&rows=16&productIds=&order=list_time%40-1'

# Maximum time, in seconds, to wait for the cookie banner to become clickable.
COOKIE_BANNER_TIMEOUT = 30

options = webdriver.ChromeOptions()
options.add_argument('--headless')
# Containerised Chrome often gets a very small /dev/shm, which is a common
# cause of "session deleted because of page crash / tab crashed". This flag
# makes Chrome use a temporary directory instead of /dev/shm.
options.add_argument('--disable-dev-shm-usage')

browser = webdriver.Remote("http://selenium:4444/wd/hub", options=options)
try:
    browser.get(start_url)
    # The banner may not be in the DOM yet when get() returns, so wait for the
    # button explicitly instead of looking it up once.
    cookies = WebDriverWait(browser, COOKIE_BANNER_TIMEOUT).until(
        EC.element_to_be_clickable((By.XPATH, "//button[contains(text(),'Accept')]"))
    )
    cookies.click()
except Exception:
    # Do not leave a dangling session on the remote Selenium server when the
    # setup fails; the original error is re-raised unchanged.
    browser.quit()
    raise

WebDriverException: Message: unknown error: session deleted because of page crash
from unknown error: cannot determine loading status
from tab crashed
  (Session info: headless chrome=94.0.4606.61)


## 2. Défilement des pages sur le marketplace
 - Choix du nombre de nft à récupérer
 - Chargement des pages contenant les nfts à récupérer 

In [None]:
%%time

MAX_NFT_NB = 10  # select_number()

# Maximum time, in seconds, to wait for the URL to switch to the next page.
PAGE_CHANGE_TIMEOUT = 60
# Pause between two scroll attempts, so the remote browser is not flooded.
SCROLL_PAUSE = 0.5


def _query_int(url, name):
    """Return query parameter ``name`` of ``url`` as an int.

    Raises ``KeyError`` when the parameter is missing or empty and
    ``ValueError`` when its value is not an integer.
    """
    return int(parse_qs(urlparse(url).query)[name][0])


# Both values are kept as ints for the whole cell: the page number is compared
# with the live URL below, and mixing int and str would make that comparison
# always false.
CURRENT_PAGE_NUMBER = _query_int(browser.current_url, "page")
NFT_NUMBER_PER_PAGE = _query_int(browser.current_url, "rows")

# The first page is already loaded, so only ceil(MAX / rows) - 1 extra pages
# are needed. The name differs from ``scroll_down`` on purpose, to avoid
# shadowing the helper of that name imported from ``utils``.
pages_to_load = max(0, (MAX_NFT_NB - 1) // NFT_NUMBER_PER_PAGE)

for _ in tqdm(range(pages_to_load), postfix=' Scrolling down on the main page'):
    deadline = time.monotonic() + PAGE_CHANGE_TIMEOUT
    # Keep scrolling to the bottom until the "page" parameter of the URL changes.
    while _query_int(browser.current_url, "page") == CURRENT_PAGE_NUMBER:
        if time.monotonic() > deadline:
            raise TimeoutError(
                f"Page number did not change from {CURRENT_PAGE_NUMBER} "
                f"within {PAGE_CHANGE_TIMEOUT}s"
            )
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(SCROLL_PAUSE)
    CURRENT_PAGE_NUMBER = _query_int(browser.current_url, "page")

## 3. Récupération des liens des pages détaillées
- Détection des boutons donnant accès aux infos détaillées

In [None]:
%%time

# Seconds to keep polling without finding any new button before giving up.
STALL_TIMEOUT = 30
# Pause between two polls, so the remote browser is not flooded.
POLL_PAUSE = 0.5

detailed_pages = []
BAR_LEVEL = 0  # number of buttons already reported on the progress bar

# The context manager closes the bar even if a Selenium call raises.
with tqdm(total=MAX_NFT_NB, postfix=" Searching of NFT detailed pages") as pbar:
    last_progress = time.monotonic()
    while True:
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Buttons whose text contains "BSC" are the ones clicked later to open
        # the detailed page of each NFT.
        detailed_pages = browser.find_elements(By.XPATH, '//button[contains(text(),"BSC")]')

        # Cap at the requested amount so the bar never overshoots its total.
        found = min(len(detailed_pages), MAX_NFT_NB)
        if found > BAR_LEVEL:
            pbar.update(found - BAR_LEVEL)
            BAR_LEVEL = found
            last_progress = time.monotonic()

        if found >= MAX_NFT_NB:
            break
        if time.monotonic() - last_progress > STALL_TIMEOUT:
            # The page offers fewer buttons than requested: continue with what
            # was found instead of spinning forever.
            print(f"Only {found}/{MAX_NFT_NB} detailed pages found after {STALL_TIMEOUT}s without progress")
            break
        time.sleep(POLL_PAUSE)

# A page can expose more buttons than requested; honour the requested maximum.
detailed_pages = detailed_pages[:MAX_NFT_NB]

## 4. Parcours des pages détaillées
- Récupération des données utiles sur les nft
- Nettoyage des chaînes de caractères
- Stockage des dictionnaires de données
- Fermeture du navigateur distant

In [None]:
%%time
success = 0
failure = 0
total = len(detailed_pages)
# Status code of the most recent upload, shown in the summary line. It stays
# None when there was nothing to scrape, so the summary no longer fails with a
# NameError on an empty list.
last_status = None

try:
    for page in tqdm(detailed_pages, postfix=" Scraping of detailed NFT pages"):
        # Clicking the button is expected to open the detail page in a new
        # window, which then is the last handle.
        page.click()
        browser.switch_to.window(browser.window_handles[-1])

        # parse_nft comes from utils; its result is only used through
        # ``status_code`` here (presumably an HTTP response from the storage
        # API - confirm in utils).
        res = parse_nft(browser)
        last_status = res.status_code
        if last_status == 201:
            success += 1
        else:
            failure += 1

        # Close the detail window and return to the remaining (listing) window.
        browser.close()
        browser.switch_to.window(browser.window_handles[-1])

    print(f"Success={success}/{total}; Failure={failure}/{total} - {last_status}")
    # The collection size is estimated by counting "_id" occurrences in the raw
    # response body. The timeout keeps a stuck API from hanging the cell.
    print(f"NFT collection shape: {requests.get('http://api:8000/nfts/', timeout=10).text.count('_id')}")
finally:
    # Always release the remote Selenium session, even when scraping fails.
    browser.quit()

## 5. Automatisation du scraping de nfts
- Lancement du processus toutes les 5h

In [4]:
#%%writefile nft.py

from utils import *

# Delay between two scraping runs, in seconds. The value used here is
# 2 minutes; a 5 h interval would be 18000.
WAIT = 2 * 60

# Amount of NFTs requested from the scrolling and link-discovery helpers.
MAX_NFT_NB = 16

# Time at which the next run is due; refreshed by wait_next_scraping().
NEXT_START = datetime.now() + timedelta(seconds=WAIT)

start_url = 'https://www.binance.com/en/nft/market?currency=&mediaType=&tradeType=&amountFrom=&amountTo=&categorys=&keyword=&page=1&rows=16&productIds=&order=list_time%40-1'

# No browser yet: open_binance() receives None on the first pass and the
# browser it returned on every later pass.
browser = None

# Endless scraping cycle; stop it by interrupting the kernel / process.
while True:
    # Open the marketplace.
    browser = open_binance(start_url, browser)
    # Scroll down the marketplace listing.
    scroll_down(browser, MAX_NFT_NB)
    # Look for the detailed pages.
    detailed_pages = find_detailed_nft_pages(browser, MAX_NFT_NB)
    # Scrape the detailed pages.
    parse_detailed_nft_pages(browser, detailed_pages)
    # Wait until the next scraping run.
    NEXT_START = wait_next_scraping(NEXT_START, WAIT)

2021-10-11 13:06:51: Scraping of detailed NFT pages: 100%|██████████| 16/16 [00:58<00:00,  3.63s/it]


2021-10-11 13:07:49: Storage on database (Success=16/16; Failure=0/16)


2021-10-11 13:07:49: Waiting for next scraping: 100%|██████████| 56/56 [00:56<00:00,  1.00s/it]


2021-10-11 13:08:45: New scraping initiated
2021-10-11 13:08:47: Binance page correctly opened


2021-10-11 13:08:47: Scrolling down on the main page: 0it [00:00, ?it/s]
2021-10-11 13:08:47: Searching of NFT detailed pages: 100%|██████████| 16/16 [00:00<00:00, 17.10it/s]
2021-10-11 13:08:48: Scraping of detailed NFT pages: 100%|██████████| 16/16 [00:43<00:00,  2.74s/it]


2021-10-11 13:09:32: Storage on database (Success=16/16; Failure=0/16)


2021-10-11 13:09:32: Waiting for next scraping:  58%|█████▊    | 43/74 [00:43<00:31,  1.01s/it]


KeyboardInterrupt: 

## 6. Automatisation du scraping de cryptos
- Lancement du processus toutes les 15 minutes

In [None]:
#%%writefile crypto.py

from utils import *

# Delay between two scraping runs, in seconds. The value used here is
# 1 second; a 3-minute alternative is kept below for reference.
#WAIT = 3*60
WAIT = 1

# Time at which the next run is due; refreshed by wait_next_scraping().
NEXT_START = datetime.now() + timedelta(seconds=WAIT)

start_url = 'https://www.binance.com/fr/markets'

# No browser yet: open_binance() receives None on the first pass and the
# browser it returned on every later pass.
browser = None

# Endless scraping cycle; stop it by interrupting the kernel / process.
while True:
    # Open the main crypto page (cookie handling switched off).
    browser = open_binance(start_url, browser, handle_cookies=False)
    # Look for additional pages.
    nb_pages = find_crypto_pages(browser)
    # Scrape the crypto pages.
    parse_crypto_pages(browser, nb_pages)
    # Wait until the next scraping run.
    NEXT_START = wait_next_scraping(NEXT_START, WAIT)

2021-10-11 13:10:18: New scraping initiated
2021-10-11 13:10:18: Selenium configuration
2021-10-11 13:10:29: Binance page correctly opened
2021-10-11 13:10:29: Searching of crypto pages


2021-10-11 13:10:31: Scraping of crypto pages: 100%|██████████| 19/19 [00:16<00:00,  1.12it/s]


2021-10-11 13:11:02: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:11:02: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:11:02: New scraping initiated





2021-10-11 13:11:06: Binance page correctly opened
2021-10-11 13:11:06: Searching of crypto pages


2021-10-11 13:11:07: Scraping of crypto pages: 100%|██████████| 19/19 [00:23<00:00,  1.21s/it]


2021-10-11 13:11:44: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:11:44: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:11:44: New scraping initiated





2021-10-11 13:11:48: Binance page correctly opened
2021-10-11 13:11:48: Searching of crypto pages


2021-10-11 13:11:50: Scraping of crypto pages: 100%|██████████| 19/19 [00:26<00:00,  1.38s/it]


2021-10-11 13:12:31: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:12:31: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:12:31: New scraping initiated





2021-10-11 13:12:35: Binance page correctly opened
2021-10-11 13:12:35: Searching of crypto pages


2021-10-11 13:12:36: Scraping of crypto pages: 100%|██████████| 19/19 [00:18<00:00,  1.01it/s]


2021-10-11 13:13:09: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:13:09: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:13:09: New scraping initiated





2021-10-11 13:13:13: Binance page correctly opened
2021-10-11 13:13:13: Searching of crypto pages


2021-10-11 13:13:14: Scraping of crypto pages: 100%|██████████| 19/19 [00:19<00:00,  1.04s/it]


2021-10-11 13:13:47: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:13:47: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:13:47: New scraping initiated





2021-10-11 13:13:51: Binance page correctly opened
2021-10-11 13:13:51: Searching of crypto pages


2021-10-11 13:13:53: Scraping of crypto pages: 100%|██████████| 19/19 [00:19<00:00,  1.03s/it]


2021-10-11 13:14:28: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:14:28: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:14:28: New scraping initiated





2021-10-11 13:14:40: Binance page correctly opened
2021-10-11 13:14:40: Searching of crypto pages


2021-10-11 13:14:43: Scraping of crypto pages: 100%|██████████| 19/19 [00:19<00:00,  1.05s/it]


2021-10-11 13:15:18: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:15:18: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:15:18: New scraping initiated





2021-10-11 13:15:22: Binance page correctly opened
2021-10-11 13:15:22: Searching of crypto pages


2021-10-11 13:15:23: Scraping of crypto pages: 100%|██████████| 19/19 [00:20<00:00,  1.07s/it]


2021-10-11 13:15:57: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:15:57: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:15:57: New scraping initiated





2021-10-11 13:16:01: Binance page correctly opened
2021-10-11 13:16:01: Searching of crypto pages


2021-10-11 13:16:03: Scraping of crypto pages: 100%|██████████| 19/19 [00:28<00:00,  1.49s/it]


2021-10-11 13:16:45: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:16:45: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:16:45: New scraping initiated





2021-10-11 13:16:50: Binance page correctly opened
2021-10-11 13:16:50: Searching of crypto pages


2021-10-11 13:16:52: Scraping of crypto pages: 100%|██████████| 19/19 [00:44<00:00,  2.35s/it]


2021-10-11 13:17:51: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:17:51: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:17:51: New scraping initiated





2021-10-11 13:17:55: Binance page correctly opened
2021-10-11 13:17:55: Searching of crypto pages


2021-10-11 13:17:58: Scraping of crypto pages: 100%|██████████| 19/19 [00:22<00:00,  1.20s/it]


2021-10-11 13:18:43: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:18:43: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:18:43: New scraping initiated





2021-10-11 13:18:59: Binance page correctly opened
2021-10-11 13:18:59: Searching of crypto pages


2021-10-11 13:19:03: Scraping of crypto pages: 100%|██████████| 19/19 [00:23<00:00,  1.21s/it]


2021-10-11 13:19:52: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:19:52: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:19:52: New scraping initiated





2021-10-11 13:20:07: Binance page correctly opened
2021-10-11 13:20:07: Searching of crypto pages


2021-10-11 13:20:09: Scraping of crypto pages: 100%|██████████| 19/19 [00:21<00:00,  1.11s/it]


2021-10-11 13:20:46: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:20:46: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:20:46: New scraping initiated





2021-10-11 13:20:52: Binance page correctly opened
2021-10-11 13:20:52: Searching of crypto pages


2021-10-11 13:20:56: Scraping of crypto pages: 100%|██████████| 19/19 [00:38<00:00,  2.05s/it]


2021-10-11 13:21:52: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:21:52: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:21:52: New scraping initiated





2021-10-11 13:21:57: Binance page correctly opened
2021-10-11 13:21:57: Searching of crypto pages


2021-10-11 13:21:58: Scraping of crypto pages: 100%|██████████| 19/19 [00:27<00:00,  1.45s/it]


2021-10-11 13:22:54: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:22:54: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:22:54: New scraping initiated





2021-10-11 13:23:07: Binance page correctly opened
2021-10-11 13:23:07: Searching of crypto pages


2021-10-11 13:23:10: Scraping of crypto pages: 100%|██████████| 19/19 [00:28<00:00,  1.52s/it]


2021-10-11 13:23:59: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:23:59: Waiting for next scraping: 0it [00:00, ?it/s]


2021-10-11 13:24:00: New scraping initiated
2021-10-11 13:24:08: Binance page correctly opened
2021-10-11 13:24:08: Searching of crypto pages


2021-10-11 13:24:11: Scraping of crypto pages: 100%|██████████| 19/19 [00:34<00:00,  1.79s/it]


2021-10-11 13:25:03: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:25:03: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:25:03: New scraping initiated





2021-10-11 13:25:09: Binance page correctly opened
2021-10-11 13:25:09: Searching of crypto pages


2021-10-11 13:25:13: Scraping of crypto pages: 100%|██████████| 19/19 [00:23<00:00,  1.24s/it]


2021-10-11 13:25:59: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:25:59: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:25:59: New scraping initiated





2021-10-11 13:26:03: Binance page correctly opened
2021-10-11 13:26:03: Searching of crypto pages


2021-10-11 13:26:05: Scraping of crypto pages: 100%|██████████| 19/19 [00:23<00:00,  1.24s/it]


2021-10-11 13:27:15: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:27:15: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:27:15: New scraping initiated





2021-10-11 13:27:21: Binance page correctly opened
2021-10-11 13:27:21: Searching of crypto pages


2021-10-11 13:27:23: Scraping of crypto pages: 100%|██████████| 19/19 [00:31<00:00,  1.64s/it]


2021-10-11 13:28:18: Storage on database (Success=368/368; Failure=0/368)


2021-10-11 13:28:18: Waiting for next scraping: 0it [00:00, ?it/s]

2021-10-11 13:28:18: New scraping initiated





2021-10-11 13:28:23: Binance page correctly opened
2021-10-11 13:28:23: Searching of crypto pages


2021-10-11 13:28:25: Scraping of crypto pages:  11%|█         | 2/19 [00:02<00:26,  1.57s/it]