In [1]:
# pip install beautifulsoup4   (provides the `bs4` module; the PyPI package named `BeautifulSoup` is the obsolete version 3)
# pip install requests
# pip install pandas

import requests
import pandas as pd
from bs4 import BeautifulSoup


In [2]:
url = "https://www.coingecko.com/"
# Pass a timeout so this cell fails fast instead of hanging forever when the
# server never answers (requests waits indefinitely by default).
response = requests.get(url, timeout=30)
response.status_code  ## 200 means OK

200

In [3]:
## response.content holds the page's HTML as a bytes object;
## 'html.parser' names the parser BeautifulSoup should use on it.
html_bytes = response.content
soup = BeautifulSoup(html_bytes, 'html.parser')
soup

<!DOCTYPE html>

<html lang="en">
<head>
<script src="/cdn-cgi/apps/head/gYtXOyllgyP3-Z2iKTP8rRWGBm4.js"></script><script async="" defer="" src="https://www.googleoptimize.com/optimize.js?id=GTM-W3CD992"></script>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<link href="/favicon.ico" rel="shortcut icon"/>
<link href="/OpensearchDescription.xml" rel="search" type="application/opensearchdescription+xml">
<title>Cryptocurrency Prices, Charts, and Crypto Market Cap | CoinGecko</title>
<meta content="View top cryptocurrency prices live, crypto charts, market cap, and trading volume. Discover today’s new and trending coins, top crypto gainers and losers in the market." name="description">
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start':
  new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],
  j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async

In [4]:
## find the target data
## Locate the first div with class coingecko-table, then its <tbody>,
## and collect every <tr> inside it (one row per coin).
coin_table = soup.find("div", {"class": "coingecko-table"})
coin_tbody = coin_table.find("tbody") if coin_table is not None else None
if coin_tbody is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an opaque "'NoneType' object has no attribute 'find'".
    raise RuntimeError(
        "coin table not found in the downloaded page "
        "(the page layout may have changed or the request was blocked)"
    )

results = coin_tbody.find_all("tr")  ## list of <tr> rows of the coin table


# Name
extracts Name from the website for the bitcoin row (`results[0]`)

In [5]:
# Look up the name <span> in the first row, then show its stripped text.
name_span = results[0].find("span", attrs={"class": "lg:tw-flex font-bold tw-items-center tw-justify-between"})
name_span.get_text().strip()

'Bitcoin'

# Price
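extracts Price from the website for the bitcoin row (`results[0]`); two different selectors are shown and both return the same text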

In [6]:
# First selector: the first <span class="no-wrap"> found in the row.
price_span = results[0].find("span", attrs={"class": "no-wrap"})
price_span.get_text().strip()

'$23,224.29'

In [7]:
# Second selector: the price table cell itself, addressed by its class string.
price_cell = results[0].find("td", attrs={"class": "td-price price text-right"})
price_cell.get_text().strip()

'$23,224.29'

# 24H Volume
extracts 24h volume from the website for the bitcoin row (`results[0]`)

In [8]:
# span class="no-wrap" does not work here, so the <td> is selected
# by its class string instead.
volume_cell = results[0].find("td", attrs={"class": "td-liquidity_score lit text-right col-market"})
volume_cell.get_text().strip()

'$38,922,238,195'

# 24h Change

In [9]:
# span class="no-wrap" does not work here, so the <td> is selected
# by its class string instead.
change_cell = results[0].find("td", attrs={"class": "td-change24h change24h stat-percent text-right col-market"})
change_cell.get_text().strip()

'-1.7%'

# Market Cap

In [10]:
# Select the market-cap table cell by its class string and show its stripped text.
cap_cell = results[0].find("td", attrs={"class": "td-market_cap cap col-market cap-price text-right"})
cap_cell.get_text().strip()

'$447,650,805,086'

In [11]:
# One list per output column; each receives exactly one entry per table row
# so the lists stay aligned and can be combined into a DataFrame.
names = []
price = []
volume_24h = []
mkt_cap = []


def _cell_text(row, tag, css_class):
    """Return the stripped text of the first `tag` with class `css_class` in `row`.

    Returns "n/a" when the row contains no such element.
    """
    cell = row.find(tag, {"class": css_class})
    # find() returns None when nothing matches; check for that explicitly
    # rather than hiding every possible error behind a bare `except:`.
    if cell is None:
        return "n/a"
    return cell.get_text().strip()


for result in results:
    names.append(_cell_text(result, "span", "lg:tw-flex font-bold tw-items-center tw-justify-between"))
    # Bug fix: a missing price used to append "n/a" to `names` instead of
    # `price`, which left the lists with different lengths.
    price.append(_cell_text(result, "span", "no-wrap"))
    volume_24h.append(_cell_text(result, "td", "td-liquidity_score lit text-right col-market"))
    mkt_cap.append(_cell_text(result, "td", "td-market_cap cap col-market cap-price text-right"))
        
        
        
        

In [12]:
# Build the DataFrame from a dictionary of lists:
# every key is a column name and every list holds that column's values.
column_data = {
    "Coin": names,
    "price": price,
    "24h_volume": volume_24h,
    "Market_Cap": mkt_cap,
}
df = pd.DataFrame(column_data)

In [13]:
# Preview the first rows of the scraped table as a quick sanity check.
df.head()

Unnamed: 0,Coin,price,24h_volume,Market_Cap
0,Bitcoin,"$23,224.29","$38,922,238,195","$447,650,805,086"
1,Ethereum,"$1,586.89","$11,287,318,949","$191,220,028,798"
2,Tether,$1.00,"$48,146,105,487","$67,881,996,031"
3,USD Coin,$1.00,"$3,181,744,673","$43,082,864,776"
4,BNB,$309.90,"$897,341,341","$41,786,580,216"


In [14]:
# Save the DataFrame to "crypto.csv"; index=False is passed so the row index is not written.
df.to_csv("crypto.csv", index=False)

In [15]:
# Save the DataFrame to "crypto.xlsx"; index=False is passed so the row index is not written.
df.to_excel("crypto.xlsx", index=False)

## Important Note: Until now we have taken the data from only 1 page of the website

# load the data from multiple pages

In [18]:
# One list per output column, shared across all pages; each receives exactly
# one entry per table row so the lists stay aligned.
names = []
price = []
volume_24h = []
mkt_cap = []


def _text_or_na(row, tag, css_class):
    """Return the stripped text of the first `tag` with class `css_class` in `row`.

    Returns "n/a" when the row contains no such element.
    """
    cell = row.find(tag, {"class": css_class})
    # find() returns None when nothing matches; check for that explicitly
    # rather than hiding every possible error behind a bare `except:`.
    if cell is None:
        return "n/a"
    return cell.get_text().strip()


for page in range(1, 4):  ## pages 1 to 3
    url = "https://www.coingecko.com/?page=" + str(page)

    # A timeout keeps the loop from hanging forever on an unresponsive server.
    response = requests.get(url, timeout=30)
    print(response.status_code)  ## 200 means OK

    soup = BeautifulSoup(response.content, 'html.parser')

    # Bug fix: take the rows from THIS page's soup. The loop used to iterate
    # the `results` list left over from the single-page cell, so the first
    # page was appended once per iteration and the new pages were never read.
    page_table = soup.find("div", {"class": "coingecko-table"})
    page_tbody = page_table.find("tbody") if page_table is not None else None
    if page_tbody is None:
        print("no coin table found on page", page)
        continue

    for result in page_tbody.find_all("tr"):
        names.append(_text_or_na(result, "span", "lg:tw-flex font-bold tw-items-center tw-justify-between"))
        # Bug fix: a missing price used to append "n/a" to `names` instead
        # of `price`, which left the lists with different lengths.
        price.append(_text_or_na(result, "span", "no-wrap"))
        volume_24h.append(_text_or_na(result, "td", "td-liquidity_score lit text-right col-market"))
        mkt_cap.append(_text_or_na(result, "td", "td-market_cap cap col-market cap-price text-right"))

# Build the table and write the files once, after every page has been
# collected, instead of rebuilding and overwriting them on each iteration.
# dictionary {key:value} of lists
df = pd.DataFrame({"Coin": names, "price": price, "24h_volume": volume_24h, "Market_Cap": mkt_cap})

# data frame to csv
df.to_csv("crypto.csv", index=False)

# data frame to excel
df.to_excel("crypto.xlsx", index=False)

    

200
200
200
