# Web Scraping

## Scraping Cryptocurrency Prices on 'coingecko.com'
## Data Points 
- Name
- Price
- 1h Change
- 24h Change
- 7d Change
- 24h Volume
- Market Cap

### Importing libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import openpyxl

In [2]:
# Landing page whose price table is scraped below.
website = 'https://www.coingecko.com/en'

### Request

In [3]:
# Fetch the landing page.
# - The timeout stops the cell from hanging indefinitely on a stalled connection.
# - raise_for_status() turns a 4xx/5xx reply into an immediate HTTPError, so an
#   error page never reaches the HTML parser in the following cells.
response = requests.get(website, timeout=30)
response.raise_for_status()
# Displayed as the cell output for a quick visual check.
response.status_code

200

### Creating Soup object

In [4]:
# Parse the raw response bytes with Python's built-in HTML parser.
bs = BeautifulSoup(response.content, features='html.parser')

In [5]:
# Sanity check: display the first <h1> of the parsed page to confirm the
# expected page was downloaded.
bs.h1

<h1 class="tw-text-xl tw-mb-2 tw-mr-2 tw-pb-0 tw-mb-0 tw-text-gray-700 tw-font-bold dark:tw-text-white md:tw-text-2xl">
Cryptocurrency Prices by Market Cap
</h1>

### Collecting the table rows into a list

In [6]:
# Look the table up step by step so that a changed page layout (or a response
# that is not the expected page) is reported with a clear message instead of an
# AttributeError raised on None.
price_table = bs.find('table', {'class' : 'table-scrollable'})
table_body = price_table.find('tbody') if price_table is not None else None
if table_body is None:
    raise RuntimeError("Could not find the 'table-scrollable' price table in the page")

# Every <tr> of the table body; each one is read below as a single coin's row.
results = table_body.find_all('tr')

In [7]:
# Number of table rows collected from the page.
len(results)

100

### Only 100 results are displayed per page, so this single request yields 100 rows

### Targeting the Data points

### Name

In [8]:
# Coin name of the first row: text of the anchor carrying this exact class string.
(
    results[0]
    .find('a', attrs={'class' : 'tw-hidden lg:tw-flex font-bold tw-items-center tw-justify-between'})
    .get_text()
    .strip()
)

'Bitcoin'

### Price

In [9]:
# Price of the first row, with surrounding whitespace trimmed.
(
    results[0]
    .find('td', attrs={'class' : 'td-price'})
    .get_text()
    .strip()
)

'$66,824'

### 1h Change

In [10]:
# 1-hour change of the first row, with surrounding whitespace trimmed.
(
    results[0]
    .find('td', attrs={'class' : 'td-change1h'})
    .get_text()
    .strip()
)

'0.2%'

### 24h Change

In [11]:
# 24-hour change of the first row, with surrounding whitespace trimmed.
(
    results[0]
    .find('td', attrs={'class' : 'td-change24h'})
    .get_text()
    .strip()
)

'5.1%'

### 7d Change

In [12]:
# 7-day change of the first row, with surrounding whitespace trimmed.
(
    results[0]
    .find('td', attrs={'class' : 'td-change7d'})
    .get_text()
    .strip()
)

'18.6%'

### 24h Volume

In [13]:
# 24-hour volume of the first row. Note the cell is looked up by the
# 'td-liquidity_score' class; that is the cell that holds the volume figure here.
(
    results[0]
    .find('td', attrs={'class' : 'td-liquidity_score'})
    .get_text()
    .strip()
)

'$44,249,303,564'

### Market Cap

In [14]:
# Market cap of the first row, with surrounding whitespace trimmed.
(
    results[0]
    .find('td', attrs={'class' : 'td-market_cap'})
    .get_text()
    .strip()
)

'$1,260,863,299,967'

### Getting all the data

In [15]:
# One list per output column; every table row appends exactly one value to each,
# so all lists end up the same length.
name = []
price = []
_1h_change = []
_24h_change = []
_7d_change = []
_24h_volume = []
market_cap = []

# (destination list, tag, class attribute) for every data point in a row.
# Driving the extraction from this table replaces seven copy-pasted try/except
# stanzas with a single code path.
field_targets = [
    (name, 'a', 'tw-hidden lg:tw-flex font-bold tw-items-center tw-justify-between'),
    (price, 'td', 'td-price'),
    (_1h_change, 'td', 'td-change1h'),
    (_24h_change, 'td', 'td-change24h'),
    (_7d_change, 'td', 'td-change7d'),
    (_24h_volume, 'td', 'td-liquidity_score'),
    (market_cap, 'td', 'td-market_cap'),
]

for result in results:
    for column, tag, css_class in field_targets:
        cell = result.find(tag, {'class' : css_class})
        # find() returns None when the row has no such element. Only that case
        # falls back to 'n/a'; any other error (including a keyboard interrupt)
        # now surfaces instead of being swallowed by a bare `except:`.
        column.append(cell.get_text().strip() if cell is not None else 'n/a')


### Creating a dataframe

In [16]:
# Combine the scraped column lists into one table; the dict keys become the
# column headers, in this order.
crypto = pd.DataFrame(
    {
        'Name' : name,
        'Price' : price,
        '1h_change' : _1h_change,
        '24h_change' : _24h_change,
        '7d_change' : _7d_change,
        '24h_volume' : _24h_volume,
        'Market Cap' : market_cap,
    }
)

In [17]:
# Display the single-page table.
crypto


Unnamed: 0,Name,Price,1h_change,24h_change,7d_change,24h_volume,Market Cap
0,Bitcoin,"$66,824",0.2%,5.1%,18.6%,"$44,249,303,564","$1,260,863,299,967"
1,Ethereum,"$4,124.45",0.8%,8.0%,17.9%,"$21,086,101,020","$486,685,284,073"
2,Binance Coin,$505.20,0.6%,4.3%,14.7%,"$1,835,311,929","$85,047,351,861"
3,Cardano,$2.20,0.2%,3.8%,3.7%,"$1,996,472,511","$70,496,812,938"
4,Tether,$1.00,-0.1%,0.2%,0.2%,"$61,199,951,230","$70,245,586,861"
...,...,...,...,...,...,...,...
95,Zilliqa,$0.096695214664,0.6%,5.1%,5.2%,"$65,999,770","$1,222,220,674"
96,IOST,$0.052098239595,0.2%,4.4%,-3.6%,"$104,309,097","$1,189,838,924"
97,dYdX,$20.64,0.2%,3.4%,-7.8%,"$312,593,078","$1,173,364,912"
98,Mina Protocol,$4.34,0.7%,7.6%,3.5%,"$43,300,064","$1,172,065,952"


### Storing it in Excel

In [18]:
# Save the table to an Excel workbook, leaving out the row index.
crypto.to_excel(excel_writer='single_crypto_page.xlsx', index=False)

## Pagination - For 1000 results

In [19]:
# Fresh accumulators: this cell rebuilds every column from scratch across all
# pages, so re-running it never appends to values from an earlier run.
name = []
price = []
_1h_change = []
_24h_change = []
_7d_change = []
_24h_volume = []
market_cap = []

# (destination list, tag, class attribute) for every data point in a row.
# A single data-driven loop replaces seven copy-pasted try/except stanzas.
field_targets = [
    (name, 'a', 'tw-hidden lg:tw-flex font-bold tw-items-center tw-justify-between'),
    (price, 'td', 'td-price'),
    (_1h_change, 'td', 'td-change1h'),
    (_24h_change, 'td', 'td-change24h'),
    (_7d_change, 'td', 'td-change7d'),
    (_24h_volume, 'td', 'td-liquidity_score'),
    (market_cap, 'td', 'td-market_cap'),
]

# Pages 1 through 10.
# NOTE: `website`, `response`, `bs` and `results` are rebound on every
# iteration; after this cell they refer to the last page fetched, not to the
# single-page objects created earlier in the notebook.
for i in range(1,11):

    website = 'https://www.coingecko.com/en?page=' + str(i)

    # The timeout prevents one stalled request from hanging the whole run, and
    # raise_for_status() stops immediately on a 4xx/5xx reply (for example when
    # the site starts refusing requests) rather than parsing an error page.
    response = requests.get(website, timeout=30)
    response.raise_for_status()

    bs = BeautifulSoup(response.content, 'html.parser')

    # Fail with a message naming the page if the table is missing, instead of
    # an AttributeError raised on None.
    page_table = bs.find('table',{'class' : 'table-scrollable'})
    page_body = page_table.find('tbody') if page_table is not None else None
    if page_body is None:
        raise RuntimeError('No price table found on page ' + str(i) + ' (' + website + ')')

    results = page_body.find_all('tr')

    for result in results:
        for column, tag, css_class in field_targets:
            cell = result.find(tag, {'class' : css_class})
            # Only a missing element falls back to 'n/a'; any other error now
            # propagates instead of being hidden by a bare `except:`.
            column.append(cell.get_text().strip() if cell is not None else 'n/a')



### Creating a DataFrame 

In [21]:

# Pair each column header with its list of scraped values (same order as the
# headers) and build the table from that mapping.
crypto = pd.DataFrame(dict(zip(
    ['Name', 'Price', '1h_change', '24h_change', '7d_change', '24h_volume', 'Market Cap'],
    [name, price, _1h_change, _24h_change, _7d_change, _24h_volume, market_cap],
)))

In [22]:
# Display the multi-page table.
crypto

Unnamed: 0,Name,Price,1h_change,24h_change,7d_change,24h_volume,Market Cap
0,Bitcoin,"$66,824",0.2%,5.1%,18.6%,"$44,249,303,564","$1,260,863,299,967"
1,Ethereum,"$4,124.45",0.8%,8.0%,17.9%,"$21,086,101,020","$486,685,284,073"
2,Binance Coin,$505.20,0.6%,4.3%,14.7%,"$1,835,311,929","$85,047,351,861"
3,Cardano,$2.20,0.2%,3.8%,3.7%,"$1,996,472,511","$70,496,812,938"
4,Tether,$1.00,-0.1%,0.2%,0.2%,"$61,199,951,230","$70,245,586,861"
...,...,...,...,...,...,...,...
995,Bread,$0.193046,0.7%,-6.1%,18.2%,"$1,517,511","$16,487,359"
996,Nafter,$0.037196373110,2.1%,-5.4%,-18.0%,"$2,743,590","$16,461,997"
997,Quantum Resistant Ledger,$0.219044,2.3%,-21.4%,3.7%,"$287,847","$16,453,575"
998,GrimToken,$1.78,2.9%,18.5%,251.8%,"$651,463","$16,451,890"


### Checking the data

In [23]:
# Summarise row count, per-column non-null counts and dtypes of the scraped table.
crypto.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        1000 non-null   object
 1   Price       1000 non-null   object
 2   1h_change   1000 non-null   object
 3   24h_change  1000 non-null   object
 4   7d_change   1000 non-null   object
 5   24h_volume  1000 non-null   object
 6   Market Cap  1000 non-null   object
dtypes: object(7)
memory usage: 54.8+ KB


### Storing it in Excel

In [24]:
# Save the multi-page table to an Excel workbook, leaving out the row index.
crypto.to_excel(excel_writer='multiple_crypto_page.xlsx', index=False)