In [1]:
# load EDA Packages
import pandas as pd
import numpy as np

# load data visualization packages
import seaborn as sb
import matplotlib.pyplot as plt
%matplotlib inline

# other libraries for visualization
import matplotlib.dates as mdates
import matplotlib.ticker as mtick
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import iplot
import plotly.io as pio


# load web scraping packages
import requests
from bs4 import BeautifulSoup as soup
from urllib.request import urlopen
import os

# Getting the Dataset

In [2]:
# landing page of the site whose links are listed in the cells below
my_url = "https://cryptowat.ch/"

In [3]:
# download the page behind my_url with requests
# - timeout: without one, requests waits indefinitely on a stalled connection
# - raise_for_status: stop here on a 4xx/5xx answer instead of parsing an error page
page_response = requests.get(my_url, timeout=30)
page_response.raise_for_status()
html_data = page_response.text

In [4]:
# parse the downloaded markup with BeautifulSoup's built-in 'html.parser' backend
soup_object = soup(markup=html_data, features='html.parser')

In [5]:
# list every link target on the page
# (in HTML an anchor/link is the <a> tag; href=True keeps only tags that carry an href)
anchors = soup_object.find_all('a', href=True)
for anchor in anchors:
    print(anchor['href'])

/
/account/login
/account/create
/charts
/assets
/nft-prices
/exchanges
/desktop
/mobile
/pricing
/markets
/correlations
/products/cryptocurrency-market-data-api
/converter


In [6]:
# switch to one specific link: the markets endpoint on the API host
my_url = "https://api.cryptowat.ch/markets"

In [7]:
# download the content behind my_url
# - timeout: without one, requests waits indefinitely on a stalled connection
# - raise_for_status: stop here on a 4xx/5xx answer instead of keeping an error body
markets_response = requests.get(my_url, timeout=30)
markets_response.raise_for_status()
html_data = markets_response.text

In [8]:
# parse the downloaded content with BeautifulSoup's built-in 'html.parser' backend
# NOTE(review): this is an API endpoint and presumably answers with JSON rather
# than HTML, and soup_obj is not referenced again in the visible cells - confirm
# whether this step is still needed
soup_obj = soup(markup=html_data, features='html.parser')

In [9]:
# fetch the candle data through a reusable function
def get_coin_price(symbol, exchange='binance', after='2018-09-01', periods=7200, timeout=30):
    """Download OHLC candles for the ``<symbol>usd`` market of one exchange.

    Parameters
    ----------
    symbol : str
        Base asset ticker such as ``'btc'``; ``usd`` is appended to build the
        market name used in the URL.
    exchange : str, default 'binance'
        Exchange segment of the URL.
    after : str or datetime-like, default '2018-09-01'
        Lower time bound. It is parsed with ``pd.Timestamp`` and sent to the
        API as whole Unix seconds.
    periods : int or str, default 7200
        Candle period sent as the ``periods`` query parameter and used as the
        key of the candle list in the response (7200 seconds = 2 hours).
    timeout : float, default 30
        Seconds to wait for the server before giving up.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``CloseTime`` (datetime) with the columns ``OpenPrice``,
        ``HighPrice``, ``LowPrice``, ``ClosePrice``, ``volume`` and
        ``MarketCap``.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status.
    KeyError
        If the response carries no candle list for ``periods``.
    """
    period_key = str(periods)
    my_url = 'https://api.cryptowat.ch/markets/{exchange}/{symbol}usd/ohlc'.format(
        symbol=symbol, exchange=exchange
    )
    response = requests.get(
        my_url,
        params={
            'periods': period_key,
            'after': str(int(pd.Timestamp(after).timestamp())),
        },
        timeout=timeout,  # never hang forever on a stalled connection
    )
    # surface HTTP failures (unknown market, rate limit, ...) as HTTPError
    # instead of an opaque KeyError further down
    response.raise_for_status()

    payload = response.json()
    candles = (payload.get('result') or {}).get(period_key)
    if candles is None:
        raise KeyError(
            'no {period}-second candles in the response for {url}'.format(
                period=period_key, url=my_url
            )
        )

    # Column names are assigned positionally to the seven values of each candle.
    # NOTE(review): the seventh value looks like quote-currency volume
    # (roughly volume * price) rather than a market cap; the 'MarketCap' label
    # is kept so files written from this frame keep their header - confirm.
    df = pd.DataFrame(candles, columns=[
        'CloseTime', 'OpenPrice', 'HighPrice', 'LowPrice', 'ClosePrice', 'volume', 'MarketCap'
    ])
    df['CloseTime'] = pd.to_datetime(df['CloseTime'], unit='s')
    return df.set_index('CloseTime')

In [10]:
# pull the bitstamp candles in one pass: btc and eth are the main subjects,
# the remaining coins are fetched for comparison
(btc, eth,
 ada, xrp, usdt, matic, usdc, shib, ftm, uma) = [
    get_coin_price(ticker, 'bitstamp')
    for ticker in ('btc', 'eth',
                   'ada', 'xrp', 'usdt', 'matic', 'usdc', 'shib', 'ftm', 'uma')
]

In [11]:
# display the btc dataframe (the notebook renders the cell's last expression)
btc

Unnamed: 0_level_0,OpenPrice,HighPrice,LowPrice,ClosePrice,volume,MarketCap
CloseTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-20 10:00:00,54461.85,55479.62,54284.33,54940.10,420.173397,2.311098e+07
2021-04-20 12:00:00,54940.10,56466.32,54884.22,56348.13,524.660874,2.932419e+07
2021-04-20 14:00:00,56356.72,56403.98,55661.65,56379.84,408.564245,2.290104e+07
2021-04-20 16:00:00,56406.02,56542.92,54700.00,55355.58,646.185067,3.587516e+07
2021-04-20 18:00:00,55384.65,56010.61,55155.76,55942.70,386.479547,2.148352e+07
...,...,...,...,...,...,...
2022-09-02 00:00:00,20095.00,20174.00,20061.00,20128.00,81.950392,1.647610e+06
2022-09-02 02:00:00,20133.00,20133.00,20016.00,20048.00,112.853307,2.264364e+06
2022-09-02 04:00:00,20048.00,20235.00,19957.00,20161.00,82.271167,1.653513e+06
2022-09-02 06:00:00,20158.00,20295.00,20075.00,20125.00,121.282143,2.449675e+06


In [12]:
# write every frame to its own csv file; index=True stores the CloseTime index
# as a regular column of the file
exports = [
    (btc, 'btc.csv'),
    (eth, 'eth.csv'),
    (ada, 'ada.csv'),
    (xrp, 'xrp.csv'),
    (usdt, 'usdt.csv'),
    (matic, 'matic.csv'),
    (usdc, 'usdc.csv'),
    (shib, 'shib.csv'),
    (ftm, 'ftm.csv'),
    (uma, 'uma.csv'),
]
for frame, csv_name in exports:
    frame.to_csv(csv_name, index=True)

I stopped here and continued with the Data Wrangling and Exploratory Data Analysis in the Part_2 file, so that I don't keep re-running the code above, which would lead to continuous scraping of the website.