# Bitcoin price collection

In [3]:
import numpy as np
import pandas as pd
import yfinance as yf 
import plotly.graph_objs as plot
import datetime
import time
import os

# Folders (relative to the notebook's working directory) that hold the datasets.
PRICE_FOLDER = "data/price/"
TWITTER_FOLDER = "data/twitter/"

# CSV locations, assembled from the folder constants above so the directory
# names are spelled out in one place only.
tweets_raw_file = f"./{TWITTER_FOLDER}bitcoin_tweets_raw.csv"
tweets_clean_file = f"./{TWITTER_FOLDER}bitcoin_tweets_clean.csv"
price_raw_file = f"./{PRICE_FOLDER}bitcoin_price_raw.csv"
price_clean_file = f"./{PRICE_FOLDER}bitcoin_price_clean.csv"


Retrieve Bitcoin hourly price from Yahoo Finance

- Base price data: the first retrieval, loaded into a dataframe
- Updated data: a later retrieval of new hourly data, which is merged into the base dataframe
- The merged data is exported to .csv after each updated retrieval

In [8]:
# Load the previously exported raw and clean price snapshots from disk.
# No index_col is given, so each frame gets a default integer index and every
# CSV column (including any timestamp column) is read as a regular column.
df_price_raw, df_price_clean = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in (price_raw_file, price_clean_file)
)

## Base price data

In [11]:
# Download BTC-USD candles from Yahoo Finance: a 24-hour window at 1-hour
# resolution, with the timestamp index labelled 'Time'.
price_data = (
    yf.download(tickers='BTC-USD', period='24h', interval='1h')
    .rename_axis('Time')
)
# Preview the first five rows
price_data.head(5)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-13 02:00:00+01:00,46042.578125,46072.230469,45174.433594,45178.304688,45178.304688,0
2021-09-13 03:00:00+01:00,45204.027344,45246.617188,44918.695312,44918.695312,44918.695312,469590016
2021-09-13 04:00:00+01:00,44962.632812,45116.09375,44863.550781,44863.550781,44863.550781,0
2021-09-13 05:00:00+01:00,44660.148438,44768.09375,44589.972656,44692.058594,44692.058594,186744832
2021-09-13 06:00:00+01:00,44717.527344,44959.621094,44458.875,44720.082031,44720.082031,1137813504


In [15]:
# Discard the final (most recent) row of the download.
# NOTE(review): presumably the last row is the still-forming current hour - confirm.
# Caution: re-running this cell removes one more row each time.
price_data = price_data.iloc[:-1]

## To .csv

In [None]:
# TODO: export price_data to .csv here (this cell currently has no executable code).
# TODO: import the same .csv again so it can be extended with the updated data.

## Updated data

In [4]:
# Fetch a fresh 24-hour window of hourly BTC-USD candles, label the timestamp
# index 'Time', and leave out the final (most recent) row.
price_data_update = (
    yf.download(tickers='BTC-USD', period='24h', interval='1h')
    .rename_axis('Time')
    .iloc[:-1]
)
# Preview the first five rows
price_data_update.head(5)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-12 23:00:00+01:00,45300.644531,45699.824219,45300.644531,45678.808594,45678.808594,0
2021-09-13 00:00:00+01:00,45764.859375,46364.878906,45764.859375,46063.269531,46063.269531,142313472
2021-09-13 01:00:00+01:00,46057.214844,46237.136719,45951.882812,46058.765625,46058.765625,0
2021-09-13 02:00:00+01:00,46042.578125,46072.230469,45174.433594,45178.304688,45178.304688,1229885440
2021-09-13 03:00:00+01:00,45204.027344,45246.617188,44918.695312,44918.695312,44918.695312,469590016


## Merge data

In [5]:
# Concatenate the original price data with the updated data
price_data_live = pd.concat([price_data, price_data_update])
# The two downloads cover overlapping hours: for each repeated timestamp keep
# the first occurrence (the row coming from price_data) and drop the rest
price_data_live = price_data_live[~price_data_live.index.duplicated(keep='first')]
# concat only appends, so rows of price_data_update that are older than the
# start of price_data would otherwise end up at the bottom of the frame;
# sort to restore chronological order before exporting/plotting
price_data_live = price_data_live.sort_index()

In [6]:
# Write the merged price data to the raw-price CSV (UTF-8), replacing any
# existing file at that path; the 'Time' index is written as the first column.
price_data_live.to_csv(path_or_buf=price_raw_file, index=True, encoding='utf-8')

## Plot data

In [300]:
# Candlestick chart of the merged hourly price data
fig = plot.Figure()

# One candle per row: x is the timestamp index, OHLC come from the price columns
fig.add_trace(
    plot.Candlestick(
        x=price_data_live.index,
        open=price_data_live['Open'],
        high=price_data_live['High'],
        low=price_data_live['Low'],
        close=price_data_live['Close'],
        name='market price data',
    )
)

# Titles: prices are plain US dollars (e.g. ~45,000), not thousands of dollars
fig.update_layout(
    title='Bitcoin live price evolution',
    xaxis_title='Time',
    yaxis_title='Bitcoin Price (USD)',
)

# X-axis: range slider plus quick-zoom buttons. The data is at 1-hour
# resolution, so the windows are expressed in hours/days; sub-hour windows
# would contain at most a single candle.
fig.update_xaxes(
    rangeslider_visible=True,
    rangeselector=dict(
        buttons=[
            dict(count=6, label="6h", step="hour", stepmode="backward"),
            dict(count=12, label="12h", step="hour", stepmode="backward"),
            dict(count=1, label="1d", step="day", stepmode="backward"),
            dict(step="all"),
        ]
    ),
)

# Render the figure
fig.show()

In [7]:
# Shell escape: export the notebook 04_BitcoinPriceDataCollection.ipynb as a
# script with nbconvert; --no-prompt leaves the In[]/Out[] prompts out.
!jupyter nbconvert --to script --no-prompt 04_BitcoinPriceDataCollection.ipynb

[NbConvertApp] Converting notebook 04_BitcoinPriceDataCollection.ipynb to script
[NbConvertApp] Writing 3026 bytes to 04_BitcoinPriceDataCollection.py
