# Bitcoin price collection

In [5]:
import numpy as np
import pandas as pd
import yfinance as yf 
import plotly.graph_objs as plot
import datetime
import time
import os

# Folders that hold the collected datasets
PRICE_FOLDER = "data/price/"
TWITTER_FOLDER = "data/twitter/"

# CSV paths for the Bitcoin price data
price_raw_file = './data/price/bitcoin_price_raw.csv'
price_clean_file = './data/price/bitcoin_price_clean.csv'

# CSV paths for the Bitcoin tweet data
tweets_raw_file = './data/twitter/bitcoin_tweets_raw.csv'
tweets_clean_file = './data/twitter/bitcoin_tweets_clean.csv'

Retrieve hourly Bitcoin prices from Yahoo Finance

- The base price data is the result of the first retrieval, stored in a dataframe
- Each update retrieves new hourly data and adds it to that same dataframe
- The data is exported to .csv after each update

## Read the CSV file of stored prices

In [299]:
# Load the stored price history from the raw price CSV
df = pd.read_csv(price_raw_file, low_memory=False)
# Remove a trailing UTC offset such as "+00:00" from the timestamps, but only
# when one is actually present. Blindly slicing off the last six characters
# would cut ":00:00" from timestamps that carry no offset, which is the form
# the export cell of this notebook writes back to this same file.
df['Time'] = df['Time'].astype(str).str.replace(r'[+-]\d{2}:\d{2}$', '', regex=True)
# Parse the cleaned strings into timezone-naive datetimes
df['Time'] = pd.to_datetime(df['Time'])
# Use the timestamps as the index
df = df.set_index('Time')
# Preview the first rows
df.head(5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-12 23:00:00,45300.644531,45699.824219,45300.644531,45678.808594,45678.808594,0
2021-09-13 00:00:00,45764.859375,46364.878906,45764.859375,46063.269531,46063.269531,142313472
2021-09-13 01:00:00,46057.214844,46237.136719,45951.882812,46058.765625,46058.765625,0
2021-09-13 02:00:00,46042.578125,46072.230469,45174.433594,45178.304688,45178.304688,1229885440
2021-09-13 03:00:00,45204.027344,45246.617188,44918.695312,44918.695312,44918.695312,469590016


## Retrieve updated data

In [351]:
# Download hourly BTC-USD price data for the requested 24h period
price_data_update = yf.download(tickers='BTC-USD', period='24h', interval='1h')
# Drop the timezone from the index while keeping the wall-clock timestamps.
# tz_localize(None) replaces slicing six characters off the string form,
# which would truncate the time of day whenever the index has no offset.
update_index = pd.DatetimeIndex(price_data_update.index)
if update_index.tz is not None:
    update_index = update_index.tz_localize(None)
# Name the index 'Time' so it lines up with the stored price data
price_data_update.index = update_index.rename('Time')
# Drop the last row
# NOTE(review): presumably the still-incomplete current hour — confirm
price_data_update = price_data_update.iloc[:-1]
# Preview the first rows
price_data_update.head(5)


[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-09-14 01:00:00,44960.050781,45277.441406,44752.332031,45201.953125,45201.953125,0
2021-09-14 02:00:00,45183.253906,45353.042969,45123.71875,45156.507812,45156.507812,310390784
2021-09-14 03:00:00,45160.417969,45181.898438,45010.6875,45107.910156,45107.910156,0
2021-09-14 04:00:00,45085.078125,45326.109375,45085.078125,45228.804688,45228.804688,0
2021-09-14 05:00:00,45242.101562,45352.996094,45124.78125,45125.746094,45125.746094,736956416


## Merge data

In [361]:
# Stack the stored price history on top of the freshly downloaded rows
combined_prices = pd.concat([df, price_data_update])
# Flag timestamps that already occurred earlier in the stacked frame; the
# first occurrence is kept, so stored rows win over downloaded ones
repeated_timestamp = combined_prices.index.duplicated(keep='first')
df_updated = combined_prices.loc[~repeated_timestamp]

In [403]:
# Report how many rows (one per hour) the merged price data holds
hour_count = len(df_updated)
print(hour_count, 'hours of Bitcoin price data')

50 hours of Bitcoin price data


## Convert to csv

In [214]:
# Export the merged price data to both price CSV files.
# The clean file is written first, then the raw file; both receive the
# same dataframe.
for csv_target in (price_clean_file, price_raw_file):
    df_updated.to_csv(csv_target, encoding='utf-8')

## Export py script

In [368]:
# Convert the notebook 04_01_BitcoinPriceDataUpdate.ipynb into a Python script
# (the cell output shows it being written to 04_01_BitcoinPriceDataUpdate.py)
!jupyter nbconvert --to script --no-prompt 04_01_BitcoinPriceDataUpdate.ipynb

[NbConvertApp] Converting notebook 04_01_BitcoinPriceDataUpdate.ipynb to script
[NbConvertApp] Writing 3389 bytes to 04_01_BitcoinPriceDataUpdate.py
