In [1]:
import pandas as pd
import numpy as np
import os
import gzip
import pickle
import re
import copy
from tqdm import tqdm
import time
import json
from datetime import datetime, timedelta

In [2]:
# Columns of BitcoinData_orig.csv:
#   Timestamp Graph, Intervals (Row), Network hashrate (TH/s), Bitcoins per block (BTC)
# Sample row:
#   02/01/2009,1,4.97E-08,50
BitcoinData = pd.read_csv('BitcoinData_orig.csv')

# Build the working frame in one step: parse the day-first dates, scale the
# hashrate column by 1e12 (TH/s -> H/s), carry the block reward over unchanged,
# and discard any row that has a missing value.
df2 = pd.DataFrame({
    'date': pd.to_datetime(BitcoinData['Timestamp Graph'], format='%d/%m/%Y'),
    'hashrate': BitcoinData['Network hashrate (TH/s)'] * 1e12,
    'coins_per_block': BitcoinData['Bitcoins per block (BTC)'],
}).dropna()
df2

Unnamed: 0,date,hashrate,coins_per_block
0,2009-01-02,4.970000e+04,50.00
1,2009-01-05,4.970000e+04,50.00
2,2009-01-08,6.960000e+05,50.00
3,2009-01-11,4.420000e+06,50.00
4,2009-01-14,6.310000e+06,50.00
...,...,...,...
1638,2022-06-18,2.350000e+20,6.25
1639,2022-06-21,2.260000e+20,6.25
1640,2022-06-24,1.900000e+20,6.25
1641,2022-06-27,2.000000e+20,6.25


In [3]:
# Read the JSON export inside a context manager so the file handle is closed
# as soon as parsing finishes (the bare open() call left it dangling).
with open('hashrate.json') as hashrate_file:
    hashrate = json.load(hashrate_file)

# The "hash-rate" entry yields two columns:
# x	y
# 1231545600000	1.065220e-07
BitcoinData2 = pd.DataFrame(hashrate["hash-rate"])

# x is an epoch timestamp in milliseconds.
BitcoinData2["date"] = pd.to_datetime(BitcoinData2["x"], unit='ms')

# Same 1e12 scaling as applied to the CSV hashrate column.
# NOTE(review): presumably y is in TH/s like the CSV source — confirm.
BitcoinData2["hashrate"] = BitcoinData2["y"]*1e12

# Only the derived columns are needed from here on.
BitcoinData2 = BitcoinData2.drop(columns=["x", "y"])

In [4]:
# Inspect the frame built from hashrate.json (columns: date, hashrate).
BitcoinData2

Unnamed: 0,date,hashrate
0,2009-01-10,1.065220e+05
1,2009-01-14,2.521021e+06
2,2009-01-18,5.588855e+06
3,2009-01-22,5.681174e+06
4,2009-01-26,6.270595e+06
...,...,...
1384,2024-03-08,5.928970e+20
1385,2024-03-12,6.294408e+20
1386,2024-03-16,5.963760e+20
1387,2024-03-20,6.010169e+20


In [5]:
# Select the JSON-sourced rows dated strictly after the latest date already
# present in df2, so the rows appended later do not overlap the existing ones.
temp = BitcoinData2.loc[BitcoinData2["date"].gt(df2["date"].max())]
temp

Unnamed: 0,date,hashrate
1230,2022-07-01,2.146114e+20
1231,2022-07-05,2.179713e+20
1232,2022-07-09,2.061507e+20
1233,2022-07-13,2.059578e+20
1234,2022-07-17,1.968833e+20
...,...,...
1384,2024-03-08,5.928970e+20
1385,2024-03-12,6.294408e+20
1386,2024-03-16,5.963760e+20
1387,2024-03-20,6.010169e+20


In [6]:
# df2 has no rows past mid-2022: append the newer rows selected into `temp`.
# Index labels of both frames are kept as they are (no renumbering).
df2 = pd.concat((df2, temp), axis=0)

# `temp` has no coins_per_block column, so the appended rows are NaN there;
# those gaps are filled with 6.25.
df2 = df2.assign(coins_per_block=df2['coins_per_block'].fillna(6.25))

In [7]:
# Inspect the combined frame after appending the newer rows and filling coins_per_block.
df2

Unnamed: 0,date,hashrate,coins_per_block
0,2009-01-02,4.970000e+04,50.00
1,2009-01-05,4.970000e+04,50.00
2,2009-01-08,6.960000e+05,50.00
3,2009-01-11,4.420000e+06,50.00
4,2009-01-14,6.310000e+06,50.00
...,...,...,...
1384,2024-03-08,5.928970e+20,6.25
1385,2024-03-12,6.294408e+20,6.25
1386,2024-03-16,5.963760e+20,6.25
1387,2024-03-20,6.010169e+20,6.25


In [8]:
# Collapse df2 to one row per calendar month: take the mean of hashrate and
# coins_per_block within each month and stamp the row with the first day of
# that month.
#
# The monthly period is attached through .assign() on a temporary copy, so the
# per-sample frame is not given a scratch column before being replaced. The
# redundant `from datetime import timedelta` that used to sit here is dropped:
# the name is unused in this cell and already imported at the top of the notebook.
monthly = (
    df2.assign(month=df2['date'].dt.to_period('M'))
       .groupby('month')[["hashrate", "coins_per_block"]]
       .mean()
       .reset_index()
)

# Convert the period back to a datetime (start of the month), then drop the
# helper column; resulting column order is hashrate, coins_per_block, date.
monthly['date'] = monthly['month'].dt.to_timestamp()
df2 = monthly.drop(columns=["month"])
df2

Unnamed: 0,hashrate,coins_per_block,date
0,4.318540e+06,50.00,2009-01-01
1,6.009000e+06,50.00,2009-02-01
2,5.623000e+06,50.00,2009-03-01
3,5.712000e+06,50.00,2009-04-01
4,5.710000e+06,50.00,2009-05-01
...,...,...,...
178,4.722977e+20,6.25,2023-11-01
179,5.008642e+20,6.25,2023-12-01
180,5.166878e+20,6.25,2024-01-01
181,5.686831e+20,6.25,2024-02-01


In [9]:
# Write the monthly table to BitcoinData.csv; index=False keeps the row index out of the file.
df2.to_csv('BitcoinData.csv', index=False)