In [87]:
import pandas as pd
import numpy as np
import os
import gzip
import pickle
import re
import copy
from tqdm import tqdm
import time
import json
# pd.set_option('display.max_columns', 5000)
pd.set_option('display.max_rows', 100)

In [88]:
# Timestamp Graph,Intervals (Row),Network hashrate (TH/s),Bitcoins per block (BTC)
# 02/01/2009,1,4.97E-08,50
BitcoinData = pd.read_csv('BitcoinData.csv') 

df2 = pd.DataFrame(columns=['date', 'hashrate','block'])

df2['date'] = pd.to_datetime(BitcoinData['Timestamp Graph'], format='%d/%m/%Y')
df2['hashrate'] = BitcoinData['Network hashrate (TH/s)']*1e12
df2['block'] = BitcoinData['Bitcoins per block (BTC)']

df2 = df2.dropna()
df2 

Unnamed: 0,date,hashrate,block
0,2009-01-02,4.970000e+04,50.00
1,2009-01-05,4.970000e+04,50.00
2,2009-01-08,6.960000e+05,50.00
3,2009-01-11,4.420000e+06,50.00
4,2009-01-14,6.310000e+06,50.00
...,...,...,...
1638,2022-06-18,2.350000e+20,6.25
1639,2022-06-21,2.260000e+20,6.25
1640,2022-06-24,1.900000e+20,6.25
1641,2022-06-27,2.000000e+20,6.25


In [89]:
hashrate = json.load(open('hashrate.json'))

# x	y
# 1231545600000	1.065220e-07
BitcoinData2 = pd.DataFrame(hashrate["hash-rate"])
BitcoinData2["date"] = pd.to_datetime(BitcoinData2["x"], unit='ms')
BitcoinData2["hashrate"] = BitcoinData2["y"]*1e12
BitcoinData2 = BitcoinData2.drop(columns=["x", "y"])

In [90]:
BitcoinData2

Unnamed: 0,date,hashrate
0,2009-01-10,1.065220e+05
1,2009-01-14,2.521021e+06
2,2009-01-18,5.588855e+06
3,2009-01-22,5.681174e+06
4,2009-01-26,6.270595e+06
...,...,...
1384,2024-03-08,5.928970e+20
1385,2024-03-12,6.294408e+20
1386,2024-03-16,5.963760e+20
1387,2024-03-20,6.010169e+20


In [91]:
temp = BitcoinData2[BitcoinData2["date"] > df2["date"].max()]
temp

Unnamed: 0,date,hashrate
1230,2022-07-01,2.146114e+20
1231,2022-07-05,2.179713e+20
1232,2022-07-09,2.061507e+20
1233,2022-07-13,2.059578e+20
1234,2022-07-17,1.968833e+20
...,...,...
1384,2024-03-08,5.928970e+20
1385,2024-03-12,6.294408e+20
1386,2024-03-16,5.963760e+20
1387,2024-03-20,6.010169e+20


In [92]:


# df2 is not defined for values above 2022
df2 = pd.concat([df2, temp])

# if block is nan, it should be 6.25
df2['block'] = df2['block'].fillna(6.25)

In [93]:
df2

Unnamed: 0,date,hashrate,block
0,2009-01-02,4.970000e+04,50.00
1,2009-01-05,4.970000e+04,50.00
2,2009-01-08,6.960000e+05,50.00
3,2009-01-11,4.420000e+06,50.00
4,2009-01-14,6.310000e+06,50.00
...,...,...,...
1384,2024-03-08,5.928970e+20,6.25
1385,2024-03-12,6.294408e+20,6.25
1386,2024-03-16,5.963760e+20,6.25
1387,2024-03-20,6.010169e+20,6.25


In [94]:
# for each month in df2, get the monthly average and store it with the date being the first day of the month

# Create a 'month' column by offsetting the 'date' by 15 days and then using to_period
from datetime import timedelta
df2['month'] = (df2['date'] - timedelta(days=15)).dt.to_period('M')

# Group by 'month' and calculate the mean
df2 = df2.groupby('month')[["hashrate","block"]].mean().reset_index()

# Convert 'month' back to datetime (first day of the month)
df2['date'] = df2['month'].dt.to_timestamp()
df2 = df2.drop(columns=["month"])
df2

Unnamed: 0,hashrate,block,date
0,2.305080e+06,50.00,2008-12-01
1,6.416000e+06,50.00,2009-01-01
2,5.573000e+06,50.00,2009-02-01
3,5.564000e+06,50.00,2009-03-01
4,5.999000e+06,50.00,2009-04-01
...,...,...,...
179,4.852831e+20,6.25,2023-11-01
180,5.167069e+20,6.25,2023-12-01
181,5.446286e+20,6.25,2024-01-01
182,5.782587e+20,6.25,2024-02-01


In [95]:
df2.to_csv('BitcoinData2.csv', index=False)

In [96]:
# Start,End,Open,High,Low,Close,Volume,Market Cap
# 2024-03-08,2024-03-09,66871.2,69451.15,66398.03,68202.87,71117263448.78049,1329444482209.993
price = pd.read_csv('../../pricehistory/price_full.csv') 



# Date,ln(P_max),Data monthly average,Model ln(P_eff)
# 2011-04-01,-12.485786548332777,1.6612383008356546e-06,1.5657608695648479e-06
efficiency = pd.read_csv('../7_plotting/efficiency.csv') 

df1 = pd.DataFrame(columns=['date', 'price'])
df3 = pd.DataFrame(columns=['date', 'efficiency'])


df1['date'] = pd.to_datetime(price['Start'])
df1['price'] = price['Close']

df3['date'] = pd.to_datetime(efficiency['month'], format='%Y-%m-%d')
df3['efficiency'] = efficiency['TH/J']*1e12

joined = pd.merge(df1, df2, on='date', how='inner')
joined = pd.merge(joined, df3, on='date', how='inner')
joined = joined.dropna()

In [97]:
pd.set_option('display.max_rows', None)
joined.sample(10)

Unnamed: 0,date,price,hashrate,block,efficiency
130,2012-12-01,12.562,22570000000000.0,25.0,10892470.0
124,2013-06-01,129.3,182400000000000.0,25.0,505717300.0
103,2015-03-01,260.371,3.516e+17,25.0,1257919000.0
95,2015-11-01,325.732,5.947e+17,25.0,2093899000.0
28,2021-06-01,36606.39,1.0032e+20,6.25,17682470000.0
16,2022-06-01,29789.421134,2.16299e+20,6.25,15446860000.0
46,2019-12-01,7417.184162,1.0132e+20,12.5,11829350000.0
14,2022-08-01,23311.592835,2.185533e+20,6.25,14804760000.0
49,2019-09-01,9716.72191,9.177e+19,12.5,13173360000.0
18,2022-04-01,46231.004759,2.112e+20,6.25,17372820000.0


In [98]:
joined["reward"] = joined["block"]*joined["price"]

data = []

for i in range(len(joined)):
    date = joined['date'].iloc[i]
    hashrate = joined['hashrate'].iloc[i]
    efficiency = joined['efficiency'].iloc[i]
    reward = joined['reward'].iloc[i]
    aux = 2.0
    electricity_cost = 0.05/(1000*3600)
    constant = 6
    watts = hashrate/efficiency
    top = watts*aux*electricity_cost
    reward_per_second = reward/600
    bottom = constant*reward_per_second
    cost = top/bottom
    data.append([date, cost])
    
    print(f"date: {date}")
    # print(f"hashrate: {hashrate}")
    # print(f"efficiency: {efficiency}")
    print(f"watts: {watts}")
    print(f"top: {top}")
    print(f"reward: {reward}")
    print(f"bottom: {bottom}")
    print(f"cost: {cost}")
    print("\n\n")

df = pd.DataFrame(data, columns=['date', 'cost'])

date: 2023-10-01 00:00:00
watts: 15507097314.823082
top: 430.7527031895301
reward: 174863.125
bottom: 1748.63125
cost: 0.24633707260437565



date: 2023-09-01 00:00:00
watts: 20906375194.19472
top: 580.7326442831868
reward: 161353.5625
bottom: 1613.535625
cost: 0.35991312201934605



date: 2023-08-01 00:00:00
watts: 27744290860.884155
top: 770.6747461356711
reward: 185182.75
bottom: 1851.8275
cost: 0.4161698355465998



date: 2023-07-01 00:00:00
watts: 17164123372.659256
top: 476.7812047960905
reward: 191181.5625
bottom: 1911.8156250000002
cost: 0.24938660326938716



date: 2023-06-01 00:00:00
watts: 18862482747.383854
top: 523.957854093996
reward: 167543.625
bottom: 1675.43625
cost: 0.3127292095381105



date: 2023-05-01 00:00:00
watts: 17890367781.45343
top: 496.95466059592866
reward: 175527.875
bottom: 1755.27875
cost: 0.28312008026983104



date: 2023-04-01 00:00:00
watts: 23468200342.987675
top: 651.8944539718799
reward: 177905.08761113856
bottom: 1779.0508761113856
cost: 0.366428

In [99]:
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
# from datetime import timedelta
# df['avg'] = (df['date'] - timedelta(days=15)).dt.to_period('Q')
df.sample(10)

Unnamed: 0,date,cost
60,2018-10-01,0.17628
59,2018-11-01,0.15828
102,2015-04-01,0.120483
51,2019-07-01,0.127757
148,2011-06-01,0.037854
96,2015-10-01,0.116216
10,2022-12-01,0.477162
56,2019-02-01,0.28478
145,2011-09-01,0.044471
5,2023-05-01,0.28312


In [100]:
# avg_data = df.groupby('avg')['cost'].mean().reset_index()
# avg_data

In [101]:
df.to_csv('cost.csv', index=False)