In [1]:
import pandas as pd
import os
import gzip
import pickle
import openai
import torch.nn.functional as F
import torch
import re
import copy
from tqdm import tqdm
from openai import OpenAI
client = OpenAI()

In [2]:
from hardware_mapping import map_hardware_to_table
hardware_instances = pd.read_csv('hardware_instances.csv') # columns: date,hardware
hardware_instances = hardware_instances.assign(hardware_mapped = hardware_instances["hardware"].apply(map_hardware_to_table))

In [3]:
from hardware_mapping2 import map_hardware_to_table2
# apply mapping2 where hardware_mapped is "not found"
for index, row in hardware_instances.iterrows():
    if row["hardware_mapped"] == "not found":
        hardware_instances.at[index, "hardware_mapped"] = map_hardware_to_table2(row["hardware"])

In [4]:
for index, row in hardware_instances.iterrows():
    if row["hardware_mapped"] == "bfl single 'sc'" and row["date"] < "2013-04-01":
        hardware_instances.at[index, "hardware_mapped"] = "bitforce sha256 single"

In [5]:
hardware_instances.head(10)

Unnamed: 0,date,hardware,hardware_mapped
0,2010-09-09 12:59:39,gtx 460,gtx460
1,2010-09-09 12:59:39,macbook pro,unknown
2,2010-09-09 12:59:39,gt330m,unknown
3,2010-09-09 12:59:39,8800gtx,unknown
4,2010-10-06 20:25:17,ati 4350,4350
5,2010-10-06 20:25:17,radeons 5870,5870
6,2010-10-06 20:25:17,5770,5770
7,2010-10-06 20:25:17,8600gt,unknown
8,2010-10-06 20:25:17,gtx260,gtx260
9,2010-11-27 01:28:24,dual-core,unknown


In [6]:
len(hardware_instances)

53366

In [7]:
efficiency = pd.read_csv('../../hardwarelist/hardware_merged.csv') # columns: hardware_name,Mhash/J
efficiency = efficiency.rename(columns={"hardware_name":"hardware_mapped"})

efficiency.head(2)

Unnamed: 0,hardware_mapped,Mhash/J
0,3410,0.074
1,4350,0.346


In [8]:
joined = hardware_instances.merge(efficiency, on="hardware_mapped", how="left")
joined = joined.dropna(subset=["Mhash/J"])
joined.head(4)

Unnamed: 0,date,hardware,hardware_mapped,Mhash/J
0,2010-09-09 12:59:39,gtx 460,gtx460,0.427
4,2010-10-06 20:25:17,ati 4350,4350,0.346
5,2010-10-06 20:25:17,radeons 5870,5870,1.906
6,2010-10-06 20:25:17,5770,5770,1.9401


In [9]:
print(len(joined))

14541


In [10]:
table = joined[["date","hardware_mapped","Mhash/J"]]
table = table.rename(columns={"hardware_mapped":"hardware_name"})
table["Mhash/J"] = table["Mhash/J"].astype(float).map(lambda x: x/1000000).map(lambda x: f"{x:.10f}")
table = table.rename(columns={"Mhash/J":"TH/J"})
table = table.sort_values(["date","hardware_name"])
table = table.reset_index(drop=True)
table.head(8)

Unnamed: 0,date,hardware_name,TH/J
0,2010-09-09 12:59:39,gtx460,4.27e-07
1,2010-10-06 20:25:17,4350,3.46e-07
2,2010-10-06 20:25:17,5770,1.9401e-06
3,2010-10-06 20:25:17,5870,1.906e-06
4,2010-10-06 20:25:17,gtx260,2.1e-07
5,2010-12-23 21:26:33,8800gt,2.672e-07
6,2010-12-23 21:26:33,gtx275,2.32e-07
7,2011-01-01 20:53:30,gt240,2.81e-07


In [None]:
# all of these are manually verified to be random noobs trying to use a gpu during the asic era
for index, row in table.iterrows():
    if row["date"] > "2015-07-01" and row["ln_efficiency"] < 7.5:
        hardware_instances.at[index, "hardware_mapped"] = "unknown"

In [11]:
table.to_csv("hardware_instances_with_efficiency.csv", index=False)

In [12]:
df = pd.read_csv('hardware_instances_with_efficiency.csv') # date,hardware_name,TH/J

max_efficiency = pd.read_csv('../../hardwarelist/Bitcoin max updated2.csv') # rows are date,max (TH/J),archaicity (TH/J)

def get_max_efficiency(date):
    date = str(date)[:10]
    try:
        return max_efficiency[max_efficiency['date'] == date]['max (TH/J)'].values[0]
    except:
        return -1
    

# print rows where TH/J is larger than the max efficiency for that date
df['max_efficiency'] = df['date'].apply(lambda x: get_max_efficiency(x))

In [13]:
# Convert the 'date' column to a datetime format
df['date'] = pd.to_datetime(df['date'])

# Extract year from the 'date' column and create a new column 'year'
df['year'] = df['date'].dt.year
df

Unnamed: 0,date,hardware_name,TH/J,max_efficiency,year
0,2010-09-09 12:59:39,gtx460,4.270000e-07,0.000004,2010
1,2010-10-06 20:25:17,4350,3.460000e-07,0.000004,2010
2,2010-10-06 20:25:17,5770,1.940100e-06,0.000004,2010
3,2010-10-06 20:25:17,5870,1.906000e-06,0.000004,2010
4,2010-10-06 20:25:17,gtx260,2.100000e-07,0.000004,2010
...,...,...,...,...,...
14536,2023-10-27 16:39:20,canaan avalonminer 1166 pro,2.382353e-02,-1.000000,2023
14537,2023-10-29 07:11:42,antminer s19,2.923077e-02,-1.000000,2023
14538,2023-10-29 07:11:42,antminer s19 pro,3.384615e-02,-1.000000,2023
14539,2023-10-29 07:11:42,antminer s19 xp,4.651163e-02,-1.000000,2023


In [14]:
df.to_csv('plotdata.csv', index=False)