In [1]:
import pandas as pd
import os
import gzip
import pickle
import openai
import torch.nn.functional as F
import torch
import re
import copy

In [None]:
with open('hardware.txt', 'r') as file:
    hardware_txt = file.read()

hardware_df = pd.DataFrame([x.split(';') for x in hardware_txt.split('\n')], columns=['hardware_name', 'hashrate', 'efficiency'])


#remove rows that contain x2,x3 etc
hardware_df = hardware_df[~hardware_df['hardware_name'].str.contains("x[0-9]")]
hardware_df = hardware_df[~hardware_df['hardware_name'].str.contains("cards")]

#remove text in brackets from hardware_name
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"\(.*\)","", x).strip())
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"OC","", x).strip())
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"\d+ *Gh/s","", x).strip())
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"\d+ *GH/S","", x).strip())
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"\d+ *GH/s","", x).strip())

#remove duplicate hardware names
hardware_df = hardware_df.drop_duplicates(subset=['hardware_name'])

#reset index
hardware_df = hardware_df.reset_index(drop=True)
hardware_df["hardware_index"] = hardware_df.index
hardware_df.to_csv('hardware_full.csv', index=False)

# save it as a csv with columns "index,hardware_name"
hardware_df = hardware_df[['hardware_index', 'hardware_name']]
hardware_df.to_csv('hardware_index.csv', index=False)

In [4]:
with open('hardware_new.txt', 'r') as file:
    hardware_txt = file.read()

hardware_df = pd.DataFrame([x.split(';') for x in hardware_txt.split('\n')], columns=['hardware_name', 'date', 'speed','power','noise','hash','profit'])

# keep only SHA-256
hardware_df = hardware_df[hardware_df['hash'] == "SHA-256"]

# efficiency = speed/power
hardware_df['Mhash/J'] = hardware_df['speed'].str.replace("Th/s","").astype(float)/hardware_df['power'].str.replace("W","").astype(float) * 1000000

#remove text in brackets from hardware_name
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"\(.*\)","", x).strip())
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"OC","", x).strip())
hardware_df['hardware_name'] = hardware_df['hardware_name'].apply(lambda x: re.sub(r"\d+ *Th/s","", x).strip())

# rename date to hardware_release_date
hardware_df = hardware_df.rename(columns={"date": "hardware_release_date"})

#reset index
hardware_df = hardware_df.reset_index(drop=True)
hardware_df["hardware_index"] = hardware_df.index
hardware_df.to_csv('hardware_full_new.csv', index=False)

# save it as a csv with columns "index,hardware_name"
hardware_df = hardware_df[['hardware_index', 'hardware_name']]
hardware_df.to_csv('hardware_index_new.csv', index=False)

In [24]:
old = pd.read_csv('hardware_full.csv') #hardware_name,hashrate,efficiency,hardware_index
new = pd.read_csv('hardware_full_new.csv') #hardware_name,hardware_release_date,speed,power,noise,hash,profit,Mhash/J,hardware_index

# rename columns
old = old.rename(columns={"hashrate": "speed", "efficiency": "Mhash/J"})

old = old[['hardware_name', "Mhash/J"]]
new = new[['hardware_name', "Mhash/J"]]

# merge the two hardware csvs
hardware_df = pd.concat([old, new]).reset_index(drop=True)

# remove rows with "unknown" Mhash/J
hardware_df = hardware_df[hardware_df['Mhash/J'] != "unknown"]

# make all names lowercase
hardware_df['hardware_name'] = hardware_df['hardware_name'].str.lower()

# remove duplicate hardware names
hardware_df = hardware_df.drop_duplicates(subset=['hardware_name'])

# sort by hardware_name
hardware_df = hardware_df.sort_values(by=['hardware_name'])


In [25]:
hardware_df

Unnamed: 0,hardware_name,Mhash/J
267,2x opteron 6128,32.4
266,2x opteron 6172,55
168,320m,0.35
96,3410,0.074
97,4350,0.346
...,...,...
378,xeon w3680,18
360,xeon x5355,16.445
362,xeon x5365,26
363,xeon x5650,28.6


In [26]:
hardware_df.to_csv('hardware_merged.csv', index=False)