In [34]:
import pandas as pd
import os
import gzip
import pickle
import openai
import torch.nn.functional as F
import torch

In [35]:
def get_openai_response(prompt, tokens = 2000, model="gpt-4"):
    """Send a single user prompt to an OpenAI chat model and return the reply text.

    Args:
        prompt: Text sent as the user message.
        tokens: Upper bound on generated tokens, forwarded as ``max_tokens``.
        model: Chat model name (e.g. "gpt-4" or "gpt-3.5-turbo").

    Returns:
        The message content of the first choice in the API response.
    """
    # Fixed system message followed by the caller's prompt.
    conversation = [
        {"role": "system", "content": "You are a helpful assistant"},
        {"role": "user", "content": prompt},
    ]

    # Sampling settings are fixed: temperature 0 and top_p 1.
    request = dict(
        model=model,
        messages=conversation,
        temperature=0,
        max_tokens=tokens,
        top_p=1,
    )

    completion = openai.ChatCompletion.create(**request)
    return completion.choices[0].message.content

In [36]:
# Load the extracted (date, hardware name, price) rows in chronological order.
df = (
    pd.read_csv('datasets/extracted/name_and_price.csv')
    .sort_values(by=['date'])
)

# Keep only rows whose price does not mention "unknown". Comparing with False
# (rather than negating) keeps only rows where the match result is exactly False.
mentions_unknown = df['hardware_price'].str.contains("unknown")
df = df[mentions_unknown.eq(False)]

# Persist the filtered table; the next step reads it back as raw text.
df.to_csv('datasets/extracted/name_and_price_no_unknown.csv', index=False)

# GPT-4 processing

In [37]:
# Read the filtered CSV back as raw text so it can be embedded in the prompt.
with open('datasets/extracted/name_and_price_no_unknown.csv', 'r') as file:
    table_text = file.read()

# Reference list of mining hardware, one "name;hashrate;efficiency" entry per line.
with open('hardware.txt', 'r') as file:
    hardware = file.read()

prompt = f"""
{hardware}


This is a list of hardware that is used for mining bitcoin.
The values are separated by semi-colons.
The first value is the name, the second value is the hashrate, the third value is the efficiency. (don't worry about the units)
If a piece of hardware only has numbers, it's a radeon card.


{table_text}


I have analyzed posts from a bitcoin forum and created this csv.
Rewrite the csv with the following changes:

1. Rewrite the price in a standard format, use this format for all rows: "1234$" or "1234BTC" or "1234€" etc. depending on the currency
If a price is per unit, keep only the price
If a price is a range, keep only the first price
If a price has no currency, assume it is in $

2. Add 2 new columns: "hashrate" and "efficiency"
If a hardware name is unclear or not in the list, write "unknown" for both values. The names don't need to match exactly, use common sense.

""".strip()

print(prompt)

# The model call is only made when no cleaned CSV has been saved yet, so a
# normal re-run reuses the stored answer instead of querying the API again,
# while a fresh checkout can still regenerate the file.
# The response is expected to be a CSV string with the columns
# date,hardware_name,hardware_price,hashrate,efficiency.
cleaned_csv_path = 'datasets/extracted/name_and_price_cleaned.csv'
if not os.path.exists(cleaned_csv_path):
    response = get_openai_response(prompt)
    with open(cleaned_csv_path, 'w') as file:
        file.write(response)

df = pd.read_csv(cleaned_csv_path)


AntMiner S1;180000;500
AntMiner S2;100000;900
AntMiner S3;441000;1300
AntMiner S3+;453000;1282
AntMiner S4;200000;1429
AntMiner S5;115500;1957
AntMiner S5+;772200;2247
AntMiner S7;486000;4000
AntMiner S9;140000;10182
AntMiner U1;1600;800
AntMiner U2+;2000;1000
AntMiner U3;63000;1000
ASICMiner BE Blade;10752;129
ASICMiner BE Cube;30000;150
ASICMiner BE Prisma;140000;1333
ASICMiner BE Sapphire;336;130
ASICMiner BE Tube;800000;888
Avalon Batch 1;663001;107
Avalon Batch 2;820001;117
Avalon Batch 3;820001;117
Avalon2;300000;unknown
Avalon3;800000;unknown
Avalon6;350000;unknown
Avalon721;600000;6000
Avalon741;730000;6350
Avalon761;880000;6670
Avalon821;110000;9170
BFL 230 GH/s Rack Mount;230000;unknown
BFL 500 GH/s Mini Rig SC;500000;185
BFL Little Single;30000;unknown
BFL Monarch 700GH/s;700000;1428
BFL SC 10 Gh/s;10000;unknown
BFL SC 25 Gh/s;25000;166
BFL SC 50 Gh/s;50000;166
BFL SC 5Gh/s;5000;166
BFL Single 'SC';60000;250
bi*fury;5000;1176
BitFury S.B.;unknown;unknown
Bitmine.ch Avalon Cl

# fix btc prices

In [38]:
# Load the lookup used to convert BTC-denominated prices into dollars.
# The conversion loop below indexes it with each row's `date` value and
# multiplies the result by the BTC amount; presumably it maps a date to that
# day's BTC opening price in USD (going by the file name) — TODO confirm.
# NOTE(review): torch.load unpickles the file by default, which can execute
# arbitrary code — only load files from a trusted source.
from torch import save, load
date_to_btc_price = load('date_open_dict.pt')

In [39]:
# Convert every price quoted in BTC into dollars, using the BTC rate looked up
# by the row's date, and store it back in the "<amount>$" format.
for index, row in df.iterrows():
    price = row['hardware_price']

    # An empty price cell is read from the CSV as a float NaN, and
    # `"BTC" in <float>` raises TypeError, so skip anything that is not text.
    if not isinstance(price, str) or "BTC" not in price:
        continue

    # Strip the currency marker and thousands separators: "1,234BTC" -> 1234.0
    btc_amount = float(price.replace("BTC", "").replace(",", "").strip())

    # A date missing from the lookup still raises KeyError on purpose, so that
    # gaps in the rate table are noticed rather than silently skipped.
    dollars_price = round(date_to_btc_price[row['date']] * btc_amount, 3)

    # Write the converted price back into the dataframe.
    df.at[index, 'hardware_price'] = f"{dollars_price}$"

# Save the dataframe with all BTC prices expressed in dollars.
df.to_csv('datasets/extracted/name_and_price_cleaned_usd.csv', index=False)
        