In [31]:
import pandas as pd
import os
import gzip
import pickle
import openai
import torch.nn.functional as F
import torch
import re
import copy
import tqdm

In [32]:
def get_openai_response(prompt, tokens = 99999, model="gpt-4"):
    # model="gpt-3.5-turbo-16k"
    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "Always answer with the requested csv file."
            },
            {
                "role": "user",
                "content": prompt
            }
            ],
        temperature=0,
        # max_tokens=tokens,
    )
    text = response.choices[0].message.content


    # print("returning ai text:", text)
    return text

In [33]:
df = pd.read_csv('datasets/extracted/date_name_price_original.csv')

# sort by date
df = df.sort_values(by=['date'])

# add row index
df = df.reset_index(drop=True)
df["row_index"] = df.index

# move row index left
cols = df.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df[cols]

df_price_only = copy.deepcopy(df)
df_name_only = copy.deepcopy(df)
df_date_only = copy.deepcopy(df)

In [34]:
# remove rows where hardware_price contains the string "unknown"
df_price_only = df_price_only[df_price_only['hardware_price'].str.contains("unknown") == False]

df_price_only = df_price_only.drop(columns=['date'])
df_price_only = df_price_only.drop(columns=['hardware_name'])

# save
df_price_only.to_csv('datasets/extracted/price_only.csv', index=False)

In [35]:
# drop the date, hardware_price columns
df_name_only = df_name_only.drop(columns=['date'])
df_name_only = df_name_only.drop(columns=['hardware_price'])

# save
df_name_only.to_csv('datasets/extracted/name_only.csv', index=False)

In [36]:
# drop the hardware_price, hardware_name columns
df_date_only = df_date_only.drop(columns=['hardware_price'])
df_date_only = df_date_only.drop(columns=['hardware_name'])

# save
df_date_only.to_csv('datasets/extracted/date_only.csv', index=False)

# GPT-4 processing (prices)

In [6]:
#read the file back as a string
with open('datasets/extracted/price_only.csv', 'r') as file:
    table_text = file.read()

prompt = f"""
{table_text}


The prices in this csv need to be cleaned.
Rewrite the csv with the following change:
Rewrite the price in a standard format, use this format for all rows: "1234$" or "1234BTC" or "1234€" etc. depending on the currency
If a price is a range, keep only the first price. For example "6$-7$" should be written as "6$"
If a price has no currency then assume it is in dollars if it is above 5, otherwise assume it is in BTC
If a price is not valid, for example "USB bitcoin miners", write "unknown"
If a price is per unit of something then here are some examples of how to handle them:
"1.1 btc/Blade" should be written as "1.1BTC per Blade"
".071 / 13 GH/s" should be written as "0.071BTC per 13 GH/s"
"0.40btc each" should be written as "0.40BTC per unit"
"0.5 BTC shares" should be written as "0.5BTC per share"
etc.

""".strip()



print(prompt)

# response = get_openai_response(prompt) # the response is a csv as a string with columns date,hardware_name,hardware_price,hashrate,efficiency


# # save the response
# with open('datasets/extracted/name_and_price_cleaned.csv', 'w') as file:
#     file.write(response)


# df = pd.read_csv('datasets/extracted/price_only_cleaned.csv')


row_index,hardware_price
5,$70
11,$2.10 per hour
13,$200
17,$220
22,$55
23,$79
33,$185
36,$99
45,$280
46,$126
47,$288
52,$109
58,$150
59,$200-300
85,$800
102,107 BTC
103,$580
104,$89
113,$150
135,26 BTC
142,$75 and $101
148,$80
150,$275
151,"$2,000"
152,$1200
153,$200
158,807.1501152800002 BTC
164,$599
171,0.079 BTC
173,$320
176,$2.03636
177,$2.03636
182,$5000
183,4.5 Ghz
184,0.5 BTC shares
185,60EUR
190,0.082BTC
191,0.5 BTC shares
192,2.3 BTC
193,0.5 BTC shares
196,20.7
197,60EUR
198,0.082BTC
199,$2.04236
200,2.04236 btc
209,1 BTC per share
210,0.65 BTC
213,175 GHs Miner
214,.57 btc per chip
215,4.56 BTC
217,4 Btc
224,1.01618 ea.
226,0.082BTC
227,75$
228,60EUR
230,0.081 BTC
235,$7k
237,0.99 BTC
238,1.01693878 ea.
239,$59/chip
240,0.89BTC
241,1BTC/share
243,1btc= 3 GH/s
244,$0.95 per device
245,BTC1.15
246,BTC1.15
247,0.95 BTC
251,4BTC
253,$65
262,"$27,5/chip"
265,.41 to .55 btc
266,$58
267,$20
271,$58
272,0.35 BTC
273,$45
274,1.475 BTC
275,0.10 btc
276,1.65 btc
277,฿1.65
282,1k
285,$4

# adding efficiency and hashrate

In [7]:
#read the file back as a string
with open('datasets/extracted/name_only.csv', 'r') as file:
    table_text = file.read()
    # table_text = table_text.replace("hardware_name", "").strip()



with open('hardware_index.csv', 'r') as file:
    hardware = file.read()

def split_string(s, num_chunks):
    lines = s.splitlines()
    num_lines = len(lines)
    chunk_size = num_lines // num_chunks
    chunks = [lines[i:i+chunk_size] for i in range(0, num_lines, chunk_size)]
    # If there are leftover lines, add them to the last chunk
    if num_lines % num_chunks != 0:
        chunks[-1] += lines[num_chunks*chunk_size:]
    return ['\n'.join(chunk) for chunk in chunks]

In [8]:
output_csv = ""

# for text in split_string(table_text, 5):
for (i,text) in enumerate(tqdm.tqdm(split_string(table_text, 12))):

    prompt = f"""
Here is a table of hardware that is used for mining bitcoin:

{hardware}




I have analyzed posts from a bitcoin forum and created this csv of hardware:

{text.replace("Bitmain ","").replace("  "," ")}



Using the hardware table, rewrite this csv with columns: row_index,hardware_name,hardware_index
If a hardware name is unclear or not in the table of mining hardware, write "unknown" for the hardware_index value. The names don't need to match exactly, use common sense. For example, "Sapphire 7950 Vapor X" refers to the 7950 in the hardware table.

    """.strip()



    # print(prompt)
    # print("\n"*100)

    response = get_openai_response(prompt) # the response is a csv as a string with columns row_index,hardware_name,hardware_index

    print(response)
    print("\n"*100)

    if i == 0:
        output_csv = response
    else:
        output_csv += "\n" + response.replace("row_index,hardware_name,hardware_index", "").strip()


print(output_csv)
# save the output_csv
with open('datasets/extracted/name_index.csv', 'w') as file:
    file.write(output_csv)




  8%|▊         | 1/13 [01:06<13:20, 66.68s/it]

row_index,hardware_name,hardware_index
0,5870,118
1,6870,127
2,poclbm,unknown
3,HD5970,119
4,5870,118
5,5850,117
6,5870,118
7,Intel Graphics Media Accelerator GMA 4500M,unknown
8,5970,119
9,5870,118
10,Radeon 5970,119
11,Amazon EC2 Cluster GPU Instances,unknown
12,Nvidia 295,unknown
13,Thermaltake W0133RU ToughPower 1200W Modular Power Supply,unknown
14,iPhone,unknown
15,5870 bios,118
16,3x5850,117
17,ToughPower 1350W supply,unknown
18,non-reference Sapphire 5870,118
19,graphics cards,unknown
20,reference 5850,117
21,Intel E6300,unknown
22,Intel D975XBX,unknown
23,Coolermaster 700W Silent Pro,unknown
24,motherboards,unknown
25,2GB DDR2 667,unknown
26,PSU,unknown
27,Zotac ION,244
28,550w,unknown
29,69xx,unknown
30,5970,119
31,6990,131
32,5870,118
33,5850,117
34,5850,117
35,58xx,unknown
36,5830,116
37,2.5 dual core,unknown
38,MSI,unknown
39,5800 series cards,unknown
40,GeForce 9600 GSO,188
41,ATI 5xxx GPUs,unknown
42,Radeon 6990,131
43,5870s,118
44,5870s,118
45,HD5830,116
46,HD5830,116
4

 15%|█▌        | 2/13 [02:05<11:22, 62.09s/it]

row_index,hardware_name,hardware_index
100,Rig,unknown
101,HD 5970s,119
102,x6500,92
103,x6500 fpga,92
104,BeagleBone,unknown
105,BitForce,84
106,ATI 3800,unknown
107,ATI 3870x2,unknown
108,BTCMiner,unknown
109,bemicro,unknown
110,ZTEX boards,93
111,2 6850's,126
112,5830,116
113,BitForce Jalapeno,84
114,5830,116
115,5770,115
116,5850,117
117,Radeon 5970,119
118,cgminer,unknown
119,BFL Single,32
120,mod miner quad,90
121,FPGAs,unknown
122,5870m,unknown
123,Alienware M17xr2 laptop,unknown
124,DD-WRT router,unknown
125,Ztex boards,93
126,single sc,32
127,5830,116
128,6950,129
129,pentium 4 pc,unknown
130,6970,130
131,5850,117
132,Avlon,unknown
133,Avalon,20
134,BFL,32
135,Icarus FPGA,87
136,BFL Announcement Trolling Museum #2,unknown
137,Avalon,20
138,BFL,32
139,5970,119
140,Gigabyte GV-R797OC-3GD,unknown
141,Avalon ASIC,20
142,AVALON,20
143,Avalon,20
144,Avalon,20
145,BFGMiner,unknown
146,Avalon ASIC Batch 3,20
147,6950,129
148,6870,127
149,BFL ASIC,32
150,BFL 5 gh/s units,32
151,25 GH/s

 23%|██▎       | 3/13 [03:09<10:28, 62.84s/it]

row_index,hardware_name,hardware_index
201,K16 miner from 40 EUR,unknown
202,Avalon chips,unknown
203,Block Erupter USB,unknown
204,Bitfury ASIC,unknown
205,Avalon chips,unknown
206,K16 miner from 40 EUR,unknown
207,Avalon chips,unknown
208,K16 miner from 40 EUR,unknown
209,Terrahash DX,74
210,BFL 4 GH/s Chips,unknown
211,BFL Single SC Power Brick,unknown
212,Avalon ASIC chips,unknown
213,KNC SATURN,54
214,BFL 4 GH/s Chips,unknown
215,Spieder 8 Chips,unknown
216,KNCMiner,unknown
217,KNC Miner Saturn,54
218,Avalon Chip,unknown
219,XFX 6770,124
220,Jupiter KNCMiner,54
221,KNCMiner,unknown
222,ASICMINER USB Block Erupter,unknown
223,KNCMiner,unknown
224,ASICMiner Erupter USB,unknown
225,BFL asic,unknown
226,AVALON CHIPs,unknown
227,BFL(BitPay),unknown
228,K16 Miner Assembly,unknown
229,K16 from 40 EUR,unknown
230,Avalon Chips,unknown
231,Avalon chips,unknown
232,Avalon reference design from BitSyncom,unknown
233,KnC,unknown
234,BITFURY group buy,unknown
235,KNC Jupiter,54
236,Avalon,unkno

 31%|███       | 4/13 [04:13<09:32, 63.57s/it]

row_index,hardware_name,hardware_index
302,Avalon ASIC Chips,unknown
303,GPUs,unknown
304,ASICs,unknown
305,HP Superdome SX2000 Itanium 2,unknown
306,BitFury Asic Units,unknown
307,Cointerra Shares,unknown
308,GPUs,unknown
309,CPUs,unknown
310,Single efficient machine (20GH),unknown
311,ASICminer Erupter GB#7,unknown
312,ASICMiner blades,unknown
313,HashBug development board,unknown
314,GB BabyJet batch #2,unknown
315,KnCMiner,54
316,BitFury,34
317,ASICMiner USB,unknown
318,CoinCraft Desk,unknown
319,Bitmine,35
320,Cointerra,40
321,ASICMiner Block Erupters,unknown
322,HashFast,51
323,"BPMC ""RED FURY"" USB MINERS",unknown
324,HashFast,51
325,RedFury USB miner 2.2 - 2.7 GH/s,unknown
326,AM Erupters,unknown
327,HashFast Sierra,52
328,HashFast Flash Sale,unknown
329,VMC Board,unknown
330,Bitfury miner,34
331,ZTEX project ZtexBTCMiner-120703,unknown
332,HashFast Sierra,52
333,HashFast Sierra,52
334,Cointerra Terraminer IV,40
335,Sierra,52
336,Hash Fast Baby Jet,50
337,GB HashFast unit,unkn

 38%|███▊      | 5/13 [05:25<08:52, 66.55s/it]

row_index,hardware_name,hardware_index
403,batch #32/31,unknown
404,Plesk,unknown
405,14 USB,unknown
406,Blade miners,unknown
407,ASICMINER ERUPTER USB MINER,unknown
408,KnCMiner Neptune,55
409,BFL 4 GH/s Chips,unknown
410,R19X KnC Neptune,unknown
411,CoinTerra ASIC,40
412,BFL SC single,31
413,KnC,unknown
414,ASIC USB miner,unknown
415,AsicMiner Blade and Cube Miners,unknown
416,AntMiner S1,0
417,Block Erupters,unknown
418,KnCMiner Neptune,55
419,KnCMiner Neptune,55
420,2X NEPTUNE #1,unknown
421,KNC Jupiter,54
422,BFL singles,unknown
423,Bitfury 8board rig,unknown
424,KnC Neptune,55
425,AntMiner S1,0
426,Cubes,unknown
427,2 KnC Jupiters,unknown
428,Neptune,unknown
429,KnC Neptune,55
430,KNC Neptune,55
431,Black Arrow Prospero X-3,37
432,Terraminer IV,unknown
433,BitMine CoinCraft,unknown
434,KNC Neptune,55
435,BitMine CoinCraft,unknown
436,BitMine CoinCraft,unknown
437,KNC Neptune,55
438,Cointerra,unknown
439,Antminer U1 USB Erupter,unknown
440,Antminer U1 USB Erupter,unknown
441,Bitfu

 46%|████▌     | 6/13 [06:28<07:37, 65.38s/it]

row_index,hardware_name,hardware_index
504,Avalon mining machine,unknown
505,BitFury StarterKit,unknown
506,TerraMiner IV,40
507,TerraMiner IV 3,unknown
508,TerraMiner IV 2,unknown
509,Nvidia GeForce GTX 275,223
510,Nvidia GeForce GTX 580,240
511,CoinTerra,unknown
512,CoinTerra,unknown
513,CoinTerra,unknown
514,Bitmine CoinCraft,unknown
515,Fury's miner,unknown
516,TerraMiner IV,40
517,2,unknown
518,2,unknown
519,1,unknown
520,1,unknown
521,1,unknown
522,LA1THS Bitcoin miner,unknown
523,2,unknown
524,10,unknown
525,10,unknown
526,2,unknown
527,GRIDSEED MINER,unknown
528,1,unknown
529,1,unknown
530,1,unknown
531,1,unknown
532,1,unknown
533,1,unknown
534,10,unknown
535,2,unknown
536,1,unknown
537,1,unknown
538,2,unknown
539,1,unknown
540,2,unknown
541,1,unknown
542,1,unknown
543,2,unknown
544,1,unknown
545,10,unknown
546,10,unknown
547,2,unknown
548,Gridseed Dual Asic,unknown
549,1,unknown
550,Antminer,unknown
551,antminer asics,unknown
552,Asicminer USBs,unknown
553,Antminer U1,9
554,An

 54%|█████▍    | 7/13 [07:42<06:47, 67.93s/it]

row_index,hardware_name,hardware_index
605,R9 290,unknown
606,BFL,unknown
607,Ninja Group Buy #3 Hosted,unknown
608,NinjaGRP#7 800GH Rackmount Avalon3 Miner,unknown
609,Antminer U2 2Gh/s USB miner,10
610,AMD ATI Radeon 6300M,unknown
611,Monarch,30
612,S2,1
613,Cointerra,40
614,Dragon,unknown
615,Avalon miner,unknown
616,Technobit HEX8A1,unknown
617,Gridseed USB,unknown
618,BLADE,12
619,Antminer s1,0
620,bitcoin client exe,unknown
621,GUIMiner,unknown
622,Antminer S-1,0
623,Dragon Miner,unknown
624,Antminer S2,1
625,Antminer S2,1
626,A1 Coincraft,unknown
627,Antminer S2,1
628,Antminer S2,1
629,Antminer S2,1
630,Spondoolies Tech SP10,69
631,A1 Coincraft,unknown
632,Ant Miner,unknown
633,Antminer S2,1
634,VMC bitcoin miner,unknown
635,Corsair CX750M,unknown
636,SP-Tech SP15,unknown
637,ASIC chip,unknown
638,Rockminer R-BOX,63
639,Gridseed,unknown
640,Antminer U2,10
641,SP30,71
642,Avalon chips,unknown
643,asic antminer s1,0
644,Rockminer R-Box,63
645,Avalon3 Single Module-290,unknown
646,

 62%|██████▏   | 8/13 [08:46<05:33, 66.70s/it]

row_index,hardware_name,hardware_index
706,ASICMiner Prisma,14
707,Antminer S1 Conversion Kits,unknown
708,Bitfury Bare Chips,unknown
709,ASICMINER TUBE,16
710,Bitmaintech,unknown
711,Alpha Technology,unknown
712,Bitcoinrigs.org,unknown
713,KnC Jupiter,54
714,ASICMINER TUBE,16
715,Bitmine CoinCraft A1,unknown
716,Terraminers,unknown
717,KNC,unknown
718,Cointerra,unknown
719,Ant Miners,unknown
720,Spondoolies Tech SP10,69
721,Spondoolies Tech SP20,70
722,ANTMINER U3,11
723,ASICMiner Prisma,14
724,ASICMiner Prisma,14
725,KNC Neptune,55
726,Rock miner,unknown
727,Gridseeds,unknown
728,Fury,unknown
729,Ant S1,0
730,Lancelot,89
731,Icarus,87
732,usb miner,unknown
733,S-5,unknown
734,Antminer S5,5
735,AntMiner S3,2
736,Gridseed 5 Chip miners,unknown
737,Minera 0.4.0,unknown
738,AntMiner S5,5
739,R-BOX,unknown
740,Butterfly labs Jalapeo,unknown
741,Antminer S3,2
742,Hashblaster,unknown
743,Spondoolies Tech SP35,73
744,SP20,unknown
745,Corsair 750,unknown
746,evga 1300G2 Supernova,unknown
747,

 69%|██████▉   | 9/13 [09:52<04:26, 66.72s/it]

row_index,hardware_name,hardware_index
807,SIDEHACK STICK,unknown
808,Avalon Nano 3 USB,unknown
809,S5,5
810,Smart Miner 3.0,unknown
811,Radeon R9 290X,unknown
812,Antminer USB,unknown
813,S7,7
814,BM1384,unknown
815,avalon 4.1,unknown
816,antminer U2,10
817,sidehacks compacs,unknown
818,Antminer S5,5
819,Innosilicon A2 Terminator,unknown
820,Antminer S7,7
821,S5,5
822,Prismas,unknown
823,BFL 60,unknown
824,coincraft A1,unknown
825,technobit Hex miners,unknown
826,Dragon 1T,unknown
827,A1 miners,unknown
828,Bitmine Coincraft Desk V2,unknown
829,Dragon 1.5T,unknown
830,S5,5
831,Dragon miner,unknown
832,S3,2
833,Dragons,unknown
834,A3,unknown
835,21 Bitcoin Computer,unknown
836,Spondoolies Tech SP 20,70
837,Sidehack Stick,unknown
838,Sidehack Compac,unknown
839,Block Erupter 330MH/s,unknown
840,S3,2
841,SHA256d,unknown
842,CoincraftDesk,unknown
843,CoincraftDesk,unknown
844,CoincraftDesk,unknown
845,Gridseed Dual Asic Round 2,unknown
846,CoincraftDesk,unknown
847,CoincraftDesk,unknown
84

 77%|███████▋  | 10/13 [11:02<03:22, 67.61s/it]

row_index,hardware_name,hardware_index
908,S9,8
909,S7,7
910,Bitfury,unknown
911,Avalon,unknown
912,Antminer S9,8
913,AntMiner S7,7
914,Antiminers,unknown
915,gpu rig,unknown
916,S1,0
917,s7,7
918,s9,8
919,1600w PSU,unknown
920,HP 1200,unknown
921,Antminer R4,unknown
922,T9,unknown
923,bitcoin miner s5,5
924,usb miner,unknown
925,Avalon nano,unknown
926,old miner,unknown
927,AvalonMiner 721,23
928,A7,unknown
929,Avalon A6,22
930,FSP AURUM PRO 1200W 80+GOLD,unknown
931,HP 441830-001 1200W DL580 G5 DPS-1200FB A,unknown
932,DPS-2000,unknown
933,DPS-2500,unknown
934,IBM 2880,unknown
935,Sidehack 1200,unknown
936,GUI Miner,unknown
937,BITMAIN ANTMINERS,unknown
938,DIY PCI-e cables,unknown
939,GekkoScience,unknown
940,Antminer S7,7
941,S9 Board,unknown
942,S9,8
943,Pandaminer,unknown
944,Block Erupter USB,unknown
945,bm1385 chips,unknown
946,S5 chip,unknown
947,Stud hub from Klintay,unknown
948,s9s,8
949,Bitfury,unknown
950,B1 Avalon,unknown
951,Antminers,unknown
952,PSU,unknown
953,EVGA1600

 85%|████████▍ | 11/13 [12:23<02:23, 71.65s/it]

row_index,hardware_name,hardware_index
1009,inline ethernet surge protector,unknown
1010,server PSUs,unknown
1011,fiber link,unknown
1012,surge arrestor,unknown
1013,APW3,unknown
1014,ATX psu,unknown
1015,Avalon 6,22
1016,Antminer S9,8
1017,HP Server Power Supplies,unknown
1018,Antminer S9,8
1019,BTMine v1 Miner,unknown
1020,Antminer s2,1
1021,Antminer s1,0
1022,Halong Mining DragonMint T1,unknown
1023,Antminer S9,8
1024,Avalon 741,24
1025,Antminer S9,8
1026,Innosilicon T2,unknown
1027,S9,8
1028,BFGMiner,unknown
1029,CGMiner,unknown
1030,CGWatcher 1.4.0,unknown
1031,Antminer S9,8
1032,Antminer S7,7
1033,$5600.00,unknown
1034,S9 Rackmount Self,unknown
1035,Antminer T9,unknown
1036,Antminer S15,unknown
1037,Antminer S9,8
1038,Whatsminer M20S,unknown
1039,S17,unknown
1040,T2,unknown
1041,T30,unknown
1042,T15,unknown
1043,S9k,unknown
1044,S15,unknown
1045,Delta QFR1212GHE,unknown
1046,T9+,unknown
1047,Beagleboard,unknown
1048,Noctua industrialPPC,unknown
1049,Avalon841,unknown
1050,Antmine

 92%|█████████▏| 12/13 [13:32<01:10, 70.85s/it]

row_index,hardware_name,hardware_index
1110,s9,8
1111,Avalon 1047,unknown
1112,Black arrow Prospero X1,unknown
1113,Asic S19j Pro,unknown
1114,Antminer S9,8
1115,S9,8
1116,S17 Pro,unknown
1117,Whatsminers,unknown
1118,L3,unknown
1119,Antminer S19,unknown
1120,S17,unknown
1121,S19J Pro 104T,unknown
1122,Avalon A1166 81T,unknown
1123,S17+,unknown
1124,S17,unknown
1125,Canaan Avalon 1246,unknown
1126,Antminer s9,8
1127,cpuminer,unknown
1128,M20s,unknown
1129,A1246,unknown
1130,S17,unknown
1131,Antminer,unknown
1132,Butterfly Labs,unknown
1133,S9,8
1134,S9k,unknown
1135,Compac F,unknown
1136,Apollo BTC,unknown
1137,T17,unknown
1138,Whatsminer,unknown
1139,Ebang Ebit 10.1,unknown
1140,T17,unknown
1141,S19 XP,unknown
1142,T19 Hydro,unknown
1143,741,24
1144,Apollo BTC,unknown
1145,A1066,unknown
1146,A1246 Series,unknown
1147,A1166,unknown
1148,A721,23
1149,821,26
1150,1047,unknown
1151,A10 Series,unknown
1152,A1246 93TH,unknown
1153,1041,unknown
1154,921,unknown
1155,A721,23
1156,841,unknown


100%|██████████| 13/13 [13:41<00:00, 63.19s/it]

row_index,hardware_name,hardware_index
1211,Antminer S15,unknown
1212,Goldshell KD2,unknown
1213,bitaxeUltra,unknown
1214,insilicon A11 pro,unknown
1215,Microbt,unknown
1216,Antminer S21,unknown
1211,Antminer S15,unknown
1212,Goldshell KD2,unknown
1213,bitaxeUltra,unknown
1214,insilicon A11 pro,unknown
1215,Microbt,unknown
1216,Antminer S21,unknown





































































































row_index,hardware_name,hardware_index
0,5870,118
1,6870,127
2,poclbm,unknown
3,HD5970,119
4,5870,118
5,5850,117
6,5870,118
7,Intel Graphics Media Accelerator GMA 4500M,unknown
8,5970,119
9,5870,118
10,Radeon 5970,119
11,Amazon EC2 Cluster GPU Instances,unknown
12,Nvidia 295,unknown
13,Thermaltake W0133RU ToughPower 1200W Modular Power Supply,unknown
14,iPhone,unknown
15,5870 bios,118
16,3x5850,117
17,ToughPower 1350W supply,unknown
18,non-reference Sapphire 5870,118
19,graphics cards,unknown
20,reference 5850,117
21,Intel E6300,unknown
22,In




# join the tables

In [37]:
name_index = pd.read_csv('datasets/extracted/name_index.csv') # columns: row_index,hardware_name,hardware_index
hardware_full = pd.read_csv('hardware_full.csv') # columns: hardware_name,hashrate,efficiency,hardware_index
price_only_cleaned = pd.read_csv('datasets/extracted/price_only_cleaned.csv') # columns: row_index,hardware_price
date_only = pd.read_csv('datasets/extracted/date_only.csv') # columns: row_index,date

# left join name_index and price_only_cleaned on row_index
df = pd.merge(name_index, price_only_cleaned, on='row_index', how='left')

# left join df and date_only on row_index
df = pd.merge(df, date_only, on='row_index', how='left')

# replace "unknown" with -1
df["hardware_index"]  = df["hardware_index"].replace("unknown", -1)

# left join df and hardware_full on hardware_index
df["hardware_index"] = df["hardware_index"].astype(int)
hardware_full["hardware_index"] = hardware_full["hardware_index"].astype(int)
hardware_full = hardware_full.drop(columns=['hardware_name'])
df = pd.merge(df, hardware_full, on='hardware_index', how='left')

# save it
df.to_csv('datasets/extracted/full_table.csv', index=False)

In [38]:
df.sample(20)

Unnamed: 0,row_index,hardware_name,hardware_index,hardware_price,date,hashrate,efficiency
951,951,Antminers,-1,,2017-05-23,,
371,371,BLACK ARROW Bullet Run,-1,123$,2013-10-28,,
744,744,SP20,-1,,2015-02-13,,
558,558,hex miner 15 Gigahashes USB Stick,-1,,2014-03-12,,
143,143,Avalon,20,,2013-03-26,300000.0,unknown
725,725,KNC Neptune,55,0.6BTC,2014-11-20,300000.0,1429
635,635,Corsair CX750M,-1,129.99$,2014-06-04,,
475,475,P2Pool,-1,,2014-01-15,,
721,721,Spondoolies Tech SP20,70,,2014-11-10,170000.0,1545
871,871,r7 370,-1,,2016-03-01,,


# fix btc prices

In [39]:
df = pd.read_csv('datasets/extracted/full_table.csv')
df['hardware_price'] = df['hardware_price'].astype(str)

In [40]:
from torch import save, load
date_to_btc_price = load('date_open_dict.pt')

In [41]:
# for every row where hardware_price contains "BTC", get the price in dollars using the dict and the amount of BTC
for index, row in df.iterrows():
    if "BTC" in row['hardware_price']:
        btc_price = row['hardware_price'].replace(",", "").split("BTC")[0].strip()
        try:
            btc_price = float(btc_price)
        except:
            btc_price = -1
        date = row['date']
        
        # get the price in dollars
        dollars_price = date_to_btc_price[date] * btc_price
        dollars_price = round(dollars_price, 3)

        # replace the price in the dataframe
        if not btc_price == -1:
            df.at[index, 'hardware_price'] = f"{dollars_price}$"
        else:
            df.at[index, 'hardware_price'] = f""

# drop rows where hardware_price contains €
df = df[df['hardware_price'].str.contains("€") == False]

#remove all characters except for numbers and .
df['hardware_price'] = df['hardware_price'].apply(lambda x: re.sub(r"[^0-9.]", "", x).strip())

#columns are date,hardware_name,hardware_price,hashrate,efficiency
#they should be date,hardware_name,hardware_price_usd,Mhash/s/$,Mhash/J
#rename columns
df = df.rename(columns={"hardware_price": "hardware_price_usd", "hashrate": "Mhash/s/$", "efficiency": "Mhash/J"})

# save the dataframe
df.to_csv('datasets/extracted/final.csv', index=False)
        