In [1]:
import pandas as pd
import os
import gzip
import pickle
import openai
import torch.nn.functional as F
import torch

In [2]:
categories = [
    "groupbuys",
    "hardware",
    "miners",
    "mining",
    "mining_support",
    "pools",
]

# Load the gzip-compressed pickle of every category, tag its rows with the
# category name, and stack all of them into a single frame.
# NOTE: pickle.load can execute arbitrary code while deserializing -- only run
# this on files that come from a trusted source.
frames = []
for cat in categories:
    with gzip.open('cleaned-data/'+cat+'.pkl.gz', 'rb') as f:
        df_cat = pickle.load(f)
    df_cat['category'] = cat
    frames.append(df_cat)

# One concat at the end instead of re-copying a growing frame on every pass.
df = pd.concat(frames, ignore_index=True)
    

In [3]:
def get_openai_response(prompt, tokens = 2000, model="gpt-3.5-turbo-instruct"):
    """Send `prompt` to an OpenAI model and return the generated text.

    Models whose name contains "instruct" are called through
    `openai.Completion.create`; every other model is called through
    `openai.ChatCompletion.create` with a fixed system message and `prompt` as
    the user message. Both requests use temperature 0 and top_p 1.

    Args:
        prompt: Text sent as the completion prompt or as the user message.
        tokens: Value passed to the API as `max_tokens`.
        model: Name of the model to query.

    Returns:
        The text of the first choice in the API response.
    """
    # Settings shared by both endpoints.
    sampling = {
        "temperature": 0,
        "max_tokens": tokens,
        "top_p": 1,
    }

    if "instruct" in model:
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            logit_bias={
                "198": -100, # new lines
                },
            # NOTE(review): the log-probabilities requested here are never read
            # by this function -- confirm they are needed and that the API
            # accepts a value this large.
            logprobs=15,
            **sampling,
        )
        return response.choices[0].text

    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant"
            },
            {
                "role": "user",
                "content": prompt
            }
            ],
        **sampling,
    )
    return response.choices[0].message.content

# hardware name

In [4]:
path = "datasets/extracted/"
file_name = "hardware_name.csv"


# Every sampled thread costs one API call, so the extraction only runs when the
# cached CSV is missing; delete the file to regenerate it.
if not os.path.exists(path+file_name):
    prompt = """
    You are a bitcoin mining expert.

    The will be given a thread from a bitcoin forum.

    Your task is to analyze if the thread contains a name of mining hardware. 

    Example hardware names are ARM Cortex A9, X6500, AvalonMiner 1, Jupiter, RockerBox, BE300, PickAxe, Antminer S9 (there are many more)
    Sometimes common hardware is mentioned by its number only. For example, "5870" may refer to a Radeon card. "S9" may refer to the Antminer S9. Those are also valid hardware names.

    After reading the thread, write either "Hardware found: Nothing" or "Hardware found: <hardware name>".

    The thread:
    """.strip()

    # Hits are gathered as plain dicts and turned into a DataFrame once at the
    # end, instead of re-concatenating a growing frame on every hit.
    records = []

    thread_count = 0
    # sample() raises when asked for more rows than df has, so cap the size.
    for (_, row) in df.sample(min(len(df), 10000)).iterrows():
        # Skip threads whose combined post text is under 100 characters.
        if len(row["post"]) < 100:
            continue

        # Date of the last entry in the thread's date list. It gets its own
        # name so the post loop below cannot overwrite it before it is stored.
        last_date = row["dates"].split("<sep>")[-1]

        # Posts under 5 characters are dropped; posts over 800 are cut short.
        kept_posts = [
            post[:800] + "<rest of post truncated>" if len(post) > 800 else post
            for post in row["post"].split("<sep>")
            if len(post) >= 5
        ]
        if not kept_posts:
            # Nothing readable is left, so there is nothing to send.
            continue

        thread = ""
        thread += "Topic: " + row["topic"] + "\n"
        thread += "Date: " + last_date[:7] + "\n\n"
        # The first kept post is the original post, the rest are numbered
        # replies. Each header is written together with its post, so no
        # dangling header has to be stripped afterwards.
        for n, post in enumerate(kept_posts):
            thread += "### Original post:\n" if n == 0 else f"### Reply {n}:\n"
            thread += post + "\n\n"
        if len(thread) > 4000:
            thread = thread[:4000] + "<rest of thread truncated>\n"

        print(thread)

        actual_prompt = prompt + "\n" + thread + "\n\n\n\n\n\n\n\n"

        response = get_openai_response(actual_prompt, tokens = 20)
        print("response:", response, "\n\n\n\n\n\n")

        if "Nothing" in response:
            continue

        records.append({
            # Drops the last 9 characters -- presumably the " HH:MM:SS" time
            # part of the timestamp; confirm against the data format.
            "date": last_date[:-9],
            "hardware_name": response.replace("Hardware found: ", "").strip(),
        })

        # Stop once 21 hits have been collected.
        thread_count += 1
        if thread_count > 20:
            break

    # Explicit columns keep value_counts() and the CSV header valid even when
    # no thread produced a hit.
    dataset = pd.DataFrame(records, columns=["date", "hardware_name"])
    print(dataset.value_counts())

    #save the dataset
    os.makedirs(path, exist_ok=True)
    dataset.to_csv(path+file_name, index=True)

Topic: Custom firmware for avalon?
Date: 2021-10

### Original post:
Hi people,Anyone have custom firmware for avalon asic? I have 1166 and 1246 asic, but with the original firmware we cant custom anythingThanks

### Reply 1:
Incorrect. The Avalon's are highly tweakable. Ref their privileged API user manual

### Reply 2:
eh not really we got them to open their api a few years ago and found they are really lacking a lot of useful stuff but it's a good start.

### Reply 3:
Bull. From day-1 the Avalons have always been open with full source code available on their git. As a matter of fact, when Canaan introduced the Avalons Kano wrote their API.Now their most recent miners starting with the A10, ja there are less things to tweak and Canaan made it a little harder to find what tweaks you can do but it is still open. Now, that's not to say that the tweaking process can't be improved, after all lots of folks just don't get using a CLI however, ain't no way in hell would I pay for a GUI-based

# hardware price

In [5]:
file_name = "hardware_price.csv"

# Every sampled thread costs one API call, so the extraction only runs when the
# cached CSV is missing; delete the file to regenerate it.
if not os.path.exists(path+file_name):
    prompt = """
    You are a bitcoin mining expert.

    The will be given a thread from a bitcoin forum.

    Your task is to analyze if the thread mentions a hardware price.

    After reading the thread, write either "Hardware price found: Nothing" or "Hardware price found: <hardware price>".

    The thread:
    """.strip()

    # Only threads that mention a currency symbol, code or name are sent to the
    # model. Matching is a plain substring test on the lower-cased thread text.
    whitelist = [
        "$",
        "usd",
        "dollar",
        "€",
        "eur",
        "euro",
        "£",
        "gbp",
        "pound",
        "yen",
        "jpy",
        "cny",
        "rmb",
        "yuan",
        "ruble",
    ]

    # Hits are gathered as plain dicts and turned into a DataFrame once at the
    # end, instead of re-concatenating a growing frame on every hit.
    records = []

    thread_count = 0
    # sample() raises when asked for more rows than df has, so cap the size.
    for (_, row) in df.sample(min(len(df), 10000)).iterrows():
        # Skip threads whose combined post text is under 100 characters.
        if len(row["post"]) < 100:
            continue

        # Date of the last entry in the thread's date list. It gets its own
        # name so the post loop below cannot overwrite it before it is stored.
        last_date = row["dates"].split("<sep>")[-1]

        # Posts under 5 characters are dropped; posts over 800 are cut short.
        kept_posts = [
            post[:800] + "<rest of post truncated>" if len(post) > 800 else post
            for post in row["post"].split("<sep>")
            if len(post) >= 5
        ]
        if not kept_posts:
            # Nothing readable is left, so there is nothing to send.
            continue

        thread = ""
        thread += "Topic: " + row["topic"] + "\n"
        thread += "Date: " + last_date[:7] + "\n\n"
        # The first kept post is the original post, the rest are numbered
        # replies. Each header is written together with its post, so no
        # dangling header has to be stripped afterwards.
        for n, post in enumerate(kept_posts):
            thread += "### Original post:\n" if n == 0 else f"### Reply {n}:\n"
            thread += post + "\n\n"
        if len(thread) > 4000:
            thread = thread[:4000] + "<rest of thread truncated>\n"

        thread_lower = thread.lower()
        if not any(keyword in thread_lower for keyword in whitelist):
            continue

        print(thread)

        actual_prompt = prompt + "\n" + thread + "\n\n\n\n\n\n\n\n"

        response = get_openai_response(actual_prompt, tokens = 20)
        print("response:", response, "\n\n\n\n\n\n")

        if "Nothing" in response:
            continue

        records.append({
            # Drops the last 9 characters -- presumably the " HH:MM:SS" time
            # part of the timestamp; confirm against the data format.
            "date": last_date[:-9],
            "hardware_price": response.replace("Hardware price found: ", "").replace("<hardware price>", "").strip(),
        })

        # Stop once 21 hits have been collected.
        thread_count += 1
        if thread_count > 20:
            break

    # Explicit columns keep value_counts() and the CSV header valid even when
    # no thread produced a hit.
    dataset = pd.DataFrame(records, columns=["date", "hardware_price"])
    print(dataset.value_counts())

    #save the dataset
    os.makedirs(path, exist_ok=True)
    dataset.to_csv(path+file_name, index=True)

Topic: Bitburner Fury - now for sale
Date: 2013-09

### Original post:
Is the price of EUR 699,- including VAT or without VAT?

### Reply 1:
No VAT will be applied for people outside the EU. For EU customers a 21% VAT will be applied. If youre running a business within the EU and have a valid VAT identification number then you dont have to pay VAT.

### Reply 2:
Without Risky proposition. Might make some money in week 1, then watch the difficulty rocket. Depends on KNC.

### Reply 3:
What is this "external clock" ? Is it provided on the board, or is it just a connector where the customer can provide a clock ? Does every chip have its own external clock, or are they chained ? "Demonstrated performance of 64GH/s per board" - Does this mean you have a working board with 16 chips, each doing 4GH/sec, or is it an extrapolation from the one test chip ?

### Reply 4:
Paid by SEPA - says I have to send proof of payment within 24 hours - where do I send proof to? Can't find an e-mail address an