In [17]:
import pandas as pd
import os
import gzip
import pickle
import openai
import torch.nn.functional as F
import torch

In [18]:
categories = [
    "groupbuys",
    "hardware",
    "miners",
    "mining",
    "mining_support",
    "pools",
]

# Load the gzipped pickle of every category from cleaned-data/, tag its rows
# with the category name, and stack everything into one frame.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so only load archives that come from a trusted source.
category_frames = []
for cat in categories:
    with gzip.open('cleaned-data/' + cat + '.pkl.gz', 'rb') as f:
        df_cat = pickle.load(f)
    df_cat['category'] = cat
    category_frames.append(df_cat)

# Concatenate once at the end: growing `df` inside the loop re-copied every
# previously loaded row on each iteration.
df = pd.concat(category_frames, ignore_index=True)
    

In [19]:
def get_openai_response(prompt, tokens = 2000, model="gpt-3.5-turbo-instruct"):
    """Send `prompt` to an OpenAI model and return the generated text.

    Models whose name contains "instruct" are called through
    `openai.Completion.create`; every other model is called through
    `openai.ChatCompletion.create` with a fixed system message and the prompt
    as the single user message. Both calls use temperature 0 and top_p 1.

    Args:
        prompt: Text to send to the model.
        tokens: Value passed to the API as `max_tokens`.
        model: Model name; it also selects which endpoint is used.

    Returns:
        The text of the first choice, exactly as the API returned it
        (no stripping or other post-processing).
    """
    if "instruct" in model:
        # `logprobs` is not requested: only the text of the first choice is
        # used, so per-token log-probabilities would simply be discarded.
        response = openai.Completion.create(
            model=model,
            prompt=prompt,
            temperature=0,
            max_tokens=tokens,
            top_p=1,
            logit_bias={
                "198": -100, # new lines
            },
        )
        return response.choices[0].text

    response = openai.ChatCompletion.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": "You are a helpful assistant"
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        temperature=0,
        max_tokens=tokens,
        top_p=1,
    )
    return response.choices[0].message.content

# hardware name

In [20]:
prompt = """
You are a bitcoin mining expert.

You will be given a thread from a bitcoin forum.

Your task is to analyze if the thread contains a name of mining hardware. 

Example hardware names are ARM Cortex A9, X6500, AvalonMiner 1, Jupiter, RockerBox, BE300, PickAxe, Antminer S9 (there are many more)
Sometimes common hardware is mentioned by its number only. For example, "5870" may refer to a Radeon card. "S9" may refer to the Antminer S9. Those are also valid hardware names.

After reading the thread, write either "Hardware found: Nothing" or "Hardware found: <hardware name>".

The thread:
""".strip()

# Earlier prompt variants, kept for reference:
# If the thread contains a name of mining hardware, say "Yes", followed by its name. Otherwise, say "No".
# Start by listing potential hardware names you saw in the thread. Then verify if those names are actually hardware names or just mining pools/cloud providers. Finally, write one more line with either "Hardware found: Nothing" or "Hardware found: <hardware name>".

# Tunables for the labelling run.
HARDWARE_SAMPLE_SIZE = 100   # number of threads drawn from `df`
HARDWARE_SAMPLE_SEED = 0     # set to None for a fresh random sample each run
MIN_THREAD_CHARS = 100       # threads whose raw "post" field is shorter are skipped
MIN_POST_CHARS = 5           # individual posts shorter than this are dropped
MAX_POST_CHARS = 800         # longer posts are cut and marked as truncated
MAX_THREAD_CHARS = 4000      # longer rendered threads are cut and marked as truncated


def format_thread_for_prompt(row, max_post_chars=MAX_POST_CHARS, max_thread_chars=MAX_THREAD_CHARS):
    """Render one forum thread as the text block appended to the prompt.

    `row` must provide "topic", "post" and "dates"; "post" and "dates" are
    strings whose entries are separated by "<sep>".

    The result starts with a "Topic:" line and a "Date:" line (first seven
    characters of the last date entry, e.g. "2014-03"), followed by the kept
    posts. The first kept post goes under "### Original post:", later ones
    under "### Reply N:". Posts shorter than MIN_POST_CHARS are dropped, posts
    longer than `max_post_chars` are cut, and the whole text is cut at
    `max_thread_chars`; both cuts are marked with a "<rest of ... truncated>"
    note.

    When no post is kept, only the topic/date header is returned.
    """
    posts = row["post"].split("<sep>")
    dates = row["dates"].split("<sep>")

    parts = [
        "Topic: " + row["topic"] + "\n",
        "Date: " + dates[-1][:7] + "\n\n",
    ]

    # Posts are paired with dates only to keep the original behaviour of
    # ignoring posts that have no matching date entry; the date itself is
    # not printed per post.
    kept_posts = [post for post, _date in zip(posts, dates) if len(post) >= MIN_POST_CHARS]

    # A header is emitted only together with its post, so there is never a
    # dangling "### Reply N:" line that has to be stripped afterwards.
    for number, post in enumerate(kept_posts):
        header = "### Original post:\n" if number == 0 else f"### Reply {number}:\n"
        if len(post) > max_post_chars:
            body = post[:max_post_chars] + "<rest of post truncated>"
        else:
            body = post
        parts.append(header + body + "\n\n")

    thread = "".join(parts)
    if len(thread) > max_thread_chars:
        thread = thread[:max_thread_chars] + "<rest of thread truncated>\n"
    return thread


# Never ask for more rows than exist; DataFrame.sample raises otherwise.
sample_size = min(HARDWARE_SAMPLE_SIZE, len(df))

records = []
for _, row in df.sample(sample_size, random_state=HARDWARE_SAMPLE_SEED).iterrows():
    if len(row["post"]) < MIN_THREAD_CHARS:
        continue

    thread = format_thread_for_prompt(row)
    print(thread)

    actual_prompt = prompt + "\n" + thread + "\n\n\n\n\n\n\n\n"

    response = get_openai_response(actual_prompt, tokens = 20)
    print("response:", response, "\n\n\n\n\n\n")

    records.append({"hardware_name": response})

# Build the frame once from all records instead of concatenating per row;
# naming the column keeps the frame well-formed even when nothing was kept.
dataset = pd.DataFrame(records, columns=["hardware_name"])

print(dataset.value_counts())

#save the dataset
os.makedirs("datasets", exist_ok=True)  # to_parquet does not create the folder
dataset.to_parquet("datasets/hardware_name.parquet", index=True)

Topic: Interest in a GPU Group Buy?
Date: 2014-03

### Original post:
Hi guys,Ben from BuyAHash.com I am curious about this, as I've been in talks with a few people. Stores are mostly sold out of GPUs, or there are limits on what you can buy. I may have a way to purchase cards direct from the factory. The cards wouldn't be labeled nor have official warranties, but they are from OEM manufacturers like MSI, Sapphire, and the like. The idea would be to maybe go in and purchase 500 - 1000 to negotiate a favorable price. I am not sure what the actual costs are going to be, but they'd be below NewEgg prices, but wouldn't include the AMD promo cards. My estimate (and please do not quote me) would be about $250 + S&H per GPU in the US. Payments would be via crypto and credit cards. Would this be something that BitcoinTalk users would be interested in?

### Reply 1:
Depends on how fast delivery will be. With dedicated scrypt machines coming out soon (hopefully), gpu mining may not be viable unl

# testing