In [None]:
import csv
import json
from pathlib import Path

import requests
from tqdm.notebook import tqdm

# Directory against which the notebook's input and output files are resolved:
# the current working directory of the running Python process.
base_dir = Path.cwd()

def ollama_completion(prompt, model="llama3.1:latest", host="http://localhost:11434", timeout=None):
    """Send a single non-streaming generation request to an Ollama server.

    Args:
        prompt: Full prompt text to send to the model.
        model: Name of the Ollama model to run.
        host: Base URL of the Ollama server; a trailing slash is tolerated.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.post``. ``None`` (the default) waits indefinitely,
            which matches the previous behaviour.

    Returns:
        The value of the ``response`` field of the JSON reply.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        KeyError: If the reply is valid JSON but has no ``response`` field.
    """
    url = f"{host.rstrip('/')}/api/generate"
    payload = {
        "model": model,
        "prompt": prompt,
        "stream": False,  # ask for one JSON object instead of a chunked stream
    }
    response = requests.post(url, json=payload, timeout=timeout)
    # Fail with the HTTP error itself rather than a misleading KeyError on
    # 'response' when the server rejects the request.
    response.raise_for_status()
    return response.json()['response']

# Parse with the csv module instead of splitting on ",": a quoted command that
# itself contains commas stays in one piece and its CSV quoting is removed.
# newline="" is what the csv module asks for when it is given a file object.
with open(base_dir / "process_chains.csv", newline="") as f:
    # Last column of every row (the first row is included here and skipped by
    # the loops that consume `commands`). Blank lines are kept as "" so the
    # number of entries still matches the number of lines in the file.
    commands = [row[-1] if row else "" for row in csv.reader(f)]

# Instruction prefix for the per-command classification request; the command
# text is appended directly after the trailing "The command is: ".
# Note: every line of the literal carries an explicit "\n" escape *and* the real
# line break plus the source indentation, all of which are sent to the model.
prompt_template=""" This prompt includes the command from a PID. \n
                    Please analyize this command and come up with a simple classification of the command (what is it doing or what is it for). \n
                    Return only a short string classification of the command. No additional reasoning or explanation should be provided. \n
                    If you cannot classify the command or if a command is not provided, return only the string 'unknown'. \n
                    The command is: """



In [None]:
### Generate initial PID classifications
responses=[]

# The output file is opened once, in write mode, so that re-running this cell
# replaces the previous results instead of appending a second copy of every
# record. Each record is flushed right away so partial progress is still on
# disk if the run is interrupted.
with open(base_dir / "command_classifications.txt","w") as f:
    # commands[0] is skipped -- presumably the CSV header row.
    for command in tqdm(commands[1:]):
        prompt=prompt_template+command
        raw_response=ollama_completion(prompt)
        # Collapse line breaks / runs of whitespace so one record is always one
        # line, drop any quotes the model put around its answer, then add
        # exactly one pair of surrounding quotes.
        label=" ".join(raw_response.split()).strip('"').strip()
        response=f'"{label}"'
        responses.append((response,command))

        f.write(f"{response},{command}\n")
        f.flush()


In [None]:
### Retrieve initial PID classifications
# Read the saved results back from disk rather than from the in-memory
# `responses` list of the generation cell.
file_text = (base_dir / "command_classifications.txt").read_text()

# The classification is whatever precedes the first comma on each line.
classifications = [line.partition(',')[0] for line in file_text.splitlines()]

In [None]:
### Simplify PID classifications with additional ollama call

# Stored under its own name: reusing `prompt_template` here would overwrite the
# per-command prompt, so re-running the classification cell after this one
# would silently send the wrong instructions.
simplify_prompt_template="""this prompt includes a series of classifications associated with PID commands. \n
                   Please analyze the classifications and come up with a more concise list of unique classifications. \n
                   Respond with ***ONLY*** a list of unique classifications, separated by commas.\n
                   You must not include any additional text, reasoning or explanation. \n
                   The classifications are: """

# Send every distinct, non-empty classification once (first-seen order); the
# duplicates only make the prompt longer without adding information.
unique_classifications=list(dict.fromkeys(cls for cls in classifications if cls))

prompt=simplify_prompt_template+','.join(unique_classifications)

simplfied_classifications=ollama_completion(prompt)

# Split the comma-separated reply, dropping blank entries (e.g. from a trailing
# comma) and repeated entries while keeping the order the model gave.
simplified_classifications_list=list(dict.fromkeys(
    cls.strip() for cls in simplfied_classifications.split(',') if cls.strip()
))

In [None]:
responses=[]

# The prompt text around the command is the same for every command, so it is
# built once here instead of on every iteration.
prompt_prefix="""This prompt includes the command from a PID and a list of possible classifications. \n
                    Please analyize the command and assign it to one of the classifications from the list. \n
                    Return only the classification that best fits the command. You must provide no additional text, reasoning or explanation. \n
                    If you cannot classify the command or if a command is not provided, return only the string 'unknown'. \n
                    The command is: """
prompt_suffix=""" \n
                    The classifications are: """ + ','.join(simplified_classifications_list)

# The output file is opened once, in write mode, so that re-running this cell
# replaces the previous results instead of appending a second copy of every
# record. Each record is flushed right away so partial progress is still on
# disk if the run is interrupted.
with open(base_dir / "command_normalized_classifications.txt","w") as f:
    # commands[0] is skipped -- presumably the CSV header row.
    for command in tqdm(commands[1:]):
        prompt=prompt_prefix + command + prompt_suffix

        raw_response=ollama_completion(prompt)
        # Collapse line breaks / runs of whitespace so one record is always one
        # line, drop any quotes the model put around its answer, then add
        # exactly one pair of surrounding quotes.
        label=" ".join(raw_response.split()).strip('"').strip()
        response=f'"{label}"'
        responses.append((response,command))

        f.write(f"{response},{command}\n")
        f.flush()

  0%|          | 0/734 [00:00<?, ?it/s]