# 1) Installing dependencies


In [None]:
!pip install -qU \
  pinecone-client \
  sentence-transformers==2.2.2

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/179.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.4/179.4 kB[0m [31m1.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m179.4/179.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m58.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.5/62.5 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m300.4/300.4 kB[0m [

# 2) Open knowledge base dataset and format appropriately

In [None]:
import json

# Parse the knowledge base: the code below treats it as a mapping of item name -> class label.
with open("/content/knowledge-base.jsonList") as kb_file:
    data = json.load(kb_file)

# One sentence per item; these sentences are what gets embedded and stored later on.
formatted_data = [f"{entry}: class {data[entry]}" for entry in data]

print(formatted_data[:5])


['Ready-to-eat meals: class I', 'Canned goods: class I', 'Energy bars: class I', 'Crackers and biscuits: class I', 'Trail mix: class I']


# 3) Generate embeddings from knowledge base


In [None]:
from sentence_transformers import SentenceTransformer
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

# Encode the whole knowledge base in a single batched call instead of one
# `encode` call per sentence; the result has one row per input sentence.
vectors = model.encode(formatted_data)

# Records carry an id, the vector values and the original sentence as metadata.
embeddings = [
    {'id': str(idx), 'values': vector.tolist(), 'metadata': {'text': sentence}}
    for idx, (sentence, vector) in enumerate(zip(formatted_data, vectors))
]

dimensions = len(embeddings[0]['values'])
print(f'Embedding dimensions: {dimensions}')
# Show only the first few components of the first record rather than the full vector.
print({**embeddings[0], 'values': embeddings[0]['values'][:5]})

Embedding dimensions: 384
[{'id': '0', 'values': [-0.018903309479355812, 0.014547097496688366, 0.0018049427308142185, 0.07448364794254303, -0.07618480175733566, 0.02340506762266159, 0.028933241963386536, -0.07292620092630386, -0.05023764818906784, 0.0039814976043999195, 0.0321856290102005, -0.0010382087202742696, -0.057538967579603195, -0.043087951838970184, 0.02150295116007328, -0.07702628523111343, 0.15172737836837769, -0.09067443013191223, -0.020970914512872696, 0.004081401042640209, -0.030665768310427666, 0.05587448179721832, 0.006449646782130003, 0.02709435671567917, -0.006290411576628685, 0.00036694499431177974, 0.08656466007232666, -0.06687478721141815, -0.024677075445652008, -0.03569306805729866, -0.032425571233034134, -0.033530570566654205, 0.04410792514681816, 0.030293527990579605, -0.06449401378631592, 0.07058001309633255, 0.13439196348190308, -0.040091607719659805, -0.037033457309007645, 0.016013646498322487, -0.01172659732401371, -0.08185499906539917, -0.0439860038459301, 

# 4) Upsert embeddings to Pinecone

In [None]:
import pinecone

import os

# Take credentials and the index name from the environment when they are set,
# so secrets do not have to be pasted into the notebook. The original
# placeholder values remain the fallbacks.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", "your-pinecone-api-key"),
    environment=os.environ.get("PINECONE_ENVIRONMENT", "your-pod-env"),
)
index = pinecone.Index(os.environ.get("PINECONE_INDEX", "your-index"))

# Send every knowledge-base record to the index.
response = index.upsert(embeddings)

print(response)

{'upserted_count': 320}


# 5) Load test dataset

In [None]:
import json

# Parse the evaluation set. Later cells iterate over its keys as the items to
# classify and read its values as the true class labels.
with open('/content/test-data.jsonList') as test_file:
    test_data = json.load(test_file)

# 6) Classifying test data with gpt-3.5-turbo-16k:
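- Define the system messages (one for COT prompting, one for TOT prompting).
- For each entry in the test dataset:
  1.   Retrieve the three most similar examples from Pinecone.
  2.   Prompt the model using either COT or TOT prompting.
  3.   Await the response, extract the predicted class, and append it to the list of predictions.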

In [None]:
# System message used when `prompt_model` is called with cot=True.
# It is assembled from two parts: the role statement and the class guidelines.
_COT_ROLE = 'You are a helpful assistant that classifies supplies into an appropriate class (I, II, III, IV, V, VI, VII, VIII, IX, X).'

_COT_GUIDELINES = '''Furthermore, you know that the guidelines for assigning a class are the following:
Class I – Rations – Subsistence (food and drinking water), gratuitous (free) health and comfort items.
Class II – Clothing And Equipment – individual equipment, tentage, some aerial delivery equipment, organizational tool sets and kits, hand tools, unclassified maps, administrative and housekeeping supplies and equipment.
Class III – POL – Petroleum, Oil and Lubricants (POL) (package and bulk): Petroleum, fuels, lubricants, hydraulic and insulating oils, preservatives, liquids and gases, bulk chemical products, coolants, deicer and antifreeze compounds, components, and additives of petroleum and chemical products, and coal.
Class IV – Construction materials, including installed equipment and all fortification and barrier materials.
Class V – Ammunition of all types, bombs, explosives, mines, fuses, detonators, pyrotechnics, missiles, rockets, propellants, and associated items.
Class VI – Personal demand items (such as health and hygiene products, soaps and toothpaste, writing material, snack food, beverages, cigarettes, batteries, alcohol, and cameras—nonmilitary sales items).
Class VII – Major end items such as launchers, tanks, mobile machine shops, some parachute systems and vehicles.
Class VIII – Medical material (equipment and consumables) including repair parts particular to medical equipment. (Class VIIIa – Medical consumable supplies not including blood & blood products; Class VIIIb – Blood & blood components (whole blood, platelets, plasma, packed red cells, etc.).
Class IX – Repair parts and components to include kits, assemblies, and subassemblies (repairable or non-repairable) required for maintenance support of all equipment.
Class X – Material to support nonmilitary programs such as agriculture and economic development (not included in Classes I through IX).
Miscellaneous – Water, salvage, and captured material.
'''

# Leading newline, role line, blank line, then the guidelines (which end with a newline).
sys_msg_cot = '\n' + _COT_ROLE + '\n\n' + _COT_GUIDELINES

# System message used when `prompt_model` is called with cot=False: the model is
# asked to simulate two experts who reason step by step and agree on a class.
# The worked example was corrected so that Expert 2's "food" observation points
# to class I (rations), which is what the guidelines and the rest of the
# dialogue say; it previously read "class II".
sys_msg_tot = '''
You are a helpful assistant that classifies supplies into an appropriate class (I, II, III, IV, V, VI, VII, VIII, IX, X).

When you answer, imagine two different experts are answering this question. Both experts will consider the examples provided by the user, class guidelines, and item characteristics. They will write down one step of their thinking and then share it with the group. When one expert realizes they made a mistake, they will backtrack and explore other possible solution paths. Finally, all experts will agree on a result. For example:

User asks: "Given that Jerry Cans are class II, classify Tuna Cans".

You answer:
"Expert 1: Cans can be considered equipment, indicating class II.
Expert 2: Tuna indicates that we are dealing with food, indicating this could be class I.
Expert 1: I see my mistake; these are tuna cans, which means their purpose is to provide food. This indicates we are dealing with class I rations.
Expert 2: We could also be dealing with class VI (personal items like snacks), but tuna is not usually eaten as a snack, which indicates this item is class I.
All experts agree that this item is class I."

Furthermore, you know that the guidelines for assigning a class are the following:
Class I – Rations – Subsistence (food and drinking water), gratuitous (free) health and comfort items.
Class II – Clothing And Equipment – individual equipment, tentage, some aerial delivery equipment, organizational tool sets and kits, hand tools, unclassified maps, administrative and housekeeping supplies and equipment.
Class III – POL – Petroleum, Oil and Lubricants (POL) (package and bulk): Petroleum, fuels, lubricants, hydraulic and insulating oils, preservatives, liquids and gases, bulk chemical products, coolants, deicer and antifreeze compounds, components, and additives of petroleum and chemical products, and coal.
Class IV – Construction materials, including installed equipment and all fortification and barrier materials.
Class V – Ammunition of all types, bombs, explosives, mines, fuses, detonators, pyrotechnics, missiles, rockets, propellants, and associated items.
Class VI – Personal demand items (such as health and hygiene products, soaps and toothpaste, writing material, snack food, beverages, cigarettes, batteries, alcohol, and cameras—nonmilitary sales items).
Class VII – Major end items such as launchers, tanks, mobile machine shops, some parachute systems and vehicles.
Class VIII – Medical material (equipment and consumables) including repair parts particular to medical equipment. (Class VIIIa – Medical consumable supplies not including blood & blood products; Class VIIIb – Blood & blood components (whole blood, platelets, plasma, packed red cells, etc.).
Class IX – Repair parts and components to include kits, assemblies, and subassemblies (repairable or non-repairable) required for maintenance support of all equipment.
Class X – Material to support nonmilitary programs such as agriculture and economic development (not included in Classes I through IX).
Miscellaneous – Water, salvage, and captured material.
'''


In [None]:
!pip install --upgrade openai

Collecting openai
  Downloading openai-1.3.7-py3-none-any.whl (221 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/221.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/221.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m221.4/221.4 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.25.2-py3-none-any.whl (74 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3

In [None]:
from openai import OpenAI
import os

# Use the OPENAI_API_KEY environment variable when it is set so the key does
# not have to be written into the notebook; the placeholder stays as fallback.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY', 'your-openai-api-key'))


def prompt_model(item, examples, cot, model="your-model-goes-here"):
  """Ask the chat model to classify `item`, given similar labelled examples.

  Args:
    item: Name of the supply item to classify.
    examples: Sequence of example sentences to show the model. Any number of
      examples is accepted; with three examples the wording matches the
      original "a, b and c" form.
    cot: If true, use `sys_msg_cot` as the system message and append the
      step-by-step reasoning instruction to the user prompt; otherwise use
      `sys_msg_tot` and send the prompt unchanged.
    model: Model name passed to the chat completions call.

  Returns:
    The response object returned by `client.chat.completions.create`.
  """
  examples = [str(example) for example in examples]
  if len(examples) > 1:
    example_text = f"{', '.join(examples[:-1])} and {examples[-1]}"
  else:
    example_text = ''.join(examples)

  if example_text:
    prompt = f'Consider these examples: {example_text}. Please classify {item} accordingly.'
  else:
    # No retrieved examples: ask for the classification without the preamble.
    prompt = f'Please classify {item} accordingly.'

  if cot:
    # Separate the instruction from the preceding sentence with a space.
    prompt = f'{prompt} Make sure to explain your reasoning while you think, step by step'

  response = client.chat.completions.create(
      model=model,
      messages=[
          {"role": "system", "content": sys_msg_cot if cot else sys_msg_tot},
          {"role": "user", "content": prompt}
      ]
  )
  return response

In [None]:
import re

def get_class_from_response(text):
    """Extract the predicted supply class from a model response.

    The last occurrence of the word "class" (case-insensitive) is located and
    the first Roman numeral from I to X that follows it is returned. A
    sub-class suffix on class VIII ("VIIIa", "VIIIb") is accepted and reported
    as "VIII".

    Args:
        text: Response text; may be None or empty.

    Returns:
        The numeral as a string ("I" .. "X"), or None when the text is empty,
        never mentions "class", or has no valid numeral after the last mention.
    """
    if not text:
        return None

    # End offsets of every standalone "class" word, in order of appearance.
    class_ends = [
        found.end()
        for found in re.finditer(r'\bclass\b', text, flags=re.IGNORECASE)
    ]
    if not class_ends:
        return None

    # Only the numerals I..X are valid labels. Longer alternatives are listed
    # first, and the optional a/b suffix is allowed on VIII only.
    match = re.search(
        r'\b(?:(VIII)[abAB]?|(VII|VI|IV|IX|III|II|I|V|X))\b',
        text[class_ends[-1]:],
    )
    if match is None:
        return None
    return match.group(1) or match.group(2)

In [None]:
from sentence_transformers import SentenceTransformer
import pinecone
import torch
import time

import os

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sentence encoder used to embed each test item before querying the index.
model = SentenceTransformer('all-MiniLM-L6-v2', device=device)

# Credentials, environment and index name can be supplied through environment
# variables; the original literal values are kept as the fallbacks.
pinecone.init(
    api_key=os.environ.get("PINECONE_API_KEY", ""),
    environment=os.environ.get("PINECONE_ENVIRONMENT", "gcp-starter"),
)

index = pinecone.Index(os.environ.get("PINECONE_INDEX", "nlp-final-project"))

def test(data, cot):
  """Classify every item in `data` and return the predicted labels in order.

  For each key of `data`, the key is embedded with the notebook-level `model`,
  the three nearest records are fetched from the notebook-level `index`, their
  stored text is passed to `prompt_model` as examples, and the label is parsed
  from the reply with `get_class_from_response`.

  Args:
    data: Iterable of item names (iterating a dict yields its keys).
    cot: Forwarded to `prompt_model` to select the prompting style.

  Returns:
    A list with one entry per item: the parsed class label, or None when no
    label could be parsed from the reply.
  """
  predictions = []

  # Iterate over the argument, not the notebook-level `test_data`.
  for key in data:
    query = model.encode(key).tolist()
    # Only the metadata text is used below, so vector values are not requested.
    query_results = index.query(query, top_k=3, include_metadata=True)

    examples = [match['metadata']['text'] for match in query_results['matches']]

    gpt_response = prompt_model(key, examples, cot)
    result = gpt_response.choices[0].message.content

    predictions.append(get_class_from_response(result))

  return predictions

In [None]:
# Run the evaluation twice over the same test set: once with cot=True and once with cot=False.
predictions_cot = test(test_data, cot=True)
predictions_tot = test(test_data, cot=False)

# One list per line, COT first.
print(predictions_cot, predictions_tot, sep='\n')

['I', None, 'I', 'I', 'I', 'II', 'VII', 'VIII', 'X', 'VI', 'III', 'X', None, 'III', 'III', 'IV', 'IV', 'IV', 'IV', 'IV', 'V', 'V', 'V', 'V', 'V', 'VI', 'VI', 'I', 'VI', 'VI', 'VII', 'II', 'VII', 'VII', 'VII', None, 'VIII', None, None, None, 'IX', 'IX', 'II', 'IX', 'X', 'X', 'X', 'X', 'I', None]
['I', 'VI', 'I', 'I', 'I', 'II', 'II', 'II', 'VII', None, 'III', 'III', 'III', 'III', 'III', 'IV', 'IV', 'IV', 'IV', 'IV', 'V', 'V', 'V', 'V', 'V', 'VI', 'I', 'VII', 'VI', 'VI', 'VII', 'II', 'VII', 'IV', 'VII', 'I', 'VIII', None, None, None, 'IX', 'III', 'IX', 'IX', 'IX', 'X', 'X', 'X', 'X', 'II']


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def correctness(predictions, data, include_unparsed=False):
    """Score predictions against the true labels stored as the values of `data`.

    Predictions are paired with `data.values()` in order.

    Args:
        predictions: Predicted labels; None marks a response with no parsed label.
        data: Mapping whose values are the true labels.
        include_unparsed: When False (default), pairs whose prediction is None
            are left out of the scores, so the metrics describe parsed
            responses only. When True, such predictions are kept and scored
            under the placeholder label 'UNPARSED', i.e. counted as wrong.

    Returns:
        Dict with 'accuracy', 'precision', 'recall' and 'f1_score', each a
        percentage rounded to two decimals. Precision, recall and F1 use
        weighted averaging.
    """
    scored_pairs = [
        (pred if pred is not None else 'UNPARSED', true_label)
        for pred, true_label in zip(predictions, data.values())
        if include_unparsed or pred is not None
    ]
    clean_predictions = [pred for pred, _ in scored_pairs]
    clean_y_true = [true_label for _, true_label in scored_pairs]

    accuracy = accuracy_score(clean_y_true, clean_predictions)

    # Calculate precision, recall, and F1 score
    precision = precision_score(clean_y_true, clean_predictions, average='weighted', zero_division=1)
    recall = recall_score(clean_y_true, clean_predictions, average='weighted', zero_division=1)
    f1 = f1_score(clean_y_true, clean_predictions, average='weighted', zero_division=1)

    return {
        'accuracy': round(accuracy * 100, 2),
        'precision': round(precision * 100, 2),
        'recall': round(recall * 100, 2),
        # Reported on the same scale and precision as the other metrics.
        'f1_score': round(f1 * 100, 2)
    }

In [None]:
# Score the cot=True predictions against the labels held in test_data.
correctness_cot = correctness(predictions=predictions_cot, data=test_data)
print(f'Correctness using COT prompting: {correctness_cot}')

Correctness using COT prompting: {'accuracy': 74.42, 'precision': 79.84, 'recall': 74.42, 'f1_score': 0.7448504983388705}


In [None]:
# Score the cot=False predictions against the labels held in test_data.
correctness_tot = correctness(predictions=predictions_tot, data=test_data)
print(f'Correctness using TOT prompting: {correctness_tot}')

Correctness using TOT prompting: {'accuracy': 78.26, 'precision': 82.21, 'recall': 78.26, 'f1_score': 0.7742643829600351}
