In [1]:
from win32cryptcon import szOID_X957

from llama3_8b.PNA_Predictions import predictions

# The six few-shot demonstrations used in every prompt, keyed by a short tag:
# "M*" entries are labelled Multiple Negation, "N*" entries Not Multiple Negation.
# The wording of each statement is part of the prompt and is kept verbatim
# (including the shorter "is Multiple Negation." phrasing of M1).
_DEMONSTRATIONS = {
    "M1": "The sentence 'Didn't have no radio home.' is Multiple Negation.",
    "M2": "The sentence 'And I don't remember no kind of bad stuff.' is an example of Multiple Negation.",
    "M3": "The sentence 'And knowing now that ain’t nobody got to give you nothing.' is an example of Multiple Negation.",
    "N1": "The sentence 'Not saying they don't do it.' is an example of Not Multiple Negation.",
    "N2": "The sentence 'And in his book he says his Word shall never change.' is an example of Not Multiple Negation.",
    "N3": "The sentence 'Because he ain't been back to finish yet.' is an example of Not Multiple Negation.",
}

# One row per prompt ordering: the sequence in which the demonstrations appear.
_DEMONSTRATION_ORDERS = (
    ("M1", "M2", "M3", "N1", "N2", "N3"),  # all positives, then all negatives
    ("M1", "N1", "M2", "N2", "M3", "N3"),  # alternating, positive first
    ("N1", "N2", "N3", "M1", "M2", "M3"),  # all negatives, then all positives
    ("M1", "M2", "N1", "N2", "N3", "M3"),
    ("N1", "N2", "M1", "M2", "M3", "N3"),
    ("N1", "M1", "N2", "M2", "N3", "M3"),  # alternating, negative first
)

# Each ordering is a list of six "Example <k>. ..." lines, numbered by position.
orderings = [
    [f"Example {number}. {_DEMONSTRATIONS[tag]}" for number, tag in enumerate(tags, start=1)]
    for tags in _DEMONSTRATION_ORDERS
]

# Keep the individual names available; each one is the same list object that
# sits at the matching position of `orderings`.
(fs_examples_1, fs_examples_2, fs_examples_3,
 fs_examples_4, fs_examples_5, fs_examples_6) = orderings

In [None]:
import torch
import transformers
import time
import os
import pandas as pd
from sklearn.metrics import classification_report
import re

class Llama3:
    """Few-shot 'Multiple Negation' sentence classifier driven by a text-generation pipeline.

    The class wraps a ``transformers`` text-generation pipeline, builds a prompt
    from one of the module-level ``orderings`` of demonstrations plus a
    question about the target sentence, and converts the free-text answers
    into numeric labels that are written to a CSV file.
    """

    def __init__(self, model_path):
        """Load the text-generation pipeline for the model at ``model_path``.

        Args:
            model_path: Value passed as ``model=`` to ``transformers.pipeline``.
        """
        self.model_id = model_path
        self.pipeline = transformers.pipeline(
            "text-generation",
            model=self.model_id,
            device='cuda:0',
            #device_map='auto',
            model_kwargs={
                "torch_dtype": torch.float16,
                #"quantization_config": {"load_in_4bit": True},
                #"low_cpu_mem_usage": True,
            },
        )
        # Keep the EOS id in a list; the list is empty when the tokenizer
        # does not define one.
        eos_token_id = self.pipeline.tokenizer.eos_token_id
        self.terminators = [] if eos_token_id is None else [eos_token_id]

    def get_response(self, query, message_history=None):
        """Generate a reply to ``query`` and return it with the updated history.

        Args:
            query: Text of the new user turn.
            message_history: Optional list of prior chat messages (dicts with
                ``role`` and ``content``). It is not modified.

        Returns:
            Tuple ``(response, history)`` where ``response`` is the generated
            text after the prompt and ``history`` is the prior messages plus
            the new user and assistant turns.
        """
        # A None default avoids sharing one list object between calls.
        prior_messages = list(message_history) if message_history else []
        user_prompt = prior_messages + [{"role": "user", "content": query}]
        prompt = self.pipeline.tokenizer.apply_chat_template(
            user_prompt, tokenize=False, add_generation_prompt=True
        )

        generation_kwargs = {
            "max_new_tokens": 4096,
            "do_sample": True,
            "temperature": 0.7,
            "top_p": 0.9,
        }
        # Only pass an EOS id when the tokenizer provided one; indexing an
        # empty terminator list would raise IndexError.
        if self.terminators:
            generation_kwargs["eos_token_id"] = self.terminators[0]

        outputs = self.pipeline(prompt, **generation_kwargs)
        # The pipeline output starts with the prompt; keep only what follows it.
        response = outputs[0]["generated_text"][len(prompt):]
        return response, user_prompt + [{"role": "assistant", "content": response}]

    def pass_input(self, sentence, index, ordering, system_instructions="You are a helpful assistant."):
        """Build the few-shot prompt for one sentence and return the model's reply.

        Args:
            sentence: Sentence to classify.
            index: Zero-based position used for the "Question <index+1>" label.
            ordering: Index into the module-level ``orderings`` list selecting
                which arrangement of demonstrations precedes the question.
            system_instructions: Content of the system message.

        Returns:
            The raw generated response text.
        """
        conversation = [{"role": "system", "content": system_instructions}]
        question = f"\nQuestion {index+1}. Classify the sentence '{sentence}' as Multiple Negation or Not Multiple Negation in one word which is a concept of AAVE English and keep the numbering given at the start of the prompt intact in the response."
        user_input = "\n".join(orderings[ordering]) + question
        print(user_input)
        response, _ = self.get_response(user_input, conversation)
        print("-----------------------------------")
        print(response)
        print("-----------------------------------\n")
        return response

    def classify_sentences_batch(self, in_sentences, ordering, batch_size, pause_seconds=80):
        """Classify every sentence, working through the input in batches.

        Each sentence is sent in its own prompt via ``pass_input``; the
        question number in the prompt is the sentence's position within its
        batch. A sentence whose processing raises is stored as ``"Error"``,
        recorded under its batch number, and retried once after all batches
        have run.

        Args:
            in_sentences: Sequence of sentences to classify.
            ordering: Index of the demonstration ordering to use.
            batch_size: Number of sentences per batch.
            pause_seconds: Seconds to sleep between consecutive batches
                (no sleep follows the final batch).

        Returns:
            Tuple ``(results, missing)``. ``results`` maps each sentence index
            to the stripped response text or ``"Error"``. ``missing`` maps a
            1-based batch number to the indices that still failed after the
            retry; it is empty when everything succeeded.
        """
        classify_results = {i: None for i in range(len(in_sentences))}
        classify_missingkeys = {}

        def process_batch(batch_indices):
            """Classify the sentences at ``batch_indices``; return the indices that raised."""
            failed = []
            for position, idx in enumerate(batch_indices):
                try:
                    response = self.pass_input(in_sentences[idx], position, ordering)
                    classify_results[idx] = response.strip()
                except Exception as e:
                    print(f"Error processing sentence: {e}")
                    classify_results[idx] = "Error"
                    failed.append(idx)
            return failed

        total = len(in_sentences)
        for batch_number, start in enumerate(range(0, total, batch_size), start=1):
            stop = min(start + batch_size, total)
            failed = process_batch(list(range(start, stop)))
            if failed:
                classify_missingkeys[batch_number] = failed
            # Pause only when another batch follows.
            if stop < total:
                time.sleep(pause_seconds)

        # Retry each failed group once. Iterate over a snapshot because the
        # dictionary is updated inside the loop.
        for batch_number, batch_indices in list(classify_missingkeys.items()):
            still_failed = process_batch(batch_indices)
            if still_failed:
                classify_missingkeys[batch_number] = still_failed
            else:
                del classify_missingkeys[batch_number]

        return classify_results, classify_missingkeys

    @staticmethod
    def _to_numeric_prediction(raw_prediction):
        """Map one raw response to 1, 0, or ``'*'``.

        Leading non-letter characters are dropped first. Text starting with
        ``m`` (case-insensitive) maps to 1, text starting with ``n`` maps to 0,
        a missing response (``None``) maps to 0, anything else to ``'*'``.
        """
        if raw_prediction is None:
            return 0
        cleaned = re.sub(r'^[^a-zA-Z]+', '', str(raw_prediction)).strip().lower()
        if cleaned.startswith('m'):
            return 1
        if cleaned.startswith('n'):
            return 0
        return '*'

    def gather_results(self, test_data, test_sentences, results):
        """Turn raw responses into numeric predictions alongside the true labels.

        Args:
            test_data: DataFrame with an ``Mneg`` column, looked up with
                ``.loc[idx, 'Mneg']`` for ``idx`` in ``range(len(test_sentences))``
                (assumes the default integer index; confirm against caller).
            test_sentences: Sentences that were classified; only the length is used.
            results: Mapping or sequence from sentence index to response text.
                Missing entries and ``None`` values count as prediction 0.

        Returns:
            Tuple ``(fold_predictions, fold_true_labels)``. Predictions are 1,
            0, or ``'*'`` for unrecognised responses. A true label is 1 when
            ``Mneg`` equals 1, otherwise 0.
        """
        fold_predictions = []
        fold_true_labels = []
        for idx in range(len(test_sentences)):
            try:
                raw_prediction = results[idx]
            except (KeyError, IndexError):
                raw_prediction = None

            fold_predictions.append(self._to_numeric_prediction(raw_prediction))
            fold_true_labels.append(1 if test_data.loc[idx, 'Mneg'] == 1 else 0)

        return fold_predictions, fold_true_labels

    def main(self, data_file, results_folder, ordering):
        """Classify every sentence in ``data_file`` and save the predictions as CSV.

        Args:
            data_file: Path to a header-less, comma-separated file whose two
                columns are read as ``Text`` and ``Mneg``.
            results_folder: Directory for the output CSV; created if absent.
            ordering: Index of the demonstration ordering to use.

        Returns:
            ``[fold_predictions, fold_true_labels]`` as produced by
            ``gather_results``.
        """
        os.makedirs(results_folder, exist_ok=True)

        print(f"Processing {data_file}")

        data = pd.read_csv(data_file, sep=',', header=None, names=["Text", "Mneg"])

        test_sentences = data['Text'].tolist()
        true_labels = data['Mneg'].tolist()

        # Show the first row as a sanity check; skipped for an empty file.
        if test_sentences:
            print("Sentence:", test_sentences[0])
            print("True Label:", true_labels[0])

        results, _ = self.classify_sentences_batch(test_sentences, ordering, batch_size=20)

        fold_predictions, fold_true_labels = self.gather_results(data, test_sentences, results)

        output_df = pd.DataFrame({
            'Sentence': test_sentences,
            'True_Label': fold_true_labels,
            'Prediction': fold_predictions
        })
        # NOTE(review): the file name is a fixed placeholder that does not
        # include `ordering`, so repeated calls with the same results_folder
        # overwrite the same CSV. Confirm the intended naming scheme.
        output_file = os.path.join(results_folder, '[output file name]')
        output_df.to_csv(output_file, index=False)
        print(f"Results saved to {output_file}")

        print("Complete prediction set:", len(fold_predictions), " True labels:", len(fold_true_labels))
        return [fold_predictions, fold_true_labels]

In [None]:
if __name__ == "__main__":
    # Placeholder locations: fill in before running.
    model_path = R"[local model path]"
    data_file = R"[input file path]"
    results_folder = R"[output file path]"
    llm = Llama3(model_path)

    # Visual divider printed between consecutive runs.
    run_divider = "\n-----------------------------------------------------------------------------------------------\n"

    def run_ordering(ordering_index):
        """Run the full classification pass for one demonstration ordering."""
        return llm.main(data_file, results_folder, ordering_index)

    # Each result is bound as soon as its run finishes, so earlier outputs
    # remain available if a later run is interrupted.
    output_0 = run_ordering(0)
    print(run_divider)
    output_1 = run_ordering(1)
    print(run_divider)
    output_2 = run_ordering(2)
    print(run_divider)
    output_3 = run_ordering(3)
    print(run_divider)
    output_4 = run_ordering(4)
    print(run_divider)
    output_5 = run_ordering(5)

In [None]:
#print("Ordering 0:")
#print(classification_report(s0, true, target_names=['Erroneous', 'Not Multiple Negation (label 0)', 'Multiple Negation (label 1)']))
# print("------------------------------------------------------------")
# print("Ordering 1:")
# print(classification_report(true, s1, target_names=['Not Multiple Negation (label 0)', 'Multiple Negation (label 1)']))
# print("------------------------------------------------------------")
# print("Ordering 2:")
# print(classification_report(true, s2, target_names=['Not Multiple Negation (label 0)', 'Multiple Negation (label 1)']))
# print("------------------------------------------------------------")
# print("Ordering 3:")
# print(classification_report(true, s3, target_names=['Not Multiple Negation (label 0)', 'Multiple Negation (label 1)']))
# print("------------------------------------------------------------")
# print("Ordering 4:")
# print(classification_report(true, s4, target_names=['Not Multiple Negation (label 0)', 'Multiple Negation (label 1)']))
# print("------------------------------------------------------------")
# print("Ordering 5:")
# print(classification_report(true, s5, target_names=['Not Multiple Negation (label 0)', 'Multiple Negation (label 1)']))