### Importing Dependencies

In [None]:
# Silence every warning for the rest of the session so cell output stays uncluttered.
# NOTE(review): a blanket 'ignore' filter also hides deprecation warnings raised by the
# libraries used below — consider narrowing it to specific categories or modules.
import warnings
warnings.filterwarnings('ignore')

# Import the Replicate client, used below to run the hosted Meta Llama 3 model
import replicate

# Import getpass to securely handle API keys
from getpass import getpass

# Import os for interacting with the operating system (e.g., file handling)
import os

# Import pandas for data manipulation (e.g., reading CSVs)
import pandas as pd

# Importing transformers for handling pre-trained BERT model and tokenizer
from transformers import BertForSequenceClassification
from transformers import BertTokenizer

# Importing PyTorch for deep learning operations (e.g., model handling)
import torch

# Import OpenAI client for interacting with the OpenAI API
from openai import OpenAI

### Setting up environment

In [15]:
# Prompt for the Replicate API key (input is not echoed) and publish it through the process environment
os.environ.update({'REPLICATE_API_TOKEN': getpass("Enter Replicate API Key: ")})

In [16]:
# Set PYTHONHTTPSVERIFY=0, which asks Python to skip HTTPS certificate verification
# (a workaround for environments where certificate validation fails).
# NOTE(review): skipping certificate checks exposes the API keys sent by this notebook to
# man-in-the-middle interception. It is also not visible from this notebook whether the HTTP
# clients used here honor this variable — confirm it is actually needed, otherwise remove it
# and fix the CA bundle instead.
os.environ['PYTHONHTTPSVERIFY'] = '0'

In [None]:
# Prompt for the OpenAI API key (input is not echoed) and publish it through the process environment
os.environ.update({'OPENAI_API_KEY': getpass("Enter OpenAI API Key: ")})

In [None]:
# Create the OpenAI client used by the GPT inference cell.
# No credentials are passed explicitly, so the client is expected to pick up the
# OPENAI_API_KEY environment variable — run the key-prompt cell first.
client = OpenAI()

### Loading test data

In [10]:
# Load the evaluation set from "test.csv" (path is relative to the notebook's working directory).
# Later cells read its 'sentence' and 'label' columns.
df = pd.read_csv("test.csv")

In [11]:
# Pull the two columns used for evaluation out of the DataFrame as arrays:
# 'sentence' holds the text to classify, 'label' the ground-truth class.
sentences, labels = (df[column].values for column in ("sentence", "label"))

### Meta Llama Inference

In [None]:
# Confusion-matrix counters. "Positive" means the sentence is grammatically correct (label 1).
TP = 0
TN = 0
FP = 0
FN = 0

# Replies that do not start with '0' or '1' are counted here and skipped,
# so one malformed answer cannot abort the whole (paid) evaluation run.
unparsed_replies = 0

# Evaluate at most the first 500 sentences; bounding by the dataset size avoids an
# IndexError when the test set has fewer rows.
n_eval = min(500, len(sentences))

for i in range(n_eval):
    # Ask the model for a bare '1' (grammatically correct) or '0' (incorrect) verdict
    output = replicate.run(
        "meta/meta-llama-3-70b-instruct",  # Model being used
        input={
            "top_k": 50,                    # Top-k sampling parameter
            "top_p": 0.9,                   # Top-p (nucleus sampling) parameter
            "prompt": f"Give output as '1' when sentence is grammatically correct and '0' when sentence is grammatically incorrect. Don't correct the sentence. Just give '1' or '0' \n Sentence: {sentences[i]}",  # Instruction plus the sentence under test
            "max_tokens": 512,               # Upper bound on the response length
            "min_tokens": 0,                 # Lower bound on the response length
            "temperature": 0.6,              # Sampling temperature (results are not deterministic)
            "prompt_template": "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n",  # Llama 3 chat template
            "presence_penalty": 1.15,        # Presence penalty passed to the model
            "frequency_penalty": 0.2         # Frequency penalty passed to the model
        },
    )

    # Join all returned pieces so the whole reply is inspected rather than only its first
    # element, then drop surrounding whitespace and quotes (the prompt itself quotes '1'/'0').
    reply = "".join(output).strip().strip("'\"")
    verdict = reply[:1]

    # A tuple membership test is used on purpose: '' in "01" would be True for an empty reply.
    if verdict not in ("0", "1"):
        unparsed_replies += 1
        print("?", end=" ")
        continue

    # Parse once and reuse for both the progress output and the comparison
    predicted = int(verdict)
    print(predicted, end=" ")

    # Update the confusion matrix from (prediction, ground truth)
    if predicted == 1 and labels[i] == 1:    # Correct sentence recognised as correct
        TP += 1
    elif predicted == 1 and labels[i] == 0:  # Incorrect sentence wrongly accepted as correct
        FP += 1
    elif predicted == 0 and labels[i] == 1:  # Correct sentence wrongly rejected as incorrect
        FN += 1
    elif predicted == 0 and labels[i] == 0:  # Incorrect sentence recognised as incorrect
        TN += 1

# Surface skipped replies so the reported counts are not silently based on fewer samples
if unparsed_replies:
    print(f"\nSkipped {unparsed_replies} replies that were neither '0' nor '1'")

In [27]:
# Report the confusion-matrix counts accumulated by the Llama inference loop, one per line
print(
    *(f"{caption} {count}" for caption, count in (("TP: ", TP), ("FP: ", FP), ("FN: ", FN), ("TN: ", TN))),
    sep="\n",
)

### OpenAI GPT Inference

In [None]:
# Confusion-matrix counters. "Positive" means the sentence is grammatically correct (label 1).
TP = 0
TN = 0
FP = 0
FN = 0

# Replies that do not start with '0' or '1' (including empty/missing content) are counted
# here and skipped, so one malformed answer cannot abort the whole (paid) evaluation run.
unparsed_replies = 0

# Evaluate at most the first 500 sentences; bounding by the dataset size avoids an
# IndexError when the test set has fewer rows.
n_eval = min(500, len(sentences))

for i in range(n_eval):
    # Ask GPT-4 to classify the grammatical correctness of the current sentence
    response = client.chat.completions.create(
        model="gpt-4",  # Model being used
        messages=[
            {
                "role": "system",  # System message carrying the instruction
                "content": "Give '1' if sentence is grammatically correct and '0' if sentence is grammatically incorrect."
            },
            {
                "role": "user",  # User message carrying the sentence under test
                "content": sentences[i]
            }
        ],
        temperature=1,  # Sampling temperature (results are not deterministic)
        max_tokens=256,  # Upper bound on the response length
        top_p=1,  # Top-p (nucleus sampling); 1 keeps the full distribution
        frequency_penalty=0,  # No penalty for tokens that were already used frequently
        presence_penalty=0  # No penalty for tokens that already appeared in the text
    )

    # The message content may be missing; treat that like an empty reply. Surrounding
    # whitespace and quotes are dropped because the instruction itself quotes '1'/'0'.
    content = response.choices[0].message.content
    reply = (content or "").strip().strip("'\"")
    verdict = reply[:1]

    # A tuple membership test is used on purpose: '' in "01" would be True for an empty reply.
    if verdict not in ("0", "1"):
        unparsed_replies += 1
        print(f"{i}.) ?", end=" ")
        continue

    output = int(verdict)
    print(f"{i}.) {output}", end=" ")  # Progress output: sentence index and predicted label

    # Update the confusion matrix from (prediction, ground truth)
    if output == 1 and labels[i] == 1:    # Correct sentence recognised as correct
        TP += 1
    elif output == 1 and labels[i] == 0:  # Incorrect sentence wrongly accepted as correct
        FP += 1
    elif output == 0 and labels[i] == 1:  # Correct sentence wrongly rejected as incorrect
        FN += 1
    elif output == 0 and labels[i] == 0:  # Incorrect sentence recognised as incorrect
        TN += 1

# Surface skipped replies so the reported counts are not silently based on fewer samples
if unparsed_replies:
    print(f"\nSkipped {unparsed_replies} replies that were neither '0' nor '1'")

In [None]:
# Report the confusion-matrix counts accumulated by the GPT inference loop, one per line
print(
    *(f"{caption} {count}" for caption, count in (("TP: ", TP), ("FP: ", FP), ("FN: ", FN), ("TN: ", TN))),
    sep="\n",
)

### Loading Saved Models

In [None]:
# Directory (relative to the notebook's working directory) from which the next cell
# loads both the fine-tuned BERT model and its tokenizer
output_dir = 'model_20k_bert_base_uncased_new_lang8'

In [5]:
# Restore the fine-tuned sequence-classification model from the saved directory
model_loaded = BertForSequenceClassification.from_pretrained(output_dir)

# Restore the matching tokenizer from the same directory
tokenizer = BertTokenizer.from_pretrained(output_dir)

### Setting up GPU

In [6]:
# Prefer CUDA when PyTorch reports a usable GPU; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Place the model's weights on the chosen device
model_loaded = model_loaded.to(device)

### Setting up function for Inference

In [None]:
# Inference function for grammar checking using the fine-tuned BERT model loaded above
def BertGrammarChecker(sentence):
    """Classify a single sentence with the loaded BERT sequence-classification model.

    Relies on the notebook-level globals ``tokenizer``, ``model_loaded`` and ``device``.

    Args:
        sentence: The text to classify.

    Returns:
        A 0-dimensional ``torch.Tensor`` holding the index of the largest logit.
        ``argmax`` is taken over the flattened logits; for the single-sentence batch
        built here that index is the predicted class id. The evaluation cell treats
        1 as "grammatically correct" and 0 as "incorrect".
    """
    # Tokenize into fixed-length model inputs. ``padding='max_length'`` replaces the
    # deprecated ``pad_to_max_length=True``, and ``truncation=True`` makes the 64-token
    # cap explicit instead of relying on the tokenizer's implicit fallback.
    encoded_dict = tokenizer.encode_plus(
        sentence,                    # Input sentence
        add_special_tokens=True,     # Add [CLS] and [SEP]
        max_length=64,               # Fixed sequence length in tokens
        padding='max_length',        # Pad shorter inputs up to max_length
        truncation=True,             # Cut longer inputs down to max_length
        return_attention_mask=True,  # Mask so padding tokens are ignored
        return_tensors='pt',         # Return PyTorch tensors
    )

    # Move the inputs to the same device as the model
    input_id = encoded_dict['input_ids'].to(device)
    attention_mask = encoded_dict['attention_mask'].to(device)

    # Gradients are not needed for inference; disabling them saves memory and compute
    with torch.no_grad():
        outputs = model_loaded(input_id, token_type_ids=None, attention_mask=attention_mask)

    # The first element of the model output holds the logits
    logits = outputs[0]

    # Index of the highest logit, returned as a 0-dim tensor (unchanged return type)
    return logits.argmax()

### Inference on Saved Models

In [16]:
# Confusion-matrix counters. "Positive" means the sentence is grammatically correct (label 1).
TP = 0
TN = 0
FP = 0
FN = 0

# Evaluate at most the first 500 sentences; bounding by the dataset size avoids an
# IndexError when the test set has fewer rows.
n_eval = min(500, len(sentences))

for i in range(n_eval):
    # The checker returns a 0-dim tensor; convert it to a plain int once so the
    # comparisons below are ordinary Python comparisons instead of repeated tensor ops.
    lb = int(BertGrammarChecker(sentences[i]))

    # Update the confusion matrix from (prediction, ground truth)
    if lb == 1 and labels[i] == 1:
        TP += 1  # True Positive: correct sentence recognised as correct
    elif lb == 1 and labels[i] == 0:
        FP += 1  # False Positive: incorrect sentence wrongly accepted as correct
    elif lb == 0 and labels[i] == 1:
        FN += 1  # False Negative: correct sentence wrongly rejected as incorrect
    elif lb == 0 and labels[i] == 0:
        TN += 1  # True Negative: incorrect sentence recognised as incorrect

In [None]:
# Report the confusion-matrix counts accumulated by the BERT inference loop, one per line
print(
    *(f"{caption} {count}" for caption, count in (("TP: ", TP), ("FP: ", FP), ("FN: ", FN), ("TN: ", TN))),
    sep="\n",
)