In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split
import bitsandbytes as bnb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from huggingface_hub import login

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Path to the checkpoint directory
checkpoint_path = "models/llama-3.1-fine-tuned-model-30k-1024/checkpoint-21000"

# Load the fine-tuned model from the checkpoint
model = AutoModelForCausalLM.from_pretrained(checkpoint_path)
# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, use_fast=False)

Loading checkpoint shards: 100%|██████████| 4/4 [00:02<00:00,  1.41it/s]


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
prompt = prompt = f"""
            Classify the given candidate string into "Non-sensitive" or "Secret" based on its presence and usage in the provided code snippet. A "Secret" refers to sensitive information like API keys, passwords, or private tokens. Return the answer as the corresponding label.
candidate_string: "xxxxxxxxxx"
code snippet: 
import requests

API_KEY = "xxxxxxxxxx"  

response = requests.get(f"https://api.stripe.com/v1/charges", headers={{
    "Authorization": f"Bearer API_KEY"
}})
print(response.json())
label: """.strip()

prompt_false_negative_1 = f"""
    Classify the given candidate string into "Non-sensitive" or "Secret" based on its presence and usage in the provided code snippet. A "Secret" refers to sensitive information like API keys, passwords, or private tokens. Return the answer as the corresponding label.
candidate_string: V2h5LCBoZWxsbyB0aGVyZaedsrtyUSE=
code snippet: n the `token`
        parameter and a credential ID placed in the `credential` parameter.

        The final Authorization header will look like this:

        ```
        Authorization: Token token="V2h5LCBoZWxsbyB0aGVyZaedsrtyUSE=", credential="8f71320c-fdbc-4934-b3b9-f1eda4fb39a9""
        ```
      in: header
      name: Authorization
      type: apiKey

label: """.strip()


prompt_false_negative = f"""
    Classify the given candidate string into "Non-sensitive" or "Secret" based on its presence and usage in the provided code snippet. A "Secret" refers to sensitive information like API keys, passwords, or private tokens. Return the answer as the corresponding label.
candidate_string: -----BEGIN RSA PRIVATE KEY-----
xxxx
-----END RSA PRIVATE KEY-----
code snippet: -----BEGIN RSA PRIVATE KEY-----
xxxxx
-----END RSA PRIVATE KEY-----
ssh-rsa-cert-v01@openssh.com xxxxxx+JRrPLNb0vrEYqJp+bEAAAADAQABAAABAQC96eyjDJkj80k2JJ2imXQTXb4VfjEXHxPClX4uw0Th7dJ6NxvKb+AfAbaFdYu3xJj
label: """.strip()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,

)

# outputs = pipe(prompt_false_negative_1, max_new_tokens=2, do_sample=True, temperature=0.1)
# print(outputs[0]["generated_text"].split("label: ")[-1].strip())

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [5]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))


True
1
NVIDIA GeForce RTX 4090


In [6]:
import time

max_new_tokens = 2
do_sample = True
temperature = 0.1

# Start the timer
start_time = time.time()

# Call the pipeline
outputs = pipe(prompt_false_negative_1, max_new_tokens=max_new_tokens, do_sample=do_sample, temperature=temperature)

# End the timer
end_time = time.time()

# Compute runtime
runtime = end_time - start_time
print(f"Runtime: {runtime} seconds")
print(outputs[0]["generated_text"].split("label: ")[-1].strip())

Runtime: 7.783200979232788 seconds
Non-sensitive


In [7]:
prompt_true_positive = f"""
            Classify the given candidate string into "Non-sensitive" or "Secret" based on its presence and usage in the provided code snippet. A "Secret" refers to sensitive information like API keys, passwords, or private tokens. Return the answer as the corresponding label.
candidate_string: "sk_test_4eC39HqLyjWDarjtT1zdp7dc"
code snippet: 
import requests

API_KEY = "sk_test_4eC39HqLyjWDarjtT1zdp7dc"  

response = requests.get(f"https://api.stripe.com/v1/charges", headers={{
    "Authorization": f"Bearer API_KEY"
}})
print(response.json())
label: """.strip()

# Output for True Positive
outputs_true_positive = pipe(prompt_true_positive, max_new_tokens=2, do_sample=True, temperature=0.1)
print(outputs_true_positive[0]["generated_text"].split("label: ")[-1].strip())

Secret


In [8]:

# prompt_false_positive = f"""
#             Classify the given candidate string into "Non-sensitive" or "Secret" based on its presence and usage in the provided code snippet. A "Secret" refers to sensitive information like API keys, passwords, or private tokens. Return the answer as the corresponding label.
# candidate_string: "my_password123"
# code snippet: 
# def authenticate_user(username, password):
#     if password == "my_password123":
#         print("Authentication successful")
#     else:
#         print("Authentication failed")
# label: """.strip()

# # Output for False Positive
# outputs_false_positive = pipe(prompt_false_positive, max_new_tokens=2, do_sample=True, temperature=0.1)
# print(outputs_false_positive[0]["generated_text"].split("label: ")[-1].strip())


In [9]:
prompt_false_positive_1 = f"""
            Classify the given candidate string into "Non-sensitive" or "Secret" based on its presence and usage in the provided code snippet. A "Secret" refers to sensitive information like API keys, passwords, or private tokens. Return the answer as the corresponding label.
candidate_string: "abcdefghijkl"
code snippet: 
def login_user(username, password):
    if username == "user123" and password == "abcdefghijkl":
        print("Login successful")
    else:
        print("Login failed")
label: """.strip()

prompt_true_positive_1 = f"""
            Classify the given candidate string into "Non-sensitive" or "Secret" based on its presence and usage in the provided code snippet. A "Secret" refers to sensitive information like API keys, passwords, or private tokens. Return the answer as the corresponding label.
candidate_string: "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA7tVq9IwtM0FdI9eDB7qOjfF1INpCwIQsA6WlLRnm6aB+LbbHtDps5KP3pzDJ2PzZ7VJ5uM9gqytCw=="  # Private Key
code snippet: 
import cryptography

private_key = "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA7tVq9IwtM0FdI9eDB7qOjfF1INpCwIQsA6WlLRnm6aB+LbbHtDps5KP3pzDJ2PzZ7VJ5uM9gqytCw=="  # Secret private key

# Using the private key for decryption
decrypted_data = cryptography.decrypt_data(private_key, encrypted_data)
print(decrypted_data)
label: """.strip()


# Output for True Positive
# outputs_true_positive = pipe(prompt_true_positive_1, max_new_tokens=2, do_sample=True, temperature=0.1)
# print(outputs_true_positive[0]["generated_text"].split("label: ")[-1].strip())

# Output for False Positive
outputs_false_positive = pipe(prompt_false_positive_1, max_new_tokens=2, do_sample=True, temperature=0.1)
print(outputs_false_positive[0]["generated_text"].split("label: ")[-1].strip())

KeyboardInterrupt: 