In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split
import bitsandbytes as bnb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from huggingface_hub import login

In [None]:
from transformers import set_seed

# Seed once, before any data or model work, so repeated runs start from the same state.
SEED = 69420
set_seed(SEED)

In [None]:
# Read the test-set CSV and replace the numeric labels with the class names
# that appear in the prompts and in the evaluation below.
label_names = {0: 'Non-sensitive', 1: 'Secret'}
df3 = pd.read_csv("../final_dataset/test_set.csv")
df3['Label'] = df3['Label'].replace(label_names)
print(df3['Label'].value_counts())

In [None]:
def create_context_window(text, target_string, window_size=200):
    """Return the part of ``text`` surrounding the first ``target_string``.

    The window covers the first occurrence of ``target_string`` plus up to
    ``window_size`` characters on each side, clipped to the bounds of ``text``.

    Args:
        text: String to search in.
        target_string: Substring to locate.
        window_size: Number of context characters kept before and after the match.

    Returns:
        The context window as a string, or ``None`` when ``target_string`` does
        not occur in ``text`` or when either argument is not a string.
    """
    # Non-string inputs (e.g. NaN from an empty CSV cell) cannot be searched;
    # report them the same way as "not found" instead of raising.
    if not isinstance(text, str) or not isinstance(target_string, str):
        return None

    target_index = text.find(target_string)
    if target_index == -1:
        return None

    start_index = max(0, target_index - window_size)
    end_index = min(len(text), target_index + len(target_string) + window_size)
    return text[start_index:end_index]

def _window_for_row(row):
    """Context window around this row's 'Secret' inside its 'Contents'."""
    return create_context_window(row['Contents'], row['Secret'])


# Overwrites 'Contents' with the windowed text (None where the secret is not found).
df3['Contents'] = df3.apply(_window_for_row, axis=1)


In [None]:
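# Optional filter (currently disabled): create_context_window returns None when the
# secret is not found in 'Contents'; uncomment the next line to drop those rows.
# df3 = df3[df3['Contents'].notna()]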

In [None]:
# Display (rows, columns) of the test frame as a quick sanity check.
df3.shape

In [None]:

def generate_prompt(data_point):
    """Build the fully labelled prompt for one record.

    Args:
        data_point: Mapping (dict or DataFrame row) with the keys
            "Secret", "Contents" and "Label".

    Returns:
        The instruction text followed by the candidate string, the code
        snippet and the label, with surrounding whitespace stripped.
    """
    candidate = data_point["Secret"]
    snippet = data_point["Contents"]
    label = data_point["Label"]
    # The prompt wording is model input and must stay exactly as it is.
    prompt = f"""
You are a code security auditor or classifier speccialized in identifying and categorizing sensitive secrets from code snippet.Classify the given candidate string as either "Non-sensitive" or "Secret" based on its role in the provided code snippet. A "Secret" includes sensitive information such as: API keys and secrets (e.g., `sk_test_ABC123`), Private and secret keys (e.g., private SSH keys, private cryptographic keys), Authentication keys and tokens (e.g., `Bearer <token>`), Database connection strings with credentials (e.g., `mongodb://user:password@host:port`), Passwords, usernames, and any other private information that should not be shared openly. A "Non-sensitive" string is not considered secret and can be shared openly. This may include: Publicly available keys (e.g., public SSH keys), Non-sensitive configuration values or identifiers, Any non-sensitive data not directly tied to security or authentication. Carefully consider the context of the string in the provided code. If the string is part of authentication, encryption, or access control, it is likely a "Secret". Otherwise, it is "Non-sensitive". Ensure you pay attention to specific patterns like tokens, passwords, or keys in the string. Return the answer as the corresponding label.

candidate_string: {candidate}
code_snippet: {snippet}
label: {label}
"""
    return prompt.strip()

def generate_test_prompt(data_point):
    """Build the inference prompt for one record, leaving the label blank.

    Args:
        data_point: Mapping (dict or DataFrame row) with the keys
            "Secret" and "Contents".

    Returns:
        The instruction text followed by the candidate string and the code
        snippet, ending in "label:" (the trailing space is removed by the
        final strip).
    """
    candidate = data_point["Secret"]
    snippet = data_point["Contents"]
    # The prompt wording is model input and must stay exactly as it is.
    prompt = f"""
You are a code security auditor or classifier speccialized in identifying and categorizing sensitive secrets from code snippet.Classify the given candidate string as either "Non-sensitive" or "Secret" based on its role in the provided code snippet. A "Secret" includes sensitive information such as: API keys and secrets (e.g., `sk_test_ABC123`), Private and secret keys (e.g., private SSH keys, private cryptographic keys), Authentication keys and tokens (e.g., `Bearer <token>`), Database connection strings with credentials (e.g., `mongodb://user:password@host:port`), Passwords, usernames, and any other private information that should not be shared openly. A "Non-sensitive" string is not considered secret and can be shared openly. This may include: Publicly available keys (e.g., public SSH keys), Non-sensitive configuration values or identifiers, Any non-sensitive data not directly tied to security or authentication. Carefully consider the context of the string in the provided code. If the string is part of authentication, encryption, or access control, it is likely a "Secret". Otherwise, it is "Non-sensitive". Ensure you pay attention to specific patterns like tokens, passwords, or keys in the string. Return the answer as the corresponding label.

candidate_string: {candidate}
code_snippet: {snippet}
label: """
    return prompt.strip()

In [None]:
# X_test is a second name for df3: same object, no copy is made.
X_test = df3

In [None]:
# Read from df3 (not X_test) so this cell can be re-run: X_test is rebound below
# to a one-column prompt frame, which no longer has a 'Label' column.
y_true = df3.loc[:, 'Label']

# One inference prompt per row, stored in a single 'text' column with df3's index.
X_test = pd.DataFrame(df3.apply(generate_test_prompt, axis=1), columns=["text"])

In [None]:
# base_model_name = "meta-llama/Llama-3.1-8B-Instruct"

# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_use_double_quant=False,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype="float16",
# )

# model = AutoModelForCausalLM.from_pretrained(
#     base_model_name,
#     device_map="auto",
#     torch_dtype="float16",
#     quantization_config=bnb_config, 
# )

# model.config.use_cache = False
# model.config.pretraining_tp = 1

# tokenizer = AutoTokenizer.from_pretrained(base_model_name)

base_model_name = "deepseek-ai/deepseek-llm-7b-base"
checkpoint_path = "../models/deepseek-fine-tuned-model-30k-new-prompt-1024-imb/checkpoint-21000"

# 4-bit NF4 quantization, no double quantization, float16 compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype="float16",
)

# The fine-tuned model is read from the checkpoint directory with quantization enabled.
model_load_options = {
    "device_map": "auto",
    "torch_dtype": "float16",
    "quantization_config": bnb_config,
}
model = AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_load_options)

model.config.use_cache = False
model.config.pretraining_tp = 1

# The tokenizer is loaded from the base model name (not from the checkpoint
# directory), using the non-fast implementation; EOS is reused as the pad token.
tokenizer = AutoTokenizer.from_pretrained(base_model_name, use_fast=False)
tokenizer.pad_token_id = tokenizer.eos_token_id


In [None]:
import time
from tqdm import tqdm

def predict(test, model, tokenizer):
    """Classify every prompt in ``test`` and return the predicted labels.

    Args:
        test: DataFrame with a "text" column holding one prompt per row.
        model: Model passed to the text-generation pipeline.
        tokenizer: Tokenizer passed to the text-generation pipeline.

    Returns:
        A list with one label per row, in row order; each entry is either
        "Secret" or "Non-sensitive". Anything that does not contain "Secret"
        on the first line of the completion is reported as "Non-sensitive".

    Side effects:
        Prints the average generation time per prompt (0 for an empty frame).
    """
    # The pipeline does not depend on the prompt, so build it once instead of
    # once per row. return_full_text=False makes it return only the newly
    # generated text, so the label is never parsed out of the prompt itself.
    # NOTE(review): temperature is kept as in the original call; whether it has
    # any effect depends on sampling being enabled in the generation config.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=4,
                    temperature=0.1,
                    return_full_text=False)

    y_pred = []
    total_time = 0.0  # Accumulated generation time in seconds

    for prompt in tqdm(test["text"], total=len(test)):
        start = time.perf_counter()
        result = pipe(prompt)
        total_time += time.perf_counter() - start

        # Only the first line of the completion is the answer; anything the
        # model keeps generating after it (e.g. a repeated "label:") is ignored.
        completion = result[0]['generated_text'].strip()
        answer = completion.splitlines()[0] if completion else ""

        if "Secret" in answer:
            y_pred.append("Secret")
        else:
            y_pred.append("Non-sensitive")

    # Guard against an empty test frame instead of dividing by zero.
    avg_time = total_time / len(test) if len(test) else 0.0
    print(f"Average prediction time: {avg_time:.4f} seconds")

    return y_pred

# Run inference over every test prompt; y_pred[i] is the label for the i-th row of X_test.
y_pred = predict(X_test, model, tokenizer)


In [None]:
# # Convert to numpy array for easy comparison
# y_true = np.array(y_true)
# y_pred = np.array(y_pred)

# # Identify False Negatives (FN) and False Positives (FP)
# false_negatives = X_test[(y_true == "Secret") & (y_pred == "Non-sensitive")]
# false_positives = X_test[(y_true == "Non-sensitive") & (y_pred == "Secret")]

# # Save to a text file
# output_file = "false_predictions.txt"
# with open(output_file, "w", encoding="utf-8") as f:
#     f.write("False Negatives (FN):\n")
#     f.write("\n".join(false_negatives["text"]) + "\n\n")

#     f.write("False Positives (FP):\n")
#     f.write("\n".join(false_positives["text"]) + "\n")

# print(f"False negatives and false positives saved to {output_file}")

# Collect the true label, the predicted label and the prompt for every test row
results = pd.DataFrame({
    'true_label': y_true,
    'predicted_label': y_pred,
    'text': X_test['text']
})

# Boolean masks for each side of the comparison
actually_secret = results['true_label'].eq('Secret')
actually_benign = results['true_label'].eq('Non-sensitive')
predicted_secret = results['predicted_label'].eq('Secret')
predicted_benign = results['predicted_label'].eq('Non-sensitive')

# False negatives: a real secret that was predicted "Non-sensitive"
false_negatives = results[actually_secret & predicted_benign]

# False positives: a non-sensitive string that was predicted "Secret"
false_positives = results[actually_benign & predicted_secret]

# Report how many of each kind of error occurred
print(f"Total False Negatives: {len(false_negatives)}")
print(f"Total False Positives: {len(false_positives)}")

# # Save to text files
# with open('false_negatives1024-np-1.txt', 'w', encoding='utf-8') as f:
#     for idx, row in false_negatives.iterrows():
#         f.write(f"Example {idx+1}:\n")
#         f.write(f"True: {row['true_label']} | Predicted: {row['predicted_label']}\n")
#         f.write(f"{row['text']}\n\n")
#         f.write("-" * 100 + "\n\n")

# with open('false_positives1024-np-1.txt', 'w', encoding='utf-8') as f:
#     for idx, row in false_positives.iterrows():
#         f.write(f"Example {idx+1}:\n")
#         f.write(f"True: {row['true_label']} | Predicted: {row['predicted_label']}\n")
#         f.write(f"{row['text']}\n\n")
#         f.write("-" * 100 + "\n\n")

# print("Files saved: false_negatives.txt and false_positives.txt")

# Confusion matrix heatmap (rows = true label, columns = predicted label), written to disk
class_names = ['Non-sensitive', 'Secret']
cm = confusion_matrix(y_true, y_pred, labels=class_names)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names,
            yticklabels=class_names,
            ax=ax)
ax.set_ylabel('True Label')
ax.set_xlabel('Predicted Label')
ax.set_title('Confusion Matrix')
fig.savefig('../plots/deepseek-7e-new-prompt-1024-imb.png')
plt.close(fig)

# Per-class metrics, printed with four decimal places
print("\nClassification Report:")

print(classification_report(y_true, y_pred, digits=4))


In [None]:
# # Path to the checkpoint directory
# checkpoint_path = "models/llama-3.1-fine-tuned-model-30k-new/checkpoint-21000"

# # Load the fine-tuned model from the checkpoint
# model = AutoModelForCausalLM.from_pretrained(checkpoint_path)
# # Load the tokenizer
# tokenizer = AutoTokenizer.from_pretrained(checkpoint_path, use_fast=False)