In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split
import bitsandbytes as bnb
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from tqdm import tqdm
from huggingface_hub import login

In [None]:
# Fix the random seed up front so repeated runs of this notebook are comparable.
from transformers import set_seed

set_seed(69420)

In [None]:
# Load the test-set CSV; the path is relative to the notebook's working directory.
df=pd.read_csv("../final_dataset/test_set.csv")

In [None]:
# Keep only the columns used downstream. `.copy()` detaches the selection from the
# loaded frame, so the later `df['Label'] = ...` / `df['Contents'] = ...` assignments
# write to an independent frame instead of triggering SettingWithCopyWarning.
df = df[['Contents', 'Secret', 'Label']].copy()
print(df['Label'].value_counts())

In [None]:
# Turn the numeric labels (0 / 1) into the class names used in the prompts and metrics.
label_names = {0: 'Non-sensitive', 1: 'Secret'}
df['Label'] = df['Label'].replace(label_names)
print(df['Label'].value_counts())

In [None]:
def create_context_window(text, target_string, window_size=200):
    """Return the part of ``text`` around the first occurrence of ``target_string``.

    The result contains the match itself plus up to ``window_size`` characters on
    each side, clipped to the bounds of ``text``.

    Args:
        text: String to search in.
        target_string: Substring to locate.
        window_size: Number of characters of context kept before and after the match.

    Returns:
        The surrounding slice of ``text``, or ``None`` when ``target_string`` does
        not occur in ``text``.
    """
    hit = text.find(target_string)
    if hit == -1:
        return None

    # Clamp the left edge at the start of the text.
    left = hit - window_size
    if left < 0:
        left = 0

    # Clamp the right edge at the end of the text.
    right = hit + len(target_string) + window_size
    if right > len(text):
        right = len(text)

    return text[left:right]

# Replace each row's 'Contents' with the window around that row's 'Secret' string
# (default window size). Rows whose 'Secret' text does not occur in 'Contents'
# get None, because create_context_window returns None when nothing is found.
df['Contents'] = df.apply(lambda row: create_context_window(row['Contents'], row['Secret']), axis=1)

In [None]:

# X_test is a second name for the same DataFrame object as df (no copy is made here).
X_test = df

In [None]:
def generate_test_prompt(data_point):
    """Build the classification prompt for a single row.

    Args:
        data_point: Mapping (e.g. a DataFrame row) with a "Secret" entry holding the
            candidate string and a "Contents" entry holding the code snippet.

    Returns:
        The instruction text, one worked example, and the row's candidate string and
        snippet, with surrounding whitespace stripped so the prompt ends in "label:".
    """
    template = """
You are a code security auditor or classifier speccialized in identifying and categorizing sensitive secrets from code snippet.Classify the given candidate string as either "Non-sensitive" or "Secret" based on its role in the provided code snippet. A "Secret" includes sensitive information such as: API keys and secrets (e.g., `sk_test_ABC123`), Private and secret keys (e.g., private SSH keys, private cryptographic keys), Authentication keys and tokens (e.g., `Bearer <token>`), Database connection strings with credentials (e.g., `mongodb://user:password@host:port`), Passwords, usernames, and any other private information that should not be shared openly. A "Non-sensitive" string is not considered secret and can be shared openly. This may include: Publicly available keys (e.g., public SSH keys), Non-sensitive configuration values or identifiers, Any non-sensitive data not directly tied to security or authentication. Carefully consider the context of the string in the provided code. If the string is part of authentication, encryption, or access control, it is likely a "Secret". Otherwise, it is "Non-sensitive". Ensure you pay attention to specific patterns like tokens, passwords, or keys in the string. Return the answer as the corresponding label.

Examples:
candidate_string: "AIzaSyD12345"
code snippet: 'GOOGLE_API_KEY = "AIzaSyD12345"'
label: "Secret"

Now classify the following:

candidate_string: {candidate_string}
code snippet: {code_snippet}
label: """
    filled = template.format(
        candidate_string=data_point["Secret"],
        code_snippet=data_point["Contents"],
    )
    # Stripping removes the leading newline and the trailing space after "label:".
    return filled.strip()



In [None]:
# Generate test prompts and extract true labels
y_true = X_test.loc[:,'Label']
batch_size = 100
# Ceiling division: the previous `len // batch_size + 1` produced an extra, empty
# trailing batch whenever the row count was an exact multiple of batch_size.
# At least one (possibly empty) batch is kept so pd.concat always has input.
num_batches = max(1, -(-len(X_test) // batch_size))

results = []
for i in range(num_batches):
    # .copy() makes the batch an independent frame, so adding the "text" column
    # does not write into a slice of X_test (SettingWithCopyWarning).
    batch = X_test.iloc[i * batch_size: (i + 1) * batch_size].copy()
    batch["text"] = batch.apply(generate_test_prompt, axis=1)
    results.append(batch)

# Row order is preserved; ignore_index renumbers the rows 0..n-1.
X_test = pd.concat(results, ignore_index=True)


In [None]:
# HF_TOKEN was referenced without being defined in this notebook, which raises
# NameError on a fresh kernel. Reuse HF_TOKEN if an earlier cell set it, otherwise
# read the HF_TOKEN environment variable, so the token is never hardcoded here.
# If neither is available the value is None and login() asks for a token itself.
HF_TOKEN = globals().get("HF_TOKEN") or os.environ.get("HF_TOKEN")
login(HF_TOKEN)

In [None]:
# Checkpoint to evaluate; only this base model is loaded in this cell.
base_model_name = "google/gemma-7b"

# Quantization settings: 4-bit NF4 weights, float16 compute, double quantization off.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

# Load the model with the quantization config above; device_map="auto" leaves
# device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config, 
)

# NOTE(review): use_cache=False and pretraining_tp=1 look carried over from a
# training setup — confirm they are wanted for this inference-only notebook.
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Use the end-of-sequence token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id

In [None]:
def predict(test, model, tokenizer):
    """Label every prompt in ``test["text"]`` as "Secret" or "Non-sensitive".

    Args:
        test: DataFrame with a "text" column holding one prompt per row.
        model: Causal language model handed to the text-generation pipeline.
        tokenizer: Tokenizer that goes with ``model``.

    Returns:
        A list with one label per row, in row order. A row is labelled "Secret"
        when the text generated after the final "label:" contains "Secret";
        every other output is labelled "Non-sensitive".
    """
    # Build the pipeline once: it does not depend on the row, so re-creating it on
    # every iteration only added per-row overhead.
    # do_sample=False asks for greedy decoding explicitly. The earlier
    # temperature=0.1 was passed without do_sample=True; temperature only shapes
    # sampling, so it had no effect unless sampling was enabled elsewhere.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=2,
                    do_sample=False)

    y_pred = []
    for prompt in tqdm(test["text"]):
        result = pipe(prompt)
        # generated_text echoes the prompt, so keep only what follows the last "label:".
        answer = result[0]['generated_text'].split("label:")[-1].strip()

        # Determine the predicted label (Secret or Non-sensitive)
        y_pred.append("Secret" if "Secret" in answer else "Non-sensitive")

    return y_pred

# Run the model over every prompt; y_pred is a list of labels in X_test row order.
y_pred = predict(X_test, model, tokenizer)



In [None]:
def evaluate(y_true, y_pred):
    """Print overall accuracy, per-class accuracy, a classification report and a confusion matrix.

    Both arguments are iterables of label strings. "Non-sensitive" is encoded as 0
    and "Secret" as 1. Any position where either label is something else is left
    out of every metric. Nothing is returned; all results are printed.
    """
    class_ids = [0, 1]
    class_names = ["Non-sensitive", "Secret"]

    def encode(value):
        # -1 marks a label that is neither of the two known classes.
        return 0 if value == "Non-sensitive" else (1 if value == "Secret" else -1)

    truth = np.array([encode(item) for item in y_true])
    guess = np.array([encode(item) for item in y_pred])

    # Keep only the positions where both the true and the predicted label are known.
    usable = (truth != -1) & (guess != -1)
    truth = truth[usable]
    guess = guess[usable]

    overall = accuracy_score(y_true=truth, y_pred=guess)
    print(f'Overall Accuracy: {overall:.3f}')

    # Accuracy restricted to the samples whose true label is the given class;
    # reported as 0.0 when the class has no samples.
    for class_id, class_name in zip(class_ids, class_names):
        members = truth == class_id
        if members.any():
            score = accuracy_score(y_true=truth[members], y_pred=guess[members])
        else:
            score = 0.0
        print(f'Accuracy for {class_name}: {score:.3f}')

    report = classification_report(
        y_true=truth,
        y_pred=guess,
        target_names=class_names,
        labels=class_ids,
        digits=4,
    )
    print('\nClassification Report:')
    print(report)

    matrix = confusion_matrix(y_true=truth, y_pred=guess, labels=class_ids)
    print('\nConfusion Matrix:')
    print(matrix)

# Print the metrics for the model's predictions against the ground-truth labels.
evaluate(y_true, y_pred)
