# Enhancing Compliance Monitoring with BERT and Large Language Models

In [26]:
from transformers import BertForSequenceClassification, BertTokenizer

# Checkpoint shared by the tokenizer and the classifier below
model_name = 'bert-base-uncased'

# Tokenizer matching the checkpoint's vocabulary
tokenizer = BertTokenizer.from_pretrained(model_name)

# BERT encoder with a sequence-classification head on top; the head weights
# are not part of this checkpoint, so they are newly initialised on load
model = BertForSequenceClassification.from_pretrained(model_name)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [27]:
import torch
from transformers import BertForSequenceClassification, BertTokenizer

# Fix the RNG seed before loading: the classification head is not part of the
# 'bert-base-uncased' checkpoint and is randomly initialised, so without a
# seed the prediction below can change from one run to the next.
torch.manual_seed(0)

# Load the pre-trained BERT encoder with an explicit two-label head
model_name = 'bert-base-uncased'
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
model.eval()  # inference mode: make sure dropout is disabled

# Load the corresponding tokenizer
tokenizer = BertTokenizer.from_pretrained(model_name)

# Example log entry for inference
sample_log_entry = "User 'John' accessed sensitive file 'confidential.txt'."

# Tokenize the input log entry
inputs = tokenizer(sample_log_entry, return_tensors="pt", padding=True, truncation=True)

# Perform inference without tracking gradients
with torch.no_grad():
    outputs = model(**inputs)

# Binary classification (0 for compliant, 1 for non-compliant).
# Take the argmax over the label axis rather than over the flattened logits;
# exactly one entry was tokenized, so the result holds a single label.
prediction = torch.argmax(outputs.logits, dim=-1).item()

# NOTE: until the model is fine-tuned on labelled compliance data, this
# prediction comes from a random head and must not be read as a real verdict.
if prediction == 0:
    insights = "This log entry is compliant with security policies."
else:
    insights = "Non-compliance detected. Recommend reviewing access controls."

print("Log Entry:", sample_log_entry)
print("Prediction:", prediction)
print("Insights:", insights)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Log Entry: User 'John' accessed sensitive file 'confidential.txt'.
Prediction: 1
Insights: Non-compliance detected. Recommend reviewing access controls.


In [28]:
import transformers

# Load a sequence-classification model from the same checkpoint.
# NOTE: this checkpoint is NOT fine-tuned for compliance: its classifier head
# is newly initialised on load, so predictions are placeholders until the
# model has been trained on labelled log data.
# num_labels=2 spells out the binary labelling (0 = compliant,
# 1 = non-compliant) that generate_insights() relies on.
llm_model = transformers.AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=2
)

def analyze_compliance(log_entry):
    """Classify one or more log entries with the notebook-level classifier.

    Uses the global ``tokenizer`` and ``llm_model`` objects, which must be
    defined before this function is called.

    Args:
        log_entry: A single log line (``str``) or an iterable of log lines.

    Returns:
        For a single string, the predicted label as an ``int``
        (0 = compliant, anything else = non-compliant). For an iterable,
        a list with one predicted label per entry, in input order
        (an empty iterable yields an empty list).
    """
    is_single = isinstance(log_entry, str)
    entries = [log_entry] if is_single else list(log_entry)
    if not entries:
        # Nothing to classify; skip the tokenizer and model entirely.
        return []

    inputs = tokenizer(entries, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = llm_model(**inputs)

    # Argmax over the label axis gives one label per entry; without `dim`,
    # argmax indexes into the flattened logits and is only right for one row.
    predictions = torch.argmax(outputs.logits, dim=-1).tolist()
    return predictions[0] if is_single else predictions

def generate_insights(prediction):
    """Translate a predicted class label into a human-readable recommendation.

    Label 0 is reported as compliant; every other value is reported as
    non-compliant.
    """
    compliant_msg = "This log entry is compliant with security policies."
    violation_msg = "Non-compliance detected. Recommend reviewing access controls."
    return compliant_msg if prediction == 0 else violation_msg

# Load the tokenizer at module level: analyze_compliance() looks up the global
# `tokenizer`, so it has to exist even when the guarded demo below is skipped
# (for example when this code is imported instead of being run directly).
tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)

if __name__ == "__main__":
    # Demo: classify one sample entry and print the resulting recommendation.
    sample_log_entry = "User 'John' accessed sensitive file 'confidential.txt'."

    compliance_prediction = analyze_compliance(sample_log_entry)
    insights = generate_insights(compliance_prediction)

    print("Log Entry:", sample_log_entry)
    print("Prediction:", compliance_prediction)
    print("Insights:", insights)


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Log Entry: User 'John' accessed sensitive file 'confidential.txt'.
Prediction: 0
Insights: This log entry is compliant with security policies.
