# Creating Synthetic Data using Llama 3.1

In [1]:
import openai
import pandas as pd
from dotenv import load_dotenv
import os


load_dotenv()
api_key = os.getenv("API_KEY")

In [2]:
# Define the system content
system_content = """
You are an expert in cybersecurity. Your task is to generate synthetic Windows Event ID 7045 log entries for training purposes.
Each entry should include:
- A label ("benign" or "malicious")
- Service Name
- Service File Name
- Service Type
- Service Start Type
- Service Account
- Data Service Name
- Timestamp
- ID

For the ID:
- End with "x" for malicious entries
- End with "Y" for benign entries
For the Service File Name:
- Use a command line for the malicious entries
- Use a file path for the benign entries
Ensure the generated entries are varied and realistic.

An example of a benign entry might look like this:
- Label: benign
- Service Name: ExampleService
- Service File Name: C:\\Program Files\\ExampleService\\service.exe
- Service Type: Own Process
- Service Start Type: Auto Start
- Service Account: LocalSystem
- Data Service Name: Windows11
- Timestamp: 2024-08-18T14:23:50.835Z
- ID: 09585cd1-8a61-4812-93asrwe-xxxxxxxxY

An example of a malicious entry might look like this:
- Label: malicious
- Service Name: MaliciousService
- Service File Name: %comspec% /b /cl start /b /min powershell -nop ...
- Service Type: Own Process
- Service Start Type: Auto Start
- Service Account: LocalSystem
- Data Service Name: oTm
- Timestamp: 2024-08-17T14:23:50.835Z
- ID: 09585cd1-8a61-4812-93asrwe-xxxxxxxxx
"""


In [3]:
# Define the user content for generating log entries
user_content_benign = "Generate a random benign Windows Event ID 7045 log entry."
user_content_malicious = "Generate a random malicious Windows Event ID 7045 log entry."

In [4]:
client = openai.OpenAI(
    api_key=api_key,
    base_url="https://api.aimlapi.com",
)

In [5]:

# Generate benign log
def generate_benign_log():
    chat_completion_benign = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content_benign},
        ],
        temperature=0.7,
        max_tokens=256,
    )
    return chat_completion_benign

# Generate malicious log
def generate_malicious_log():
    chat_completion_malicious = client.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content_malicious},
        ],
        temperature=0.7,
        max_tokens=256,
    )
    return chat_completion_malicious

In [6]:
synthetic_logs = []


In [7]:
for i in range(5):
    synthetic_logs.append(generate_benign_log().choices[0].message.content.strip())
for i in range(5):
    synthetic_logs.append(generate_malicious_log().choices[0].message.content.strip())

In [8]:
for log in synthetic_logs:
    print(log)
    print("\n\n")

Here is a randomly generated benign Windows Event ID 7045 log entry:

- Label: benign
- Service Name: Netman
- Service File Name: C:\Windows\System32\netman.dll
- Service Type: Shared Process
- Service Start Type: Demand Start
- Service Account: NT AUTHORITY\LocalService
- Data Service Name: Windows11Home
- Timestamp: 2024-03-22T08:45:12.219Z
- ID: 43a6bc71-421d-4f3c-b12f-11g6h7i8Y



Here is a random benign Windows Event ID 7045 log entry:

- Label: benign
- Service Name: BackupService
- Service File Name: C:\Program Files\Backup Software\backupsvc.exe
- Service Type: Own Process
- Service Start Type: Auto Start
- Service Account: NT AUTHORITY\LocalService
- Data Service Name: Windows10
- Timestamp: 2024-03-22T08:47:10.219Z
- ID: 4a731ecc-5214-42a8-b231-6a8f21a48c5Y



Here is a randomly generated benign Windows Event ID 7045 log entry:

- Label: benign
- Service Name: NetmanService
- Service File Name: C:\Windows\System32\svchost.exe
- Service Type: Shared Process
- Service Start Typ

In [9]:
# Function to parse a log entry
def parse_log_entry(log_entry):
    lines = log_entry.split('\n')
    log_data = {}
    for line in lines:
        if ': ' in line:
            key, value = line.split(': ', 1)
            log_data[key.strip()] = value.strip()
    return log_data

In [10]:
parsed_logs = []
for log in synthetic_logs:
    parsed_log = parse_log_entry(log)
    parsed_logs.append(parsed_log)

In [11]:
df = pd.DataFrame(parsed_logs)

In [12]:
df

Unnamed: 0,- Label,- Service Name,- Service File Name,- Service Type,- Service Start Type,- Service Account,- Data Service Name,- Timestamp,- ID
0,benign,Netman,C:\Windows\System32\netman.dll,Shared Process,Demand Start,NT AUTHORITY\LocalService,Windows11Home,2024-03-22T08:45:12.219Z,43a6bc71-421d-4f3c-b12f-11g6h7i8Y
1,benign,BackupService,C:\Program Files\Backup Software\backupsvc.exe,Own Process,Auto Start,NT AUTHORITY\LocalService,Windows10,2024-03-22T08:47:10.219Z,4a731ecc-5214-42a8-b231-6a8f21a48c5Y
2,benign,NetmanService,C:\Windows\System32\svchost.exe,Shared Process,Auto Start,NT AUTHORITY\LocalService,Windows10,2024-03-22T08:45:12.192Z,43f7e1c9-5210-497d-b8c5-324f92a5e19Y
3,benign,UpdateService,C:\Program Files\Microsoft\UpdateService\updat...,Share Process,Demand Start,NT AUTHORITY\NetworkService,Windows10,2024-09-02T10:15:32.219Z,4f38b23a-2e91-4123-bc45-1234567890Y
4,benign,NetworkMonitor,C:\Windows\System32\svchost.exe,Share Process,Demand Start,NT AUTHORITY\NetworkService,Windows10,2024-03-22T08:45:12.219Z,4f68a11e-9b43-4121-9f45-1234567890Y
5,malicious,SuspiciousUpdater,cmd.exe /c powershell -w hidden -enc JABjAGwAa...,Own Process,Demand Start,NT AUTHORITY\SYSTEM,1wV,2024-09-01T08:11:22.419Z,741c7a56-219f-42f0-9876-5b4321aad991x
6,malicious,SuspiciousUpdater,%windir%\system32\cmd.exe /c powershell -w hid...,Own Process,Demand Start,NT AUTHORITY\SYSTEM,WSearch,2024-09-02T08:45:12.219Z,4a3156b4-21cd-49f2-92ab-1a2f7b6dx
7,malicious,SuspiciousUpdater,%windir%\system32\cmd.exe /c powershell -w hid...,Share Process,Demand Start,NT AUTHORITY\NetworkService,4hJL,2024-03-22T08:41:19.421Z,4ec7a2d1-9b43-4129-8a5b-4a5e6c7d8a9x
8,malicious,UpdateSvc,"cmd.exe /c powershell -ep bypass -c ""IEX (New-...",Own Process,Demand Start,SYSTEM,w7x64,2024-07-22T10:01:23.019Z,3fba271d-4b6c-4938-af4e-1c531c59cxx
9,malicious,SuspiciousSvc,%windir%\system32\cmd.exe /c powershell -w hid...,Own Process,Demand Start,NT AUTHORITY\SYSTEM,wNc,2024-08-25T10:47:12.219Z,4b426551-43e1-4c91-b3a5-5d92aeb2e3cfx
