In [1]:
# Mount Google Drive inside the Colab runtime; the data files used later in
# this notebook are read from paths below this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [66]:
# Drop a `model` object left over from an earlier run, if there is one.
# Only NameError (the name is not bound yet) is an expected outcome here;
# any other exception should surface instead of being silently swallowed.
try:
  del model
except NameError:
  print("Model does not exists.")

In [67]:
import numpy as np
import pandas as pd
from keras.layers import Input, Embedding, LSTM, Dense
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from keras.utils import set_random_seed

# Paths of the data files on the mounted Drive
log_filename = '/content/drive/MyDrive/tam1.xlsx'
keyword_filename = '/content/drive/MyDrive/keywords.txt'

# Load the spreadsheet; the raw log lines are taken from its 'text' column.
df = pd.read_excel(log_filename)

# Drop the first comma-separated field of every line and keep the remainder.
# partition() cuts only at the first comma, so commas inside the remainder
# survive and neighbouring fields are not fused together ("a,b,c" -> "b,c",
# not "bc"). A line without any comma yields '' as before. Empty cells (NaN)
# are turned into '' first so they no longer raise on a non-string value.
logs = [str(cell).partition(',')[2].strip() for cell in df['text'].fillna('')]

# Load the keyword list, one keyword per line. Blank (or whitespace-only)
# lines are skipped: the empty string is a substring of every log, so a
# single blank line would label the entire data set as infected.
with open(keyword_filename, 'r', encoding='utf-8') as file:
    keywords = [keyword for keyword in file.read().splitlines() if keyword.strip()]

# Label every log by keyword lookup: 1 = infected (contains at least one
# keyword), 0 = not infected. The substring match is case-sensitive.
labels = [
    1 if any(keyword in log for keyword in keywords) else 0
    for log in logs
]

# Tokenization and sequencing: fit the vocabulary on the logs, encode each
# log as a list of word indices, then pad every sequence at the end
# (padding='post') up to the length of the longest one.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(logs)
sequences = tokenizer.texts_to_sequences(logs)
max_length = len(max(sequences, key=len))
padded_sequences = pad_sequences(sequences, padding='post', maxlen=max_length)

# Fix the random seeds before any layer is created so that weight
# initialisation and batch shuffling are repeatable from run to run.
set_random_seed(42)

# Model definition: word indices -> 50-dim embeddings -> LSTM(64) -> sigmoid
text_input = Input(shape=(max_length,), name='text_input')
# + 1 leaves room for index 0 (assumes the tokenizer's word indices start
# at 1 — TODO confirm against the Keras Tokenizer docs)
vocabulary_size = len(tokenizer.word_index) + 1
embedding_layer = Embedding(input_dim=vocabulary_size, output_dim=50)(text_input)
lstm_layer = LSTM(64)(embedding_layer)
output = Dense(1, activation='sigmoid')(lstm_layer)

model = Model(inputs=text_input, outputs=output)

# Compile for binary classification
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Training data as NumPy arrays
X = np.array(padded_sequences)
y = np.array(labels)

# Sanity check: both sizes must describe the same number of samples
print("padded_sequences boyutu:", padded_sequences.shape)
print("labels boyutu:", len(labels))

# Train the model, holding out 20% of the samples for validation.
# NOTE(review): per the Keras docs validation_split takes the *last* fraction
# of the samples before shuffling — confirm the rows are not ordered by class.
history = model.fit(
    X,
    y,
    epochs=10,
    batch_size=1024,
    validation_split=0.2,
)

padded_sequences boyutu: (21360, 38)
labels boyutu: 21360
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [77]:
# Hand-labelled evaluation samples: each entry is [log text, expected label],
# where the label is either "infected" or "clean".
test_logs = [
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; HTC_Desire_700_dual_sim Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30","clean"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (Linux; Android 8.0.0; SM-A600G Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.91 Mobile Safari/537.36","clean"],
    ["Mozilla/5.0 (Windows NT 6.1","clean"],
    ["Mozilla/5.0 (Linux; Android 7.0; Lenovo TB-7304N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.99 Safari/537.36","clean"],
    ["Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)","clean"],
    ["gobuster/3.6","infected"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["gobuster/3.6","infected"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (Linux; Android 6.0; LG-H818 Build/MRA58K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36","clean"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["Mozilla/5.0 (Windows NT 6.1","clean"],
    ["SQLmap","infected"],
    ["gobuster/3.6","infected"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (iPhone; CPU iPhone OS 12_1_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Mobile/15E148 Safari/604.1","clean"],
    ["Mozilla/5.0 (compatible","clean"],
    ["Mozilla/5.0 (Linux; Android 5.1.1; SM-T285 Build/LMY47V) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.126 Safari/537.36","clean"],
    ["Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116","clean"],
    ["gobuster/3.6","infected"],
    ["gobuster/3.6","infected"],
    ["ghauri","infected"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["gobuster/3.6","infected"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 OPR/57.0.3098.116","clean"],
    ["Mozilla/5.0 (compatible","clean"],
    ["gobuster/3.6","infected"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["gobuster/3.6","infected"],
    ["Mozilla/5.0 (compatible","clean"],
    ["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36","clean"]
  ]

# Split the pairs into two parallel lists: model inputs and expected labels
test_inputs = [sample[0] for sample in test_logs]
test_outputs = [sample[1] for sample in test_logs]

In [78]:
# Encode the test strings with the already-fitted tokenizer and pad them to
# the same length the model was built for, then score them.
sequences = tokenizer.texts_to_sequences(test_inputs)
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')
predictions = model.predict(padded_sequences)

# Decision cut-off for the printed status. It matches the 0.8 threshold the
# confusion-matrix cell applies to the same predictions, so the status shown
# here always agrees with the label that is evaluated there (it used to be
# 0.9 here, which could disagree for probabilities between 0.8 and 0.9).
INFECTED_THRESHOLD = 0.8

for log, prediction in zip(test_inputs, predictions):
    infected_probability = prediction[0]  # single sigmoid output per sample
    print(f"Log: {log}")
    print(f"Status: {'Infected' if infected_probability > INFECTED_THRESHOLD else 'Clean'}")
    print(f"Infected Probability: {infected_probability}\n")

Log: gobuster/3.6
Status: Infected
Infected Probability: 0.9987911581993103

Log: Mozilla/5.0 (Linux; U; Android 4.1.2; en-us; HTC_Desire_700_dual_sim Build/JZO54K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30
Status: Clean
Infected Probability: 0.0013781210873275995

Log: gobuster/3.6
Status: Infected
Infected Probability: 0.9987911581993103

Log: Mozilla/5.0 (Linux; Android 8.0.0; SM-A600G Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.91 Mobile Safari/537.36
Status: Clean
Infected Probability: 0.0013730444479733706

Log: Mozilla/5.0 (Windows NT 6.1
Status: Clean
Infected Probability: 0.0013694181106984615

Log: Mozilla/5.0 (Linux; Android 7.0; Lenovo TB-7304N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.99 Safari/537.36
Status: Clean
Infected Probability: 0.001371453981846571

Log: Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (comp

In [79]:
# Confusion-matrix tallies keyed by (actual label, predicted label);
# every counter starts at zero.
tallies = {
    ("infected", "infected"): 0,  # TP: actually infected, predicted infected
    ("clean", "clean"): 0,        # TN: actually clean, predicted clean
    ("clean", "infected"): 0,     # FP: actually clean, predicted infected
    ("infected", "clean"): 0,     # FN: actually infected, predicted clean
}

# Compare the actual and the predicted label of every scored sample
for idx, prediction in enumerate(predictions):
    gercek_etiket = test_outputs[idx]
    # A probability above 0.8 counts as an "infected" prediction
    tahmin_edilen_etiket = "infected" if prediction[0] > 0.8 else "clean"
    outcome = (gercek_etiket, tahmin_edilen_etiket)
    # Samples whose actual label is neither "infected" nor "clean" are not counted
    if outcome in tallies:
        tallies[outcome] += 1

TP = tallies[("infected", "infected")]
TN = tallies[("clean", "clean")]
FP = tallies[("clean", "infected")]
FN = tallies[("infected", "clean")]

# Print the results
print("True Positives:", TP)
print("True Negatives:", TN)
print("False Positives:", FP)
print("False Negatives:", FN)

True Positives: 19
True Negatives: 22
False Positives: 0
False Negatives: 2


In [80]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


# Accuracy: share of all samples that were classified correctly
accuracy = _safe_ratio(TP + TN, TP + TN + FP + FN)

# Precision: share of predicted-infected samples that really are infected
# (0.0 instead of ZeroDivisionError when nothing was predicted infected)
precision = _safe_ratio(TP, TP + FP)

# Recall (sensitivity, not specificity): share of actually infected samples
# that were detected (0.0 when there are no infected samples)
recall = _safe_ratio(TP, TP + FN)

# F1 score: harmonic mean of precision and recall (0.0 when both are zero)
f1_score = _safe_ratio(2 * (precision * recall), precision + recall)

# Print the results
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1_score)


Accuracy: 0.9534883720930233
Precision: 1.0
Recall: 0.9047619047619048
F1 Score: 0.9500000000000001
