In [31]:
!pip install faiss-cpu tqdm python-dotenv
!pip uninstall -y generativeai
!pip install --upgrade google-generativeai


Found existing installation: generativeai 0.0.1
Uninstalling generativeai-0.0.1:
  Successfully uninstalled generativeai-0.0.1


In [32]:

import os
from getpass import getpass

# Never store the API key as a literal in the notebook: it would be saved with
# the file and leak when the notebook is shared. Reuse a key that is already in
# the environment, otherwise prompt for it without echoing the input.
if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Gemini API key: ")


In [22]:
!pw


/content


In [38]:
import numpy as np
import faiss
import tqdm as tqdm
import google.generativeai as genai
import os



# 1. Gemini configuration
# ___________________________

# The API key is read from the environment; a missing variable raises KeyError.
genai.configure(api_key=os.environ["GEMINI_API_KEY"])
embed_model = "models/text-embedding-004"   # model used to embed logs and questions
llm_model = "models/gemini-2.5-pro"         # model used to write the final answer

# 2. Load data
# ____________________________

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default encoding. Logs can contain arbitrary bytes (e.g. attack payloads), so
# undecodable bytes are replaced instead of aborting the whole load.
with open("/content/sample_data/logs.txt", encoding="utf-8", errors="replace") as log_file:
    stripped_lines = (line.strip() for line in log_file)
    # One document per non-blank line.
    documents = [line for line in stripped_lines if line]

print(f"[+] Loaded {len(documents)} log entries.")
print("[+] Embedding logs with Gemini Pro…")


# 3. Extract embedding vectors
# __________________________________

# With no documents the array below would be 1-D and `shape[1]` would fail with
# an opaque IndexError; fail early with a message that names the real cause.
if not documents:
    raise ValueError("No log entries to embed: the log file contained no non-blank lines.")

# One embedding request per log line; tqdm shows progress.
vectors = [
    genai.embed_content(model=embed_model, content=doc)["embedding"]
    for doc in tqdm.tqdm(documents)
]

# FAISS works on float32 matrices of shape (n_documents, dim).
embeddings = np.array(vectors, dtype="float32")

dim = embeddings.shape[1]
index = faiss.IndexFlatL2(dim)    # flat index searched by L2 distance
index.add(embeddings)

print(f"[+] Index built with {index.ntotal} vectors.")


def rag_query(question, k=5):
    """Answer ``question`` with Gemini, using the most similar log lines as context.

    The question is embedded with ``embed_model``, its nearest neighbours are
    looked up in the module-level FAISS ``index``, and the matching entries of
    ``documents`` are inserted into the prompt sent to ``llm_model``.

    Args:
        question: Natural-language question about the logs.
        k: Maximum number of log lines to retrieve. Values larger than the
            number of indexed vectors are clamped; a value <= 0 retrieves
            nothing.

    Returns:
        A ``(answer, retrieved)`` tuple: the model's reply text and the list
        of log lines that were supplied to it as context.
    """
    # embed question
    q_embed = genai.embed_content(
        model=embed_model,
        content=question,
    )["embedding"]
    q_embed = np.array([q_embed]).astype("float32")

    # vector search
    # FAISS pads the result with -1 when fewer than k neighbours exist, and
    # documents[-1] would silently return the LAST log line as bogus context.
    # Clamp k to the index size and drop any negative id as a safeguard.
    k = min(k, index.ntotal)
    if k > 0:
        _, neighbour_ids = index.search(q_embed, k)
        retrieved = [documents[i] for i in neighbour_ids[0] if i >= 0]
    else:
        retrieved = []

    context = "\n".join(retrieved)

    prompt = f"""
You are a cybersecurity analysis assistant.
Here is the question:
{question}

Here are the most relevant logs:
{context}

Explain clearly what is happening, summarize, and detect potential threats.
"""

    response = genai.GenerativeModel(llm_model).generate_content(prompt)
    return response.text, retrieved


def ask_soc(question):
    """Run ``rag_query`` for ``question`` and print its results.

    Prints the retrieved log lines, one per line, under a header, followed by
    the model's answer under a second header. Returns nothing.
    """
    reply, retrieved_logs = rag_query(question)

    print("\n--- Retrieved logs ---")
    for log_line in retrieved_logs:
        print(log_line)

    print("\n--- Gemini Pro Answer ---")
    print(reply)

# Print each model returned by the API with the generation methods it supports,
# to check which model names can be used for `embed_model` and `llm_model`.
models = genai.list_models()
for model_info in models:
    print(model_info.name, model_info.supported_generation_methods)


[+] Loaded 20 log entries.
[+] Embedding logs with Gemini Pro…


100%|██████████| 20/20 [00:12<00:00,  1.60it/s]


[+] Index built with 20 vectors.
models/embedding-gecko-001 ['embedText', 'countTextTokens']
models/gemini-2.5-pro-preview-03-25 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.5-flash ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.5-pro-preview-05-06 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.5-pro-preview-06-05 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.5-pro ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-exp ['generateContent', 'countTokens', 'bidiGenerateContent']
models/gemini-2.0-flash ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-001 ['generateContent', 'countTokens', 'createCachedContent', 'batchGenerateContent']
models/gemini-2.0-flash-lite-001 ['genera

In [40]:
# Example query: prints the retrieved log lines, then the model's answer.
ask_soc("list me the problems")


--- Retrieved logs ---
2025-01-10 11:42:09 apache2[2391]: "GET /index.php?id=1%20OR%201=1 HTTP/1.1" 400 512 "-" "Mozilla/5.0"
2025-01-10 14:03:22 kernel: usb 1-1: USB disconnect, device number 4
2025-01-10 09:15:32 sudo: ubuntu : TTY=pts/0 ; PWD=/home/ubuntu ; USER=root ; COMMAND=/usr/bin/systemctl restart apache2
2025-01-10 11:45:55 apache2[2391]: "POST /login HTTP/1.1" 200 1034 "-" "curl/8.0.1"
2025-01-10 09:12:44 sudo: ubuntu : TTY=pts/0 ; PWD=/home/ubuntu ; USER=root ; COMMAND=/usr/bin/apt update

--- Gemini Pro Answer ---
Of course. Here is a clear breakdown of the problems and potential threats found in the provided logs.

### Summary of Events

The logs show a mix of routine administrative activity, a clear external attack attempt, and some unusual behavior. On the morning of January 10th, an administrator performed system updates and restarted the web server. Later that morning, an external actor launched a **SQL Injection attack**, which appears to have failed. Shortly after,