# Instalación de Dependencias

In [None]:
!pip install streamlit pandas pymongo langchain google-generativeai plotly PyPDF2 python-docx openpyxl

# Configuración de Conexiones (MongoDB y Google AI Studio)

In [1]:
import os
import google.generativeai as genai
from pymongo import MongoClient

# Credentials are read from the environment so that no secret is stored in the
# notebook. Export GOOGLE_API_KEY and MONGODB_URI before starting the kernel;
# a missing variable raises KeyError naming the variable.
# NOTE(review): the key and MongoDB password that used to be hardcoded here
# were saved in plain text and should be treated as leaked -- rotate them.
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
MONGODB_URI = os.environ["MONGODB_URI"]

# Configure Google AI Studio
genai.configure(api_key=GOOGLE_API_KEY)

# Connect to MongoDB and bind the collections used by the later cells
client = MongoClient(MONGODB_URI)
db = client["security_db"]
collection_vectors = db["security_vectors"]
collection_evaluations = db["evaluations"]

print("Conexiones configuradas correctamente.")

  from .autonotebook import tqdm as notebook_tqdm


Conexiones configuradas correctamente.


# Carga y Procesamiento de Documentos

In [2]:
import json
import PyPDF2
from docx import Document
import os

data_dir = "data/"
documents = []

def extract_pdf_text(file_path):
    """Return the concatenated text of every page of the PDF at ``file_path``.

    Args:
        file_path: Path of the PDF file to read.

    Returns:
        A single string with the text of all pages, in page order. A page for
        which ``extract_text()`` yields ``None`` or an empty string contributes
        nothing instead of raising ``TypeError`` on concatenation.
    """
    with open(file_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # join() avoids repeated string re-allocation on large documents.
        return "".join(page.extract_text() or "" for page in reader.pages)

def extract_docx_text(file_path):
    """Return the text of the DOCX at ``file_path``, one paragraph per line.

    Every paragraph in ``Document(file_path).paragraphs`` is emitted followed
    by a newline, so the result ends with ``"\\n"`` whenever the document has
    at least one paragraph and is ``""`` when it has none.
    """
    paragraphs = Document(file_path).paragraphs
    return "".join(paragraph.text + "\n" for paragraph in paragraphs)

# Load every supported file found in data_dir into `documents`.
# - sorted() makes the processing order deterministic (os.listdir order is arbitrary).
# - The extension check is case-insensitive so "REPORT.PDF" is not silently skipped.
for file_name in sorted(os.listdir(data_dir)):
    file_path = os.path.join(data_dir, file_name)
    lowered_name = file_name.lower()
    if lowered_name.endswith('.pdf'):
        text = extract_pdf_text(file_path)
    elif lowered_name.endswith('.docx'):
        text = extract_docx_text(file_path)
    elif lowered_name.endswith('.json'):
        # Explicit UTF-8: the platform default encoding may not decode JSON files.
        with open(file_path, 'r', encoding='utf-8') as f:
            text = json.dumps(json.load(f))
    else:
        # Unsupported file type: ignore it.
        continue
    documents.append({"file": file_name, "content": text})

print(f"Procesados {len(documents)} documentos.")

# Store in MongoDB (collection "documents").
collection_documents = db["documents"]
if documents:
    collection_documents.drop()  # Replace the previous contents
    collection_documents.insert_many(documents)
    print("Documentos almacenados en MongoDB.")
else:
    # insert_many() rejects an empty list; skipping also keeps the previously
    # stored documents instead of dropping them and then failing.
    print("No se encontraron documentos compatibles; la colección no fue modificada.")

Procesados 1 documentos.
Documentos almacenados en MongoDB.
Documentos almacenados en MongoDB.


# Vectorización Mejorada para RAG Completo

In [3]:
def create_embedding(text):
    """Embed ``text`` with the ``models/embedding-001`` model.

    Returns the value stored under the ``'embedding'`` key of the response
    produced by ``genai.embed_content``.

    NOTE(review): the captured output of the vectorisation cell shows a 429
    "quota exceeded ... limit: 0" error for this model's free tier -- confirm
    that the configured project still has quota for it.
    """
    response = genai.embed_content(content=text, model="models/embedding-001")
    return response['embedding']

def chunk_text(text, chunk_size=800, overlap=200):
    """Split ``text`` into windows of ``chunk_size`` characters that overlap.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so
    each window repeats the last ``overlap`` characters of the previous one.
    Splitting stops as soon as a window reaches the end of ``text``; no extra
    tail chunk consisting solely of already-covered characters is produced.

    Args:
        text: String to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Characters shared by consecutive chunks; must be smaller than
            ``chunk_size``, otherwise the window would never advance.

    Returns:
        List of chunks in order; an empty list for empty ``text``.

    Raises:
        ValueError: If ``chunk_size`` is not positive or ``overlap`` is not
            smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")

    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(text), step):
        end = start + chunk_size
        chunks.append(text[start:end])
        if end >= len(text):
            # This window already reaches the end; a further one would only
            # repeat a suffix of it.
            break
    return chunks

# Local import: only this cell needs it (pymongo itself is imported in the
# connection cell).
from pymongo.operations import SearchIndexModel

# Embed every non-blank chunk of every loaded document.
vectors = []
for doc in documents:
    for i, chunk in enumerate(chunk_text(doc["content"])):
        if not chunk.strip():  # Skip whitespace-only chunks
            continue
        vectors.append({
            "file": doc["file"],
            "chunk_id": i,
            "text": chunk,
            "embedding": create_embedding(chunk)
        })

print(f"Creados {len(vectors)} vectores.")

# Fail before touching the collection: insert_many() rejects an empty list,
# and dropping first would leave the store empty for no reason.
if not vectors:
    raise RuntimeError("No se generó ningún vector; la colección de vectores no fue modificada.")

# Store in MongoDB, replacing the previous contents.
collection_vectors.drop()
collection_vectors.insert_many(vectors)
print("Vectores almacenados en MongoDB.")

# Create the Atlas Vector Search index. A regular create_index() call with a
# "vector" direction does not produce a vector search index; Atlas search
# indexes are created through create_search_index().
# NOTE(review): type="vectorSearch" needs a recent PyMongo (4.7+ presumably --
# confirm against the installed version); "cosine" is an assumed similarity.
vector_index_model = SearchIndexModel(
    definition={
        "fields": [
            {
                "type": "vector",
                "path": "embedding",
                # Taken from the data so it always matches the embedding model.
                "numDimensions": len(vectors[0]["embedding"]),
                "similarity": "cosine",
            }
        ]
    },
    name="vector_index",
    type="vectorSearch",
)
collection_vectors.create_search_index(model=vector_index_model)
print("Índice vectorial creado.")

ResourceExhausted: 429 You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. 
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0
* Quota exceeded for metric: generativelanguage.googleapis.com/embed_content_free_tier_requests, limit: 0 [links {
  description: "Learn more about Gemini API quotas"
  url: "https://ai.google.dev/gemini-api/docs/rate-limits"
}
, violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerProjectPerModel-FreeTier"
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerProjectPerModel-FreeTier"
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerMinutePerUserPerProjectPerModel-FreeTier"
}
violations {
  quota_metric: "generativelanguage.googleapis.com/embed_content_free_tier_requests"
  quota_id: "EmbedContentRequestsPerDayPerUserPerProjectPerModel-FreeTier"
}
]

# Configuración del Modelo RAG con Gemini

In [None]:
from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.chains import RetrievalQA

# Configure embeddings and LLM.
# The API key comes from the GOOGLE_API_KEY environment variable instead of
# being written into the notebook (`os` is imported in the connection cell).
# NOTE(review): the key previously hardcoded here should be rotated.
google_api_key = os.environ["GOOGLE_API_KEY"]
embeddings = GooglePalmEmbeddings(google_api_key=google_api_key)
llm = GooglePalm(google_api_key=google_api_key, temperature=0.1)

# Vector store backed by the collection filled in the vectorisation cell
vectorstore = MongoDBAtlasVectorSearch(
    collection=collection_vectors,
    embedding=embeddings,
    index_name="vector_index",
    text_key="text"
)

# RAG chain: the 5 nearest chunks are passed to the LLM in one "stuff" prompt
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(search_kwargs={"k": 5})
)

print("Modelo RAG configurado.")

# Creación de la Aplicación Streamlit

In [None]:
# Source for app.py (copy into a separate file).
# The generated app reads GOOGLE_API_KEY and MONGODB_URI from the environment
# rather than embedding the credentials in its source; everything else in the
# template is unchanged.
app_code = '''
import os
import streamlit as st
import pandas as pd
import json
import plotly.express as px
from pymongo import MongoClient
import google.generativeai as genai
from langchain.vectorstores import MongoDBAtlasVectorSearch
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.chains import RetrievalQA

# Configuraciones
GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
genai.configure(api_key=GOOGLE_API_KEY)
client = MongoClient(os.environ["MONGODB_URI"])
db = client["security_db"]
collection_evaluations = db["evaluations"]

# Configurar RAG
embeddings = GooglePalmEmbeddings(google_api_key=GOOGLE_API_KEY)
llm = GooglePalm(google_api_key=GOOGLE_API_KEY, temperature=0.1)
vectorstore = MongoDBAtlasVectorSearch(
    collection=db["security_vectors"],
    embedding=embeddings,
    index_name="vector_index",
    text_key="text"
)
qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=vectorstore.as_retriever(search_kwargs={"k": 5}))

st.title("Asistente Inteligente de Seguridad de la Información")

# Navegación
page = st.sidebar.selectbox("Selecciona una página", ["Evaluación de Riesgos", "Chat RAG", "Historial"])

if page == "Evaluación de Riesgos":
    st.header("Evaluación de Riesgos ISO 27001")
    file = st.file_uploader("Sube tu plantilla Excel", type=["xlsx"])
    if file:
        df = pd.read_excel(file)
        df["THREAT SCORE"] = df["PROBABILITY RATING"] * df["IMPACT RATING"]
        df["LEVEL"] = df["THREAT SCORE"].apply(lambda x: "Bajo" if x <= 6 else ("Medio" if x <= 14 else "Alto"))
        st.dataframe(df)
        fig = px.scatter(df, x="PROBABILITY RATING", y="IMPACT RATING", color="LEVEL", title="Mapa de Riesgos")
        st.plotly_chart(fig)
        if st.button("Guardar evaluación"):
            json_data = df.to_dict(orient="records")
            collection_evaluations.insert_one({"evaluation": json_data})
            st.success("Evaluación guardada.")

elif page == "Chat RAG":
    st.header("Chat con Asistente IA")
    query = st.text_input("Pregunta sobre seguridad:")
    if query:
        response = qa_chain.run(query)
        st.write(response)

elif page == "Historial":
    st.header("Historial de Evaluaciones")
    evaluations = list(collection_evaluations.find())
    for eval in evaluations:
        st.json(eval["evaluation"])

st.markdown("### Acerca de")
st.write("Cálculo basado en NIST SP 800-30r1, controles de ISO 27002 y NIST 800-53r5.")
'''

print("Código de la app Streamlit preparado. Copiar a app.py")

# Integración de Funcionalidades de Evaluación de Riesgos ISO 27001

In [None]:
# Funcionalidades integradas en el código de la app: carga de Excel, conversión a JSON, cálculo de Threat Score, clasificación de niveles, sugerencias de controles.

# Visualización de Resultados y Gráficos

In [None]:
# Charts in the app: the risk map is drawn with px.scatter. TODO: bar charts (px.bar) and summary metrics (st.metric) are mentioned here but are not yet present in app_code.

# Almacenamiento de Historial en MongoDB

In [None]:
# Historial almacenado en collection_evaluations de MongoDB, recuperado en página Historial.