# In [None]:
import requests
from bs4 import BeautifulSoup
import google.generativeai as genai
from google.colab import drive
import os

# Mount Google Drive at /content/drive. save_to_text_file() below writes its
# output under /content/drive/MyDrive/, so the mount has to happen first.
# NOTE(review): this runs at module level, i.e. also whenever the file is imported.
drive.mount('/content/drive')

# Function to scrape content from a URL
def scrape_content(url, timeout=30):
    """Download a page and return the text content of its HTML.

    Args:
        url: Address fetched with an HTTP GET request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the whole run indefinitely.

    Returns:
        The text extracted from the page by BeautifulSoup, or an empty string
        when the request fails (connection error, timeout, HTTP error status).
    """
    try:
        response = requests.get(url, timeout=timeout)
        # Turn 4xx/5xx answers into exceptions so error pages are not scraped.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort scraping: report the failure and let the caller continue
        # with the remaining URLs.
        print(f"Error fetching {url}: {e}")
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')
    return soup.get_text()

# Function to generate an answer using the Gemini API
def generate_answer(prompt, scraped_content, gemini_key):
    """Ask the Gemini model to answer ``prompt`` using ``scraped_content``.

    The model input is the prompt, a blank line, then the scraped content.

    Args:
        prompt: Instruction text placed at the start of the model input.
        scraped_content: Text appended after the prompt.
        gemini_key: API key handed to ``genai.configure``.

    Returns:
        The text of the first candidate (all of its parts concatenated), or
        the string "No content generated." when the response has no candidate
        or the candidate carries no text.

    Raises:
        ValueError: If ``gemini_key`` is empty or None.
    """
    # Validate the key before doing any other work so a misconfiguration
    # fails immediately.
    if not gemini_key:
        raise ValueError("API key for Gemini is not set. Please set the GEMINI_API_KEY environment variable.")

    # Configure the Gemini API
    genai.configure(api_key=gemini_key)

    # Sampling settings passed to the model.
    generation_config = {
        "temperature": 0.9,
        "top_p": 1,
        "top_k": 5,
        "max_output_tokens": 2048,
    }

    model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
    response = model.generate_content(f"{prompt}\n\n{scraped_content}")

    fallback = "No content generated."
    if not response.candidates:
        return fallback

    # NOTE(review): a candidate can presumably come back with an empty parts
    # list (e.g. when generation is blocked); indexing parts[0] would then
    # raise IndexError. Joining all parts handles that case and also keeps
    # text that is split across several parts.
    parts = response.candidates[0].content.parts
    generated_text = "".join(part.text for part in parts)

    return generated_text or fallback

# Function to save the generated text to a text file
def save_to_text_file(generated_text, filename='rcm.txt'):
    """Write ``generated_text`` to ``filename`` under the mounted Drive folder.

    Args:
        generated_text: String written to the file as UTF-8; any existing
            file with the same name is overwritten.
        filename: Name of the file, joined onto /content/drive/MyDrive/.
    """
    drive_root = '/content/drive/MyDrive/'
    destination = os.path.join(drive_root, filename)
    with open(destination, mode='w', encoding='utf-8') as out:
        out.write(generated_text)

# Main function to process input and generate the output
def main(prompt, urls):
    """Scrape every URL, then generate and save ten answers for the prompt.

    The Gemini API key is read from the GEMINI_API_KEY environment variable.
    Each successful generation is printed and written with save_to_text_file
    as rcm_10001.txt through rcm_10010.txt. A failed attempt is reported and
    skipped so the remaining attempts still run.

    Args:
        prompt: Instruction text forwarded to generate_answer.
        urls: Iterable of page addresses passed one by one to scrape_content.

    Returns:
        None. Returns early, after printing a message, when the API key is
        missing or when nothing could be scraped.
    """
    # Never embed the key in source: a key committed to a file must be treated
    # as leaked and rotated. Checking up front also avoids scraping for nothing.
    gemini_key = os.environ.get("GEMINI_API_KEY")
    if not gemini_key:
        print("API key for Gemini is not set. Please set the GEMINI_API_KEY environment variable.")
        return

    pages = []
    for url in urls:
        print(f"Scraping content from {url}...")
        pages.append(scrape_content(url))
    # Every page is followed by a blank line, including the last one.
    scraped_content = "".join(f"{page}\n\n" for page in pages)

    if not scraped_content.strip():
        print("No content scraped from the provided URLs.")
        return

    for i in range(10):
        try:
            generated_text = generate_answer(prompt, scraped_content, gemini_key)
            # generate_answer signals an empty response with this sentinel text.
            if "No content generated" in generated_text:
                raise ValueError("Content generation failed.")
            print(f"Generated Answer for iteration {i + 1}:")
            print(generated_text)
            save_to_text_file(generated_text, filename=f'rcm_{i + 10001}.txt')
        except Exception as e:
            # Deliberately broad: one failed attempt (API error, write error)
            # must not abort the rest. The number shown is the output file
            # index (10001-based), not the 1-based iteration count.
            print(f"Skipping iteration {i + 10001} due to error: {e}")

if __name__ == "__main__":
    # Example usage. Everything below stays inside the guard so that importing
    # this module neither references an undefined prompt nor starts a run.

    # Instruction text sent to the model ahead of the scraped page content.
    input_prompt = """You are a cybersecurity expert specializing in cyber threat intelligence. Given a list of CVE identifiers, your task is to retrieve detailed descriptions of each vulnerability from reputable sources.
     These descriptions will be used to map the CVE to the appropriate CWE (Common Weakness Enumeration) entries.

Follow these requirements:

Source Selection: Use the specified URLs to gather accurate and comprehensive descriptions.

Description Requirements: For each CVE, gather the following:

CVE ID: The unique identifier for the vulnerability.
Description: A detailed explanation of the vulnerability, including nature, potential impact, and affected systems.
CWE Mapping: The CWE identifier, if available.
References: Relevant links or document titles.
Output Format: Return the output in text format with the following fields:

CVE ID
Description
CWE Mapping
References
Important: Only return the text content as specified. Do not include any additional text or commentary outside the provided fields. """

    # Pages whose text is scraped and appended to the prompt.
    input_urls = [
        "https://nvd.nist.gov",
        "https://cve.mitre.org",
        "https://www.tenable.com/cve",
        "https://www.qualys.com/research/singularity",
        "https://krebsonsecurity.com",
        "https://www.securityweek.com",
        "https://osvdb.org",
    ]

    main(input_prompt, input_urls)

