In [1]:
import os
from dotenv import load_dotenv
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import time
import json
import csv
import re

In [3]:
# Pull the environment variables in from the .env file.
load_dotenv()

# Read the Gemini API key from the environment and hand it to the client library.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

In [4]:
# Generation options passed to the model (only the temperature is set here).
generation_config = dict(temperature=0.4)

# Every harm category listed below is mapped to the same 'BLOCK_NONE' setting.
safety_settings = {
    category: 'BLOCK_NONE'
    for category in ('HATE', 'HARASSMENT', 'SEXUAL', 'DANGEROUS')
}

# Model handle used by the generation cell further down.
model = genai.GenerativeModel(
    model_name='gemini-1.5-flash',
    generation_config=generation_config,
    safety_settings=safety_settings,
)


In [5]:
# Load the disease -> symptom-list mapping. The encoding is passed explicitly so
# the result does not depend on the platform's default text encoding (and so this
# cell agrees with the other JSON-loading cell, which already uses UTF-8).
with open('filtered_diseases_symptoms_Medlineplus.json', 'r', encoding='utf-8') as arquivo:
    diseases_symptoms = json.load(arquivo)

# Show the first (disease, symptoms) pair as a quick sanity check.
next(iter(diseases_symptoms.items()))

('HIV',
 ['Fever',
  'Chills',
  'Rash',
  'Night sweats (heavy sweating during sleep)',
  'Muscle aches',
  'Sore throat',
  'Fatigue',
  'Swollen lymph nodes',
  'Mouth ulcers'])

In [6]:
# Abrir e ler o arquivo JSON
with open('empty_symptoms_keys_Medlineplus.json', mode='r', encoding='utf-8') as file:
    diseases = json.load(file)
diseases[:5]

['Aspergillosis',
 'Abscess',
 'Acute Bronchitis',
 'Viral Infections',
 'Vaccines']

In [7]:
# Build one question/answer example per disease. The answer is the disease's
# complete symptom list, so a single entry per disease is sufficient; iterating
# over the individual symptoms only produced identical duplicates.
# Diseases with an empty symptom list contribute no example.
examples = [
    {"input": f'What are the symptoms of {disease}?', "output": symptoms}
    for disease, symptoms in diseases_symptoms.items()
    if symptoms
]

# Preview the first example.
examples[0]

{'input': 'What are the symptoms of HIV?',
 'output': ['Fever',
  'Chills',
  'Rash',
  'Night sweats (heavy sweating during sleep)',
  'Muscle aches',
  'Sore throat',
  'Fatigue',
  'Swollen lymph nodes',
  'Mouth ulcers']}

In [8]:
# Tunables for the generation run.
MAX_DISEASES = 2            # only the first MAX_DISEASES names are queried
REQUEST_PAUSE_SECONDS = 2   # pause between consecutive API calls

# Removes every character that is not an ASCII letter or whitespace
# (bullet markers, punctuation, digits, ...). Compiled once, outside the loop.
NON_LETTER_PATTERN = re.compile(r"[^a-zA-Z\s]")

diseases_symptoms_new = {}
for disease in diseases[:MAX_DISEASES]:
    # Build the prompt for the generative model.
    prompt = f'What are the symptoms of {disease}? Summarize the symptoms in just bulletpoints, without titles, about health symptoms with few words.'
    # Ask the model for a bullet-point summary.
    response = model.generate_content(prompt)
    try:
        raw_text = response.text
    except ValueError as error:
        # Per the client library docs, `response.text` raises ValueError when the
        # reply has no usable text part. Skip this disease instead of aborting the
        # whole run and losing the results collected so far.
        print(f"Skipping {disease!r}: response contained no text ({error})")
    else:
        letters_only = NON_LETTER_PATTERN.sub("", raw_text)
        # One symptom per non-empty line.
        diseases_symptoms_new[disease] = [
            line.strip() for line in letters_only.split('\n') if line.strip()
        ]
    # Pause between requests, whether or not this one produced text.
    time.sleep(REQUEST_PAUSE_SECONDS)

In [9]:
# Serialize the generated disease -> symptoms dictionary (4-space indent)
# and write it to a JSON file.
serialized = json.dumps(diseases_symptoms_new, indent=4)
with open("diseases_symptoms_medline_gemini.json", "w") as json_file:
    json_file.write(serialized)