In [12]:
import os
from dotenv import load_dotenv
import google.generativeai as genai
from google.api_core.exceptions import ResourceExhausted
import time
import json
from json import JSONDecodeError
import re
from tqdm import tqdm
import pandas as pd

In [46]:
def json_pd(nome_arquivo):
    """Read a JSON file of disease -> symptoms pairs into a DataFrame.

    Args:
        nome_arquivo: Path of the JSON file. Its top-level value must be a
            JSON object whose keys are disease names and whose values are
            the corresponding symptoms.

    Returns:
        A DataFrame with one row per top-level key and the columns
        'disease' (the key) and 'symptom' (the value exactly as loaded).
        An empty object yields an empty DataFrame that still carries both
        columns. Returns None, after printing a message, when the file does
        not exist, is not valid JSON, or its top-level value is not an object.
    """
    try:
        with open(nome_arquivo, 'r', encoding='utf-8') as arquivo:
            dados = json.load(arquivo)
    except FileNotFoundError:
        print(f"Erro: Arquivo '{nome_arquivo}' não encontrado.")
        return None
    except json.JSONDecodeError:
        print(f"Erro: Arquivo '{nome_arquivo}' não é um JSON válido.")
        return None

    # A list or scalar at the top level has no .items(); report it the same
    # way as the other load failures instead of raising AttributeError.
    if not isinstance(dados, dict):
        print(f"Erro: Arquivo '{nome_arquivo}' não contém um objeto JSON no nível superior.")
        return None

    rows = [
        {"disease": disease, "symptom": symptoms}
        for disease, symptoms in dados.items()
    ]
    # Passing the columns explicitly keeps the schema stable for an empty file.
    return pd.DataFrame(rows, columns=["disease", "symptom"])

In [58]:
# JSON sources to merge; the first entry is loaded first by merge_files.
list_files = [
    'diseases_symptoms_medline.json',
    'diseases_symptoms_who.json',
    'diseases_symptoms_gemini.json',
]

In [87]:
def _normalize_text(series):
    """Cast values to str, lowercase them and remove '(...)' parentheticals."""
    return series.astype(str).str.lower().str.replace(r'\([^)]*\)', '', regex=True)


def _load_normalized(file):
    """Load one source file with json_pd and normalize its two columns."""
    frame = json_pd(file)
    if frame is None:
        # json_pd has already printed the reason; stop with a clear error
        # instead of failing later with "'NoneType' object is not subscriptable".
        raise ValueError(f"Erro: não foi possível carregar '{file}'; a mesclagem não pode continuar.")
    frame['symptom'] = _normalize_text(frame['symptom'])
    # 'disease' is the join key. Removing a parenthetical leaves stray spaces
    # behind ("name (abbr)" -> "name "), so collapse and trim whitespace to
    # let equal names from different sources actually match.
    frame['disease'] = (
        _normalize_text(frame['disease'])
        .str.replace(r'\s+', ' ', regex=True)
        .str.strip()
    )
    return frame


def merge_files(list_files = list_files):
    """Outer-merge the disease/symptom tables of several JSON files on 'disease'.

    Args:
        list_files: Paths of the JSON files to load with json_pd. The first
            file seeds the result and the others are merged in list order.

    Returns:
        A DataFrame with a 'disease' column plus one symptom column per
        source. The overlapping 'symptom' columns are renamed by pandas'
        default merge suffixes, so three files give 'symptom_x',
        'symptom_y' and 'symptom'. Both text columns are lowercased, cast to
        str and stripped of parenthetical text; 'disease' also has its
        whitespace collapsed and trimmed.
        NOTE(review): with more than three files the default suffixes would
        repeat — confirm the naming before extending the list.

    Raises:
        ValueError: If any file cannot be loaded (json_pd returned None).
    """
    merged = _load_normalized(list_files[0])
    for file in list_files[1:]:
        merged = pd.merge(merged, _load_normalized(file), on='disease', how='outer')
    return merged

In [88]:
# Build the merged disease/symptom table from every file in list_files.
df = merge_files(list_files)


In [89]:
# Collapse the per-source symptom columns produced by the merge into a single
# 'symptoms' column. Sources are consulted in column order, so an earlier
# source wins and later ones only fill the gaps.
source_cols = [
    col for col in df.columns
    if col == 'symptom' or col.startswith('symptom_')
]
# On a re-run the source columns are already gone; skipping the work keeps
# the cell idempotent instead of raising KeyError.
if source_cols:
    symptoms = df[source_cols[0]]
    for col in source_cols[1:]:
        symptoms = symptoms.fillna(df[col])
    df = df.drop(columns=source_cols).assign(symptoms=symptoms)
df

Unnamed: 0,disease,symptoms
0,abortion,['incomplete abortion not all pregnancy tissue...
1,abscess,"['pain', 'swelling', 'redness', 'warmth', 'pus..."
2,abuse of older people,"['physical injuries', 'premature death', 'depr..."
3,acute bronchitis,"['cough', 'coughing up mucus phlegm', 'shortne..."
4,acute flaccid myelitis,"['arm or leg weakness', 'a loss of muscle tone..."
...,...,...
208,yaws,"['wartlike tumor filled with bacteria', 'ulcer..."
209,yeast infections,['itching and irritation in the vagina or surr...
210,yellow fever,"['days incubation', 'fever muscle pain headach..."
211,zika virus,"['often no symptoms', 'symptoms appear days a..."


In [107]:
# Load environment variables from the .env file.
load_dotenv()

# Read the Gemini API key from the environment and pass it to the client.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

# Generation settings handed to the model.
generation_config = dict(temperature=1)

# Every listed safety category is set to 'BLOCK_NONE'.
safety_settings = {
    category: 'BLOCK_NONE'
    for category in ('HATE', 'HARASSMENT', 'SEXUAL', 'DANGEROUS')
}


In [108]:
# Instantiate the Gemini model with the generation and safety settings.
model_diseases = genai.GenerativeModel(
    model_name="gemini-1.0-pro",
    generation_config=generation_config,
    safety_settings=safety_settings,
)

In [120]:
# Show the disease name stored at row position 210.
df.disease.iloc[210]

'yellow fever'

In [121]:
# Ask the model to translate the disease name at row position 210 and keep
# the text of its reply.
prompt = f"traduza para portugues Brasil {df.disease.iloc[210]}"
resposta = model_diseases.generate_content(prompt)
match = resposta.text

In [122]:
# Show the text the model returned for the translation prompt.
match

'Febre amarela'

In [None]:
# NOTE(review): this repeats the earlier merge and rebinds `df` to the raw
# merged frame, i.e. with the per-source symptom columns and without the
# consolidated 'symptoms' column — confirm this reset is intended.
df = merge_files(list_files)
