In [2]:
from pyspark.sql import SparkSession
import pandas as pd
import requests, json, os, re

from datetime import datetime

from pyspark.sql import functions as F
from pyspark.sql.types import StructField, StringType, ArrayType, StructType, DateType
from pyspark.sql.functions import regexp_replace

In [3]:
spark = SparkSession.builder.appName("desafiohl7").master("local[*]").getOrCreate()

In [4]:
url = "https://github.com/wandersondsm/teste_fhir/raw/refs/heads/main/data/patients.csv"

In [5]:
# Função para baixar um arquivo
def download_file(url, folder):
    try:
        # Extrai o nome do arquivo da URL
        file_name = os.path.join(folder, url.split("/")[-1])
        
        # Faz a requisição HTTP para baixar o arquivo
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Lança exceção se houver erro HTTP
        
        # Salva o arquivo na pasta especificada
        with open(file_name, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
        
        print(f"Download concluído: {file_name}")
    
    except requests.exceptions.RequestException as e:
        print(f"Falha ao baixar {url}: {e}")

In [6]:
# Pasta onde os arquivos serão salvos
download_folder = "data"

if not os.path.exists(download_folder):
    os.makedirs(download_folder)

# for url in lista:
download_file(url, download_folder)

Download concluído: data/patients.csv


In [7]:
df = spark.read.format("csv").option("encoding", "ISO-8859-1").option("header", True).option("inferSchema", True).load('data/patients.csv')

In [8]:
csv_colunas = [colunas.replace(" ", "_").lower() for colunas in df.columns]
df = df.toDF(*csv_colunas)

In [9]:
df.printSchema()

root
 |-- nome: string (nullable = true)
 |-- cpf: string (nullable = true)
 |-- gênero: string (nullable = true)
 |-- data_de_nascimento: string (nullable = true)
 |-- telefone: string (nullable = true)
 |-- país_de_nascimento: string (nullable = true)
 |-- observação: string (nullable = true)



In [10]:
df = df.withColumnRenamed("gênero", "genero")
df = df.withColumnRenamed("país_de_nascimento", "pais_de_nascimento")
df = df.withColumnRenamed("observação", "observacao")

In [11]:
df.createOrReplaceTempView('pacientes')

In [12]:
# df.withColumn("data_de_nascimento", DateType())
df = spark.sql("SELECT CONCAT_WS('-', REVERSE(SPLIT(data_de_nascimento, '/'))) AS data_de_nascimento, SPLIT_PART(nome, ' ', -1) AS family, nome, cpf, \
        CASE WHEN genero == 'Masculino' THEN 'male' WHEN genero == 'Feminino' THEN 'female' END AS genero, telefone, pais_de_nascimento, observacao FROM pacientes")
#withColumn("data_de_nascimento", DateType())

In [13]:
# df = spark.sql("SELECT , * FROM pacientes")

In [14]:
df.show()

+------------------+--------+----------------+--------------+------+--------------+------------------+--------------------+
|data_de_nascimento|  family|            nome|           cpf|genero|      telefone|pais_de_nascimento|          observacao|
+------------------+--------+----------------+--------------+------+--------------+------------------+--------------------+
|        1980-05-10|   Silva|   João da Silva|123.456.789-00|  male|(11) 1234-5678|            Brasil|                NULL|
|        1992-08-15|   Souza|     Maria Souza|987.654.321-01|female|(21) 9876-5432|            Brasil|            Gestante|
|        1975-12-03|Oliveira|  Pedro Oliveira|456.789.123-02|  male|(31) 4567-8901|            Brasil|                NULL|
|        1988-06-20|  Santos|      Ana Santos|789.123.456-03|female|(41) 7890-1234|            Brasil|  Gestante|Diabético|
|        1995-09-25|   Costa|    Carlos Costa|234.567.890-04|  male|(51) 2345-6789|            Brasil|                NULL|
|       

In [15]:
df

DataFrame[data_de_nascimento: string, family: string, nome: string, cpf: string, genero: string, telefone: string, pais_de_nascimento: string, observacao: string]

In [16]:
# spark.sql("SELECT SPLIT_PART(nome, ' ', -1) AS family FROM pacientes")
# spark.sql("SELECT SPLIT_PART(nome, ' ', -1) AS family FROM pacientes").show()

In [17]:
# spark.sql("SELECT SPLIT_PART(nome, ' ', -1) AS family FROM pacientes")

In [18]:
# The data you want to send, represented as a Python dictionary

lista = []
# (nome, family, cpf, gênero, data_de_nascimento, telefone, país_de_nascimento, observação)
for coluna in df.toPandas().itertuples():
    # print(coluna.family, coluna.nome, coluna.cpf)
    payload = {
      "resourceType": "Patient",
      "identifier": [
        {
          "use": "official",
          "type": {
            "coding": [
              {
                "system": "http://terminology.hl7.org/CodeSystem/v2-0203",
                "code": "MR",
                "display": "Medical Record Number"
              }
            ]
          },
          "system": "http://hospital.saude/exemplo/patient",
          "value": "123456"
        }
      ],
      "active": "true",
      "name": [
        {
          "use": "official",
          "family": coluna.family,
          "given": [
            coluna.nome
          ]
        }
      ],
      "telecom": [
        {
          "system": "phone",
          "value": coluna.telefone,
          "use": "mobile"
        },
        {
          "system": "email",
          "value": "example.silva@email.com",
          "use": "home"
        }
      ],
      "gender": coluna.genero,
      "birthDate": coluna.data_de_nascimento,
        "customObservations": [
            {
              "code": {
                "coding": [
                  {
                    "system": "http://loinc.org",
                    "code": "8480-6",
                    "display": f"{coluna.observacao}"
                  }
                ]
              },
              "value": {
                "value": f"{coluna.observacao}",
                "unit": "mm[Hg]",
                "system": "http://unitsofmeasure.org",
                "code": "mm[Hg]"
              },
              "dateRecorded": f"{datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')}"
            }
        ],
      "address": [
        {
          "use": "home",
          "type": "physical",
          "line": [
            "Rua das Flores, 123"
          ],
          "city": "São Paulo",
          "state": "SP",
          "postalCode": "01234-567",
          "country": coluna.pais_de_nascimento
        }
      ],
      "maritalStatus": {
        "coding": [
          {
            "system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus",
            "code": "M",
            "display": "Married"
          }
        ]
      }
    }
    lista.append(json.dumps(payload))

In [19]:
datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")

'2025-11-14T23:43:02Z'

In [20]:
lista[2]

'{"resourceType": "Patient", "identifier": [{"use": "official", "type": {"coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0203", "code": "MR", "display": "Medical Record Number"}]}, "system": "http://hospital.saude/exemplo/patient", "value": "123456"}], "active": "true", "name": [{"use": "official", "family": "Oliveira", "given": ["Pedro Oliveira"]}], "telecom": [{"system": "phone", "value": "(31) 4567-8901", "use": "mobile"}, {"system": "email", "value": "example.silva@email.com", "use": "home"}], "gender": "male", "birthDate": "1975-12-03", "customObservations": [{"code": {"coding": [{"system": "http://loinc.org", "code": "8480-6", "display": "None"}]}, "value": {"value": "None", "unit": "mm[Hg]", "system": "http://unitsofmeasure.org", "code": "mm[Hg]"}, "dateRecorded": "2025-11-14T23:43:02Z"}], "address": [{"use": "home", "type": "physical", "line": ["Rua das Flores, 123"], "city": "S\\u00e3o Paulo", "state": "SP", "postalCode": "01234-567", "country": "Brasil"}], "ma

In [21]:
json.loads(json.dumps(lista[2], indent=4))

'{"resourceType": "Patient", "identifier": [{"use": "official", "type": {"coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0203", "code": "MR", "display": "Medical Record Number"}]}, "system": "http://hospital.saude/exemplo/patient", "value": "123456"}], "active": "true", "name": [{"use": "official", "family": "Oliveira", "given": ["Pedro Oliveira"]}], "telecom": [{"system": "phone", "value": "(31) 4567-8901", "use": "mobile"}, {"system": "email", "value": "example.silva@email.com", "use": "home"}], "gender": "male", "birthDate": "1975-12-03", "customObservations": [{"code": {"coding": [{"system": "http://loinc.org", "code": "8480-6", "display": "None"}]}, "value": {"value": "None", "unit": "mm[Hg]", "system": "http://unitsofmeasure.org", "code": "mm[Hg]"}, "dateRecorded": "2025-11-14T23:43:02Z"}], "address": [{"use": "home", "type": "physical", "line": ["Rua das Flores, 123"], "city": "S\\u00e3o Paulo", "state": "SP", "postalCode": "01234-567", "country": "Brasil"}], "ma

In [22]:
# The URL endpoint you want to post the JSON data to
# url_api = "http://172.19.0.4:8090/baseR4/Patient"
url_api = "http://localhost:8090/fhir/Patient"

In [23]:
# The headers required by the FHIR server
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36',
    'accept': 'application/fhir+json',
    'Content-Type': 'application/fhir+json'
}

In [24]:
def requisicao_fhir(url_api, json, headers):
    try:
        # Use the 'json=' parameter to send the dictionary as JSON
        response = requests.post(url_api, json=payload, headers=headers)
    
        # Check for a successful response (status code 2xx)
        response.raise_for_status() 
    
        print(f"Status Code: {response.status_code}")
        
        # If the server responds with JSON, you can access it as a Python dictionary
        print("Response JSON:")
        print(response.json())
    
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")

In [25]:
requisicao_fhir(url_api, lista[2], headers)

An error occurred: HTTPConnectionPool(host='localhost', port=8090): Max retries exceeded with url: /fhir/Patient (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7d73ecb0ca50>: Failed to establish a new connection: [Errno 111] Connection refused'))


In [26]:
r = requests.get('https://google.com')
r.status_code

200

In [29]:
# r = requests.get('http://localhost:8090/fhir/Patient/1?_format=json')
# r = requests.get('http://localhost:8090/fhir/Patient/1')
# r = requests.get('http://127.0.0.1:8090/fhir/Patient/1')
# r.status_code

In [30]:
!man curl

This system has been minimized by removing packages and content that are
not required on a system that users do not log into.

To restore this content, including manpages, you can run the 'unminimize'
command. You will still need to ensure the 'man-db' package is installed.


In [None]:
!apt-get install curl

In [None]:
import subprocess
import json
import shlex

# The original command arguments (extracted from your cURL POST request)
URL = 'http://localhost:8090/fhir/Patient'
DATA_PAYLOAD = '{"resourceType": "Patient", "identifier": [{"use": "official", "type": {"coding": [{"system": "http://terminology.hl7.org/CodeSystem/v2-0203", "code": "MR", "display": "Medical Record Number"}]}, "system": "http://hospital.saude/exemplo/patient", "value": "123456"}], "active": "true", "name": [{"use": "official", "family": "Oliveira", "given": ["Pedro Oliveira"]}], "telecom": [{"system": "phone", "value": "(31) 4567-8901", "use": "mobile"}, {"system": "email", "value": "example.silva@email.com", "use": "home"}], "gender": "male", "birthDate": "1975-12-03", "customObservations": [{"code": {"coding": [{"system": "http://loinc.org", "code": "8480-6", "display": "None"}]}, "value": {"value": "None", "unit": "mm[Hg]", "system": "http://unitsofmeasure.org", "code": "mm[Hg]"}, "dateRecorded": "2025-11-14T21:56:24Z"}], "address": [{"use": "home", "type": "physical", "line": ["Rua das Flores, 123"], "city": "São Paulo", "state": "SP", "postalCode": "01234-567", "country": "Brasil"}], "maritalStatus": {"coding": [{"system": "http://terminology.hl7.org/CodeSystem/v3-MaritalStatus", "code": "M", "display": "Married"}]}}'

# Construct the command list
curl_command = [
    'curl',
    '-X', 'POST',
    URL,
    '-H', 'accept: application/fhir+json',
    '-H', 'Content-Type: application/fhir+json',
    '-d', DATA_PAYLOAD
]

print("Executing command...")

try:
    # Execute the command
    result = subprocess.run(
        curl_command, 
        capture_output=True, 
        text=True, 
        check=True # Raise an exception for non-zero exit codes (errors)
    )

    # Print the output
    print("--- cURL Output ---")
    print("Status: Success")
    print("Stdout:")
    
    # Attempt to pretty-print the JSON response body
    try:
        response_json = json.loads(result.stdout)
        print(json.dumps(response_json, indent=4))
    except json.JSONDecodeError:
        print(result.stdout)
        
except subprocess.CalledProcessError as e:
    print(f"--- cURL Error (Exit Code {e.returncode}) ---")
    print("Stderr:")
    print(e.stderr)
except FileNotFoundError:
    print("Error: The 'curl' command was not found. Ensure cURL is installed and in your system's PATH.")