In [0]:
from datetime import date
import requests
from pyspark.sql import SparkSession

# Run date stamped onto every extracted row (written to the "refreshed_at"
# column further down).
current_date = date.today()

# getOrCreate() hands back the already-active session when one exists (for
# example inside a managed notebook) and only builds a new one otherwise, so
# this line is safe in both environments.
spark = SparkSession.builder.appName("NPI Data").getOrCreate()

# Base URL for the NPI Registry API
base_url = "https://npiregistry.cms.hhs.gov/api/"

# Search criteria for the initial request that discovers which NPIs to extract.
params = {
    "version": "2.1",  # API version
    "state": "CA",  # Example state, replace with desired state or other criteria
    "city": "Los Angeles",  # Example city, replace with desired city
    "limit": 20,  # Limit the number of results for demonstration purposes
}

# Make the initial API request to get a list of NPIs. An explicit timeout is
# required: requests waits forever by default, which would hang the whole job
# if the registry stops responding.
response = requests.get(base_url, params=params, timeout=30)

# Per-request timeout (seconds) for the detail lookups. requests has no default
# timeout, so without this a single stalled connection blocks the job forever.
DETAIL_REQUEST_TIMEOUT = 30


def _build_record(result, refreshed_at):
    """Flatten one NPI Registry result into a row of the extract.

    Args:
        result: One element of the API response's "results" list (a dict).
        refreshed_at: Value stored in the row's "refreshed_at" field.

    Returns:
        A dict with the keys npi_id, first_name, last_name, position,
        organisation_name, last_updated and refreshed_at. Text fields that
        are absent from the response default to an empty string.
    """
    basic_info = result.get("basic") or {}

    # "NPI-1" records carry the name directly; every other enumeration type
    # is represented by its authorized official instead. Using .get() keeps a
    # record that lacks "enumeration_type" from aborting the whole run.
    if result.get("enumeration_type") == "NPI-1":
        first_name = basic_info.get("first_name", "")
        last_name = basic_info.get("last_name", "")
    else:
        first_name = basic_info.get("authorized_official_first_name", "")
        last_name = basic_info.get("authorized_official_last_name", "")

    return {
        "npi_id": result.get("number"),
        "first_name": first_name,
        "last_name": last_name,
        "position": basic_info.get("authorized_official_title_or_position", ""),
        "organisation_name": basic_info.get("organization_name", ""),
        "last_updated": basic_info.get("last_updated", ""),
        "refreshed_at": refreshed_at,
    }


# Only proceed when the initial search request succeeded.
if response.status_code == 200:
    npi_data = response.json()
    npi_list = [result["number"] for result in npi_data.get("results", [])]

    # Flattened rows, one per result returned by the detail lookups.
    detailed_results = []

    # A single Session reuses the underlying connection across the lookups
    # instead of opening a new one for every NPI.
    with requests.Session() as session:
        for npi in npi_list:
            detail_params = {"version": "2.1", "number": npi}

            # A failure for one NPI is reported and skipped so that the rows
            # already collected are still written out.
            try:
                detail_response = session.get(
                    base_url,
                    params=detail_params,
                    timeout=DETAIL_REQUEST_TIMEOUT,
                )
            except requests.RequestException as exc:
                print(f"Skipping NPI {npi}: request failed ({exc})")
                continue

            if detail_response.status_code != 200:
                print(f"Skipping NPI {npi}: HTTP {detail_response.status_code}")
                continue

            try:
                detail_data = detail_response.json()
            except ValueError as exc:
                print(f"Skipping NPI {npi}: response was not valid JSON ({exc})")
                continue

            detailed_results.extend(
                _build_record(result, current_date)
                for result in detail_data.get("results") or []
            )

    # Create a DataFrame and persist it, but only when there is data to write.
    if detailed_results:
        print(detailed_results)
        df = spark.createDataFrame(detailed_results)
        # NOTE(review): display() is not defined in this file; presumably it
        # is supplied by the notebook environment - confirm before running
        # this as a plain script.
        display(df)
        # Both writes use overwrite mode, replacing whatever was stored by a
        # previous run.
        df.write.format("parquet").mode("overwrite").save("/mnt/bronze/npi_extract/")
        df.write.format("delta").mode("overwrite").saveAsTable("npi_extract")

    else:
        print("No detailed results found.")
else:
    print(f"Failed to fetch data: {response.status_code} - {response.text}")
     

[{'npi_id': '1487344586', 'first_name': 'MANEESH', 'last_name': 'SINGHAL', 'position': 'CEO/ Medical Director', 'organisation_name': '1 FAMILY CLINIC INC', 'last_updated': '2023-05-11', 'refreshed_at': datetime.date(2025, 7, 29)}, {'npi_id': '1417720848', 'first_name': 'DARCY', 'last_name': 'JOHNSTON', 'position': 'Director of Revenue Cycle Managment', 'organisation_name': '1 METHOD CORP', 'last_updated': '2024-11-13', 'refreshed_at': datetime.date(2025, 7, 29)}, {'npi_id': '1295509628', 'first_name': 'DARCY', 'last_name': 'JOHNSTON', 'position': 'Director of Revenue Cycle Mgmnt', 'organisation_name': '1 METHOD CORP', 'last_updated': '2024-11-13', 'refreshed_at': datetime.date(2025, 7, 29)}, {'npi_id': '1619741071', 'first_name': 'DARCY', 'last_name': 'JOHNSTON', 'position': 'Director of Revenue Cycle Mgmnt', 'organisation_name': '1 METHOD CORP', 'last_updated': '2024-11-13', 'refreshed_at': datetime.date(2025, 7, 29)}, {'npi_id': '1881083566', 'first_name': 'CASSIDY', 'last_name': 'CO

first_name,last_name,last_updated,npi_id,organisation_name,position,refreshed_at
MANEESH,SINGHAL,2023-05-11,1487344586,1 FAMILY CLINIC INC,CEO/ Medical Director,2025-07-29
DARCY,JOHNSTON,2024-11-13,1417720848,1 METHOD CORP,Director of Revenue Cycle Managment,2025-07-29
DARCY,JOHNSTON,2024-11-13,1295509628,1 METHOD CORP,Director of Revenue Cycle Mgmnt,2025-07-29
DARCY,JOHNSTON,2024-11-13,1619741071,1 METHOD CORP,Director of Revenue Cycle Mgmnt,2025-07-29
CASSIDY,COUSENS,2015-01-18,1881083566,"1 METHOD, LLC",Program Administrator,2025-07-29
MICHAEL,YERIKYAN,2024-03-20,1396505947,"1 STOP HOME HEALTH, INC.",OWNER,2025-07-29
MICHAEL,YERIKYAN,2021-10-15,1689337768,"1 STOP HOSPICE AND PALLIATIVE CARE, INC.",OWNER,2025-07-29
KEVIN,CHOE,2011-07-12,1265620926,10 BODY TYPE ACUPUNCTURE CLINIC,President,2025-07-29
BAHRAM,PARSA,2009-03-27,1194834028,100 PLAZA CLINICAL LAB INC,DIRECTOR,2025-07-29
JOSHUA,ALOMIA,2024-07-16,1114580677,101 DANCE CENTER LLC,COO,2025-07-29
