In [29]:
#Import Libraries 
import pandas as pd
import numpy as np
import requests

#Construct DataFrame from the tab-separated sample data.
#A forward slash is used as the separator: the previous 'data\provider...'
#literal relied on the invalid escape sequence '\p' and only resolved on Windows.
df = pd.read_csv('data/provider_npi_list.tsv', sep='\t')

In [30]:
#Sum of claim_id_count within each laboratory
claims_per_lab = df.groupby('laboratory')['claim_id_count'].sum()
claims_per_lab

laboratory
Lab A    9936
Lab B    3334
Lab C      58
Lab D    2895
Lab E       7
Lab F     108
Lab G      58
Lab H     200
Lab I      54
Lab J      51
Name: claim_id_count, dtype: int64

In [33]:
#The 25 rows of df with the highest claim_id_count
#NOTE(review): this ranks rows as they appear in df; if one provider_npi can
#occur on several rows its counts are not combined - confirm that is intended.
top_25_ordering_providers = df.nlargest(n=25, columns='claim_id_count')
top_25_ordering_providers

Unnamed: 0,laboratory,provider_npi,claim_id_count
0,Lab A,1497727150,465
1,Lab A,1285050062,443
2,Lab A,1194180562,380
3,Lab A,1952636714,372
4,Lab A,1639593874,337
5,Lab A,1053973099,273
6,Lab A,1861429482,269
7,Lab A,1053309286,246
8,Lab A,1902018757,239
9,Lab A,1104379403,228


In [34]:
#Constructing API query for provider name

#Work on an independent copy so the top-25 frame itself is not modified
df_name_lookup = top_25_ordering_providers.copy(deep=True)

def search_name_from_NPI(NPI):
    """Look up the name returned by the NPPES registry API for an NPI.

    Parameters
    ----------
    NPI : int or str
        Value placed in the ``number`` query parameter of the request.

    Returns
    -------
    str or None
        ``results[0]['basic']['name']`` from the JSON response. ``None`` is
        returned when the status code is not 2xx, when the body cannot be
        decoded as JSON, or when the expected keys are absent (for example
        an empty ``results`` list).

    Notes
    -----
    Exceptions raised by ``requests.get`` itself are not caught here and
    propagate to the caller.
    """
    #Constructing URL to be used for Searching
    base_url = "https://npiregistry.cms.hhs.gov/api/"
    endpoint = f"{base_url}?number={NPI}&enumeration_type=&taxonomy_description=&first_name=&last_name=&organization_name=&address_purpose=&city=&state=&postal_code=&country_code=&limit=&skip=&version=2.0"

    #Checking if we get a successful (2xx) status code
    r = requests.get(endpoint)
    if not 200 <= r.status_code < 300:
        return None

    try:
        return r.json()['results'][0]['basic']['name']
    except (IndexError, KeyError, TypeError, ValueError):
        #Unexpected payload shape or non-JSON body: report "no name" instead
        #of falling through to an unbound local.
        return None

#Row-wise adapter, meant to be passed to DataFrame.apply with axis=1
def enrich_with_nppes_api_name(row):
    """Return the name lookup result for the NPI stored in ``row['provider_npi']``."""
    return search_name_from_NPI(row['provider_npi'])

#Look up a name for every provider row. The rows come from df_name_lookup
#itself: df_names is only created by the following statement, so applying
#over df_names here would fail on a fresh kernel.
df_name_lookup['name'] = df_name_lookup.apply(enrich_with_nppes_api_name, axis=1)
df_names = df_name_lookup[['name', 'provider_npi']].copy()

#Top 25 Providers by Name and NPI
df_names

Unnamed: 0,name,provider_npi
0,BLUMENSTEIN BETH,1497727150
1,TIMMONS KRISTINA,1285050062
2,CODY JANICE,1194180562
3,LEWIS MEGAN,1952636714
4,FARLEIGH CHRISTINA,1639593874
5,KANEHL BRIANNE,1053973099
6,LANGELIERS TYSON,1861429482
7,DELEON CLARA,1053309286
8,DELAMARTER KRISTINE,1902018757
9,VOLK KATHERINE,1104379403


In [35]:
#Start the address table from an independent copy of the name/NPI frame
df_names_and_addresses = df_names.copy(deep=True)

def search_address_from_NPI(NPI):
    """Look up the first address returned by the NPPES registry API for an NPI.

    Parameters
    ----------
    NPI : int or str
        Value placed in the ``number`` query parameter of the request.

    Returns
    -------
    tuple
        ``(address_1, address_2, state, city, postal_code, country)`` read
        from ``results[0]['addresses'][0]`` (``country`` comes from the
        ``country_name`` key). A field missing from that entry is returned as
        ``None``. Six ``None`` values are returned when the status code is
        not 2xx, the body is not valid JSON, or there is no result/address
        entry at all.

    Notes
    -----
    Exceptions raised by ``requests.get`` itself are not caught here and
    propagate to the caller.
    """
    address_keys = ('address_1', 'address_2', 'state', 'city', 'postal_code', 'country_name')
    no_address = (None,) * len(address_keys)

    #Constructing URL to be used for Searching
    base_url = "https://npiregistry.cms.hhs.gov/api/"
    endpoint = f"{base_url}?number={NPI}&enumeration_type=&taxonomy_description=&first_name=&last_name=&organization_name=&address_purpose=&city=&state=&postal_code=&country_code=&limit=&skip=&version=2.0"

    #Checking if we get a successful (2xx) status code
    r = requests.get(endpoint)
    if not 200 <= r.status_code < 300:
        return no_address

    try:
        #NOTE(review): the first entry of 'addresses' is used without looking
        #at its purpose - confirm this is the address that is wanted.
        first_address = r.json()['results'][0]['addresses'][0]
        return tuple(first_address.get(key) for key in address_keys)
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        #Unexpected payload shape or non-JSON body: report "no address"
        #instead of falling through to unbound locals.
        return no_address

def enrich_with_nppes_api_address(row):
    """Return the six address fields looked up for ``row['provider_npi']``.

    The lookup result is unpacked into exactly six values and handed back as
    a tuple in the same order: address_1, address_2, state, city,
    postal_code, country.
    """
    line_1, line_2, region, town, zip_code, nation = search_address_from_NPI(row['provider_npi'])
    return (line_1, line_2, region, town, zip_code, nation)


#One address lookup per row; the returned tuple is expanded into six new columns
df_names_and_addresses[['address_1', 'address_2', 'state', 'city', 'postal_code', 'country']] = (
    df_names_and_addresses.apply(
        enrich_with_nppes_api_address,
        axis='columns',
        result_type='expand',
    )
)
df_names_and_addresses

Unnamed: 0,name,provider_npi,address_1,address_2,state,city,postal_code,country
0,BLUMENSTEIN BETH,1497727150,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States
1,TIMMONS KRISTINA,1285050062,52726 AMMON RD,,OR,LA PINE,977397659,United States
2,CODY JANICE,1194180562,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States
3,LEWIS MEGAN,1952636714,51600 HUNTINGTON RD,,OR,LA PINE,97739,United States
4,FARLEIGH CHRISTINA,1639593874,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States
5,KANEHL BRIANNE,1053973099,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States
6,LANGELIERS TYSON,1861429482,51600 HUNTINGTON RD,,OR,LA PINE,97739,United States
7,DELEON CLARA,1053309286,1750 12TH ST,,OR,HOOD RIVER,97031,United States
8,DELAMARTER KRISTINE,1902018757,76 NE 12TH ST,,OR,MADRAS,977411827,United States
9,VOLK KATHERINE,1104379403,76 NE 12TH ST,,OR,MADRAS,977411827,United States


In [36]:
#Query Database to Find Facility Affiliation

#Using Web Driver Library to Access Data Online and Incorporate Into DataFrame
from selenium import webdriver
import time

#Start the facility table from an independent copy of the name/address frame
df_hospital = df_names_and_addresses.copy(deep=True)

def search_hospital_from_NPI(NPI, driver_path=r"C:\Users\Noah\anaconda3\geckodriver-v0.27.0-win64\geckodriver.exe"):
    """Scrape the facility name shown for an NPI on npidb.org with Firefox.

    Parameters
    ----------
    NPI : int or str
        Provider number used in the lookup URL and as the text of the result
        link that is clicked.
    driver_path : str, optional
        Location of the geckodriver executable. The default is the path of
        the original author's machine and must be adjusted on other computers.

    Returns
    -------
    str or None
        Text of the element found at the fixed XPath on the detail page, or
        ``None`` if any step (starting the browser, loading the page, finding
        the link or the element) raises.
    """
    #Constructing URL to be used for Searching
    base_url = "https://npidb.org/npi-lookup/"
    endpoint = f"{base_url}?npi={NPI}&fname=&lname=&state=&sound=0&search=&state=&org="

    #Stays None if the driver fails to start, so cleanup can tell whether
    #there is a browser to shut down.
    browser = None
    try:
        browser = webdriver.Firefox(executable_path=driver_path)
        browser.get(endpoint)

        #Searching for NPI <a> tag and clicking to go to next page
        link = browser.find_element_by_link_text(str(NPI))
        link.click()

        #Letting Page Load before Grabbing Data
        time.sleep(3)

        #Grabbing Data from end of table
        return browser.find_element_by_xpath("/html/body/div[4]/div[2]/div[9]/div/div/div[2]/div[1]/div[1]/table/tbody/tr[9]/td[2]/ul/li/address/strong").text
    except Exception:
        #Best effort: any scraping failure means "no facility found"
        return None
    finally:
        #quit() ends the whole driver session rather than only closing the window
        if browser is not None:
            browser.quit()

def enrich_with_nppes_api_hospital(row):
    """Return the facility lookup result for the NPI stored in ``row['provider_npi']``."""
    return search_hospital_from_NPI(row['provider_npi'])

#Run the facility lookup once per row and store the result in 'hospital'
df_hospital['hospital'] = df_hospital.apply(enrich_with_nppes_api_hospital, axis='columns')
df_hospital

Unnamed: 0,name,provider_npi,address_1,address_2,state,city,postal_code,country,hospital
0,BLUMENSTEIN BETH,1497727150,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States,ST CHARLES MEDICAL CENTER - BEND
1,TIMMONS KRISTINA,1285050062,52726 AMMON RD,,OR,LA PINE,977397659,United States,ST CHARLES MEDICAL CENTER - BEND
2,CODY JANICE,1194180562,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States,ST CHARLES MEDICAL CENTER - BEND
3,LEWIS MEGAN,1952636714,51600 HUNTINGTON RD,,OR,LA PINE,97739,United States,
4,FARLEIGH CHRISTINA,1639593874,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States,
5,KANEHL BRIANNE,1053973099,51600 HUNTINGTON RD,,OR,LA PINE,977398887,United States,
6,LANGELIERS TYSON,1861429482,51600 HUNTINGTON RD,,OR,LA PINE,97739,United States,
7,DELEON CLARA,1053309286,1750 12TH ST,,OR,HOOD RIVER,97031,United States,MID-COLUMBIA MEDICAL CENTER
8,DELAMARTER KRISTINE,1902018757,76 NE 12TH ST,,OR,MADRAS,977411827,United States,MOUNTAIN VIEW HOSPITAL DISTRICT
9,VOLK KATHERINE,1104379403,76 NE 12TH ST,,OR,MADRAS,977411827,United States,MOUNTAIN VIEW HOSPITAL DISTRICT


In [37]:
import os

#Run nbconvert through the system shell to write an HTML version of the
#notebook file named below. The value returned by os.system is the last
#expression of the cell, so it is what the cell displays.
os.system('jupyter nbconvert --to html Reliance_Technical_Challenge.ipynb')

0