In [1]:
import csv
import math
import os

import numpy as np
import pandas as pd
import psycopg2

In [2]:
#
# function to run a select query and return rows in a pandas dataframe
# pandas puts all numeric values from postgres to float
# if it will fit in an integer, change it to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query and return its rows as a pandas DataFrame.

    Uses the module-level ``connection`` object (created in a separate cell).

    Parameters
    ----------
    query : str
        SQL text handed unchanged to ``pd.read_sql_query``.
    rollback_before_flag : bool
        When true, ``connection.rollback()`` is called before the query runs.
    rollback_after_flag : bool
        When true, ``connection.rollback()`` is called after the query, even
        if the query raised, so the session is not left mid-transaction.

    Returns
    -------
    pandas.DataFrame
        The query result. Every ``float64`` column whose non-missing values
        are all finite whole numbers that fit in a signed 64-bit integer is
        converted to the nullable ``Int64`` dtype (missing values become
        ``<NA>``). All other columns are returned as pandas produced them.
    """

    if rollback_before_flag:
        connection.rollback()

    try:
        df = pd.read_sql_query(query, connection)
    finally:
        # Roll back on the error path too; otherwise a failed query leaves the
        # transaction open for whatever cell runs next.
        if rollback_after_flag:
            connection.rollback()

    # fix the float columns that really should be integers

    for column in df:

        if df[column].dtype == "float64":

            present = df[column].dropna().to_numpy()

            # An empty or all-missing column passes every check and is
            # converted, which matches the earlier element-by-element loop.
            whole_numbers = (
                np.isfinite(present).all()                # inf cannot be an integer
                and (present == np.floor(present)).all()  # no fractional part
                and (np.abs(present) < 2.0 ** 63).all()   # fits in int64
            )

            if whole_numbers:
                df[column] = df[column].astype('Int64')

    return df

In [3]:
# Open the shared PostgreSQL connection used by every later cell.
# Each setting is read from the matching libpq-style environment variable when
# it is set, so credentials do not have to live in the notebook; the fallback
# values are the ones previously hard-coded in this cell, so behaviour is
# unchanged when no variables are defined.
connection = psycopg2.connect(
    user = os.environ.get("PGUSER", "postgres"),
    password = os.environ.get("PGPASSWORD", "ucb"),
    host = os.environ.get("PGHOST", "postgres"),
    port = os.environ.get("PGPORT", "5432"),
    database = os.environ.get("PGDATABASE", "postgres")
)

In [4]:
# Shared cursor: the drop / create / copy cells below execute their SQL through it.
cursor = connection.cursor()

In [5]:
def my_read_csv_file(file_name, limit):
    """Print the first ``limit`` rows of a CSV file, then a one-line summary.

    Parameters
    ----------
    file_name : str
        Path of the CSV file to read.
    limit : int
        Maximum number of rows to print. The whole file is still read so the
        summary can report the total row count. A zero or negative limit
        prints no rows.

    Returns
    -------
    None
        Output goes to stdout only.
    """

    total_rows = 0
    printed_rows = 0

    # The context manager closes the file even if parsing fails part-way.
    # newline="" is what the csv module asks for, so that it can handle line
    # endings (including ones embedded in quoted fields) itself.
    with open(file_name, "r", newline="") as csv_file:

        for row in csv.reader(csv_file):
            total_rows += 1
            if total_rows <= limit:
                print(row)
                printed_rows += 1

    # Report the rows actually printed rather than min(limit, total), which
    # would show a negative count for a negative limit.
    print("\nPrinted ", printed_rows, "lines of ", total_rows, "total lines.")

### Plan: build the `providers` table, which holds all Dermatology providers that received payments from the Medicare system in 2022

In [6]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# `if exists` keeps this cell re-runnable whether or not the table is present.
query = """

drop table if exists providers;

"""

cursor.execute(query)

# Make the drop permanent before the table is re-created.
connection.commit()

In [7]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()


# One row per provider, keyed by provider_npi. Every column is stored as text
# (varchar), including the NPI and the zip code.
query = """

create table providers (
  provider_npi varchar(32),
  provider_last_name varchar(32),
  provider_first_name varchar(32),
  provider_credentials varchar(32),
  provider_gender varchar(2),
  provider_state varchar(2),
  provider_zip varchar(32),
  primary key (provider_npi)
);

"""

cursor.execute(query)

# Persist the new, empty table.
connection.commit()

In [8]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# COPY ... FROM '<path>' is executed by the PostgreSQL server, so the path must
# be readable from the database server's filesystem.
# `csv header` skips the file's first line; `NULL ''` loads empty fields as NULL.
# COPY appends rows: re-running this cell without the drop/create cells above
# would collide with the primary key on provider_npi.
query = """

copy providers
from '/user/projects/project-3-ss3382/medicare_data_csv/providers.csv' delimiter ',' NULL '' csv header;

"""

cursor.execute(query)

# Persist the loaded rows.
connection.commit()

In [9]:
# Both flags make my_select_query_pandas call connection.rollback() around the read.
rollback_before_flag = True
rollback_after_flag = True

# Sanity check on the load: number of distinct providers located in Florida.
query = """

select 

count(distinct provider_npi)
from providers
where provider_state = 'FL'


"""

# The returned DataFrame is the cell's displayed output.
my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,count
0,994


## Building Drug Table


In [10]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# `if exists` keeps this cell re-runnable whether or not the table is present.
query = """

drop table if exists drugs;

"""

cursor.execute(query)

# Make the drop permanent before the table is re-created.
connection.commit()

In [11]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()


# One row per drug code, keyed by drug_code.
# NOTE(review): the boolean column is spelled `if_drugg` here, while the
# procedures table uses `if_drug`. This looks like a typo, but renaming it
# would break any query that already references `if_drugg` - confirm first.
query = """

create table drugs (
  drug_code varchar(32),
  drug_description text,
  if_drugg boolean,
  primary key (drug_code)
);

"""

cursor.execute(query)

# Persist the new, empty table.
connection.commit()

In [12]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# Server-side COPY: the path is resolved on the PostgreSQL server.
# `csv header` skips the file's first line; `NULL ''` loads empty fields as NULL.
# COPY appends rows, so this relies on the drop/create cells having run first.
query = """

copy drugs
from '/user/projects/project-3-ss3382/medicare_data_csv/drugs.csv' delimiter ',' NULL '' csv header;

"""

cursor.execute(query)

# Persist the loaded rows.
connection.commit()

In [13]:
# Both flags make my_select_query_pandas call connection.rollback() around the read.
rollback_before_flag = True
rollback_after_flag = True

# Distinct drug codes billed by providers whose zip code is '77304':
#   report = services_rendered rows restricted to those providers,
#   then joined to drugs on the billing code.
# NOTE(review): this reads services_rendered, which the notebook only drops,
# creates and loads in later cells (In [18] - In [20]). On a fresh database a
# top-to-bottom run would fail here - confirm the intended cell order.
# NOTE(review): `weight` (total_amount_paid) is selected in the CTE but not
# used by the outer query.
query = """

with report as (
select 
r.provider_npi,
r.code,
r.total_amount_paid as weight
from services_rendered r
     inner join (
         select provider_npi
            from providers
            where provider_zip = '77304') k on k.provider_npi = r.provider_npi
)

select distinct  d.drug_code
from drugs d
    inner join report r on r.code = d.drug_code


"""

# The returned DataFrame is the cell's displayed output.
my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,drug_code
0,J3301


## Building procedure table


In [14]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# `if exists` keeps this cell re-runnable whether or not the table is present.
query = """

drop table if exists procedures;

"""

cursor.execute(query)

# Make the drop permanent before the table is re-created.
connection.commit()

In [15]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()


# One row per procedure code, keyed by procedure_code.
query = """

create table procedures (
  procedure_code varchar(32),
  procedure_description text,
  if_drug boolean,
  primary key (procedure_code)
);

"""

cursor.execute(query)

# Persist the new, empty table.
connection.commit()

In [16]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# Server-side COPY: the path is resolved on the PostgreSQL server.
# `csv header` skips the file's first line; `NULL ''` loads empty fields as NULL.
# COPY appends rows, so this relies on the drop/create cells having run first.
query = """

copy procedures
from '/user/projects/project-3-ss3382/medicare_data_csv/procedures.csv' delimiter ',' NULL '' csv header;

"""

cursor.execute(query)

# Persist the loaded rows.
connection.commit()

In [17]:
# Both flags make my_select_query_pandas call connection.rollback() around the read.
rollback_before_flag = True
rollback_after_flag = True

# Number of distinct drug codes in the drugs table.
# NOTE(review): this check follows the procedures load but queries `drugs`,
# not `procedures` - confirm which table was meant to be verified here.
query = """

select 
 count(distinct  d.drug_code)
from drugs d


"""

# The returned DataFrame is the cell's displayed output.
my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,count
0,44


## Building Services table

In [18]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# `if exists` keeps this cell re-runnable whether or not the table is present.
query = """

drop table if exists services_rendered;

"""

cursor.execute(query)

# Make the drop permanent before the table is re-created.
connection.commit()

In [19]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()


# The composite primary key allows at most one row per (provider_npi, code) pair.
# drug_indicator is stored as text; a later query filters it on the value 'N'.
query = """

create table services_rendered (
  provider_npi varchar(32),
  code varchar(32),
  drug_indicator varchar(32),
  total_beneficiaries integer,
  total_services float,
  total_amount_paid float,
  primary key (provider_npi,code)
);

"""

cursor.execute(query)

# Persist the new, empty table.
connection.commit()

In [20]:
# Discard any transaction an earlier cell left open on the shared connection.
connection.rollback()

# Server-side COPY: the path is resolved on the PostgreSQL server.
# `csv header` skips the file's first line; `NULL ''` loads empty fields as NULL.
# COPY appends rows, so this relies on the drop/create cells having run first.
query = """

copy services_rendered
from '/user/projects/project-3-ss3382/medicare_data_csv/services_rendered.csv' delimiter ',' NULL '' csv header;

"""

cursor.execute(query)

# Persist the loaded rows.
connection.commit()

In [21]:
# Both flags make my_select_query_pandas call connection.rollback() around the read.
rollback_before_flag = True
rollback_after_flag = True

# Totals for a single provider (NPI 1043630619): amount paid and beneficiaries
# summed over all of that provider's rows. `group by 1` groups by the first
# select column, provider_npi.
query = """


select 
provider_npi,
sum(total_amount_paid) as total_medicare_payments,
sum(total_beneficiaries) as total_beneficiaries
from services_rendered 
 where provider_npi = '1043630619'
 group by 1
"""

# The returned DataFrame is the cell's displayed output.
my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,provider_npi,total_medicare_payments,total_beneficiaries
0,1043630619,2726.94,1799


In [22]:
# Both flags make my_select_query_pandas call connection.rollback() around the read.
rollback_before_flag = True
rollback_after_flag = True

# Number of distinct non-drug codes (drug_indicator = 'N') billed by Florida
# providers.
# The earlier version also left-joined a per-provider totals subquery whose
# columns were never referenced. A left join cannot remove rows and
# count(distinct ...) ignores duplicates, so dropping it leaves the result
# unchanged while avoiding a full-table aggregation.
query = """

select 
count(distinct code)
from services_rendered r
     inner join (
            select provider_npi
            from providers
            where provider_state = 'FL'
            ) k on k.provider_npi = r.provider_npi
    where  r.drug_indicator = 'N'

"""

# The returned DataFrame is the cell's displayed output.
my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

Unnamed: 0,count
0,251
