In [0]:
# Load the doctor dimension and the fact table from the Prajwal_Mock schema.
# The generator is unpacked in order, so the first table name feeds Dim_Doctor
# and the second feeds Fact_Table.
Dim_Doctor, Fact_Table = (
    spark.read.table(qualified_name)
    for qualified_name in ("Prajwal_Mock.Dim_Doctor", "Prajwal_Mock.Fact_table")
)

In [0]:
# Load the patient dimension; it is joined to the fact table on patientid in
# the per-patient cells below.
Dim_Patient = spark.read.table(tableName="Prajwal_Mock.Dim_Patient")

In [0]:
# Print the schema of each loaded table, in the same order they are used below:
# doctor dimension, fact table, patient dimension.
for _frame in (Dim_Doctor, Fact_Table, Dim_Patient):
    _frame.printSchema()

## 	Doctor Workload: Analyze the number of admissions, tests, and procedures handled by each doctor.

In [0]:
from pyspark.sql.functions import col, count

# Per-doctor workload. Every metric is a non-null count of one fact-table
# column, computed within each doctorid group.
doctor_workload = Fact_Table.groupBy("doctorid").agg(
    *[
        count(col(source_column)).alias(metric_name)
        for source_column, metric_name in (
            ("patientid", "Number_of_Admissions"),
            ("testtype", "Number_of_Tests"),
            ("proceduretype", "Number_of_Procedures"),
        )
    ]
)

# Swap the doctorid key for the doctor's name from the dimension table.
# The inner join drops any doctorid that has no row in Dim_Doctor.
doctor_workload = doctor_workload.join(
    Dim_Doctor,
    doctor_workload["doctorid"] == Dim_Doctor["doctorid"],
    how="inner",
).select(
    Dim_Doctor["name"].alias("Doctor_Name"),
    "Number_of_Admissions",
    "Number_of_Tests",
    "Number_of_Procedures",
)

display(doctor_workload)

Databricks visualization. Run in Databricks to view.

## 	Department Utilization: Study the distribution of patients and procedures across different hospital departments.

In [0]:
# `count` is imported here as well so this cell does not depend on an earlier
# cell having brought it into scope.
from pyspark.sql.functions import col, count, countDistinct

# Per-department utilization: distinct patients seen and number of non-null
# proceduretype entries, computed from the fact table.
department_utilization = (
    Fact_Table.groupBy("department")
    .agg(
        countDistinct(col("patientid")).alias("Number_of_Patients"),
        count(col("proceduretype")).alias("Number_of_Procedures"),
    )
)

# Keep only departments that also appear in Dim_Doctor.
# An inner join on department would emit one output row per matching Dim_Doctor
# row, repeating a department's totals once for every doctor in it. A left-semi
# join filters on the existence of a match and keeps exactly one row per
# department, with only the left-hand (aggregated) columns.
department_utilization = (
    department_utilization.join(Dim_Doctor, on="department", how="left_semi")
    .select(
        col("department").alias("Department_Name"),
        "Number_of_Patients",
        "Number_of_Procedures",
    )
)

display(department_utilization)

Databricks visualization. Run in Databricks to view.

## Procedure Success Rate: Evaluate the effectiveness of healthcare procedures performed by each doctor.

In [0]:
from pyspark.sql.functions import col, count, when

# Per-doctor procedure success rate.
#
# Total_Procedures counts rows with a non-null proceduretype. The success count
# is restricted to those same rows so that the numerator is always a subset of
# the denominator and the rate cannot exceed 1.
procedure_success_rate = (
    Fact_Table.groupBy("doctorid")
    .agg(
        count(
            when(
                (col("outcome") == "success") & col("proceduretype").isNotNull(),
                True,
            )
        ).alias("Successful_Procedures"),
        count(col("proceduretype")).alias("Total_Procedures"),
    )
    .withColumn(
        "Procedure_Success_Rate",
        # A doctor with no recorded procedures has Total_Procedures == 0.
        # Turning that zero into NULL makes the rate NULL instead of raising a
        # divide-by-zero error when ANSI SQL mode is enabled.
        col("Successful_Procedures")
        / when(col("Total_Procedures") > 0, col("Total_Procedures")),
    )
)

# Replace the doctorid key with the doctor's name; the inner join drops any
# doctorid that has no row in Dim_Doctor.
procedure_success_rate = (
    procedure_success_rate.join(
        Dim_Doctor,
        procedure_success_rate["doctorid"] == Dim_Doctor["doctorid"],
        how="inner",
    )
    .select(
        Dim_Doctor["name"].alias("Doctor_Name"),
        "Successful_Procedures",
        "Total_Procedures",
        "Procedure_Success_Rate",
    )
)

display(procedure_success_rate)

Databricks visualization. Run in Databricks to view.

## 	Admission Frequency: Determine how frequently patients are admitted for healthcare services.

In [0]:
from pyspark.sql.functions import col, count

# Count, per patient, the fact rows that carry a non-null admission_date.
admission_frequency = (
    Fact_Table.groupBy("patientid")
    .agg(count(col("admission_date")).alias("Admission_Frequency"))
)

# Attach the patient's name from the dimension table. The inner join drops any
# patientid that has no row in Dim_Patient.
admission_frequency = (
    admission_frequency.join(
        Dim_Patient,
        admission_frequency["patientid"] == Dim_Patient["patientid"],
        how="inner",
    )
    .select(Dim_Patient["name"].alias("Patient_Name"), "Admission_Frequency")
)

display(admission_frequency)

Databricks visualization. Run in Databricks to view.

## 	Missed Payments: Identify and count instances of missed or delayed payments by patients.

In [0]:
from pyspark.sql.functions import col, count, when

# Count, per patient, the fact rows whose paymentstatus equals "missed".
# when() without otherwise() yields NULL for non-matching rows, and count()
# ignores NULLs, so only the matching rows are tallied.
# NOTE(review): the section heading also mentions delayed payments, but only
# the "missed" status is counted here; confirm the set of status values.
missed_payments = (
    Fact_Table.groupBy("patientid")
    .agg(
        count(when(col("paymentstatus") == "missed", True)).alias("Missed_Payments")
    )
)

# Attach the patient's name; the inner join drops any patientid that has no
# row in Dim_Patient.
missed_payments = (
    missed_payments.join(
        Dim_Patient,
        missed_payments["patientid"] == Dim_Patient["patientid"],
        how="inner",
    )
    .select(Dim_Patient["name"].alias("Patient_Name"), "Missed_Payments")
)

display(missed_payments)

## 	Outstanding Amounts: Calculate unpaid or pending amounts per patient.

In [0]:
# Spark's sum is imported under an alias so that Python's builtin sum() is not
# shadowed for the rest of the notebook.
from pyspark.sql.functions import col, sum as spark_sum, when

# Per-patient outstanding amount: the total cost of fact rows whose
# paymentstatus equals "pending". Rows with any other status contribute 0, so
# a patient with no pending rows gets 0 rather than NULL.
# NOTE(review): the section heading mentions "unpaid or pending", but only the
# "pending" status is summed here; confirm the set of status values.
outstanding_amounts = (
    Fact_Table.groupBy("patientid")
    .agg(
        spark_sum(
            when(col("paymentstatus") == "pending", col("cost")).otherwise(0)
        ).alias("Outstanding_Amounts")
    )
)

# Attach the patient's name; the inner join drops any patientid that has no
# row in Dim_Patient.
outstanding_amounts = (
    outstanding_amounts.join(
        Dim_Patient,
        outstanding_amounts["patientid"] == Dim_Patient["patientid"],
        how="inner",
    )
    .select(Dim_Patient["name"].alias("Patient_Name"), "Outstanding_Amounts")
)

display(outstanding_amounts)