# KPI

In [0]:
# Load the four Prajwal_Telecom tables used by the KPI cells below.
# Note: the customer dimension is bound to the name `Dim_Doctor`; the rest of
# the notebook refers to it by that name.
Dim_Doctor, Dim_Device, Fact_Table, Dim_Tower = (
    spark.read.table(qualified_name)
    for qualified_name in (
        "Prajwal_Telecom.Dim_Customer",
        "Prajwal_Telecom.Dim_Device",
        "Prajwal_Telecom.Fact",
        "Prajwal_Telecom.Dim_Tower",
    )
)

In [0]:
# Print the schema of every loaded table (same order as before) so the
# column names used by the KPI cells can be checked.
for frame in (Dim_Doctor, Fact_Table, Dim_Device, Dim_Tower):
    frame.printSchema()

## Average call duration per customer

In [0]:
from pyspark.sql.functions import col, avg

# Step 1: mean of call_duration for each customer_id in the fact table.
avg_duration_by_id = (
    Fact_Table
    .groupBy("customer_id")
    .agg(avg(col("call_duration")).alias("Average_Call_Duration"))
)

# Step 2: replace the id with the customer's name by inner-joining the
# customer dimension (held in the variable `Dim_Doctor`).
average_call_duration = (
    avg_duration_by_id
    .join(
        Dim_Doctor,
        avg_duration_by_id["customer_id"] == Dim_Doctor["customer_id"],
        "inner",
    )
    .select(Dim_Doctor["customer_name"].alias("Customer_Name"), "Average_Call_Duration")
)

display(average_call_duration)

## Total number of roaming calls per customer

In [0]:
from pyspark.sql.functions import col, count

# Keep only rows whose roaming_flag is "Y", then count call_id per customer.
roaming_rows = Fact_Table.where(col("roaming_flag") == "Y")
roaming_count_by_id = (
    roaming_rows
    .groupBy("customer_id")
    .agg(count("call_id").alias("Total_Roaming_Calls"))
)

# Inner join to the customer dimension (variable `Dim_Doctor`) to report
# the customer's name instead of the id.
roaming_calls = (
    roaming_count_by_id
    .join(
        Dim_Doctor,
        roaming_count_by_id["customer_id"] == Dim_Doctor["customer_id"],
        "inner",
    )
    .select(Dim_Doctor["customer_name"].alias("Customer_Name"), "Total_Roaming_Calls")
)

display(roaming_calls)

## Total number of calls shorter than 10 seconds per customer

In [0]:
from pyspark.sql.functions import col, count

# Rows with call_duration below 10, counted (by call_id) per customer.
short_rows = Fact_Table.where(col("call_duration") < 10)
short_count_by_id = (
    short_rows
    .groupBy("customer_id")
    .agg(count("call_id").alias("Total_Short_Calls"))
)

# Inner join to the customer dimension (variable `Dim_Doctor`) to report
# the customer's name instead of the id.
short_calls = (
    short_count_by_id
    .join(
        Dim_Doctor,
        short_count_by_id["customer_id"] == Dim_Doctor["customer_id"],
        "inner",
    )
    .select(Dim_Doctor["customer_name"].alias("Customer_Name"), "Total_Short_Calls")
)

display(short_calls)

## Flag high-value customers: total usage above 1,000 minutes

In [0]:
from pyspark.sql.functions import col, sum, expr

# High-value customers: total talk time above 1000 minutes.
#
# The threshold is in minutes, so the summed duration must be converted to
# minutes before comparing. This notebook's short-call KPI compares
# call_duration against 10 for "10 seconds", i.e. it treats the column as
# seconds, so minutes = seconds / 60. The previous version multiplied by 60,
# which inflated the total 3600-fold and flagged almost every customer.
# NOTE(review): assumes call_duration is stored in seconds - confirm against
# the Fact table schema.
# `sum` here is pyspark.sql.functions.sum (imported at the top of this cell).
usage_minutes_by_id = (
    Fact_Table
    .groupBy("customer_id")
    .agg((sum("call_duration") / 60).alias("Total_Call_Duration"))  # minutes
    .filter(col("Total_Call_Duration") > 1000)
)

# Inner join to the customer dimension (variable `Dim_Doctor`) to report
# the customer's name next to the total usage in minutes.
high_value_customers = (
    usage_minutes_by_id
    .join(
        Dim_Doctor,
        usage_minutes_by_id["customer_id"] == Dim_Doctor["customer_id"],
        "inner",
    )
    .select(Dim_Doctor["customer_name"].alias("Customer_Name"), "Total_Call_Duration")
)

display(high_value_customers)

## Total number of missed or incomplete calls

In [0]:
from pyspark.sql.functions import col

# Calls whose call_duration is at most 1 are treated as missed or incomplete.
missed_incomplete_calls = Fact_Table.where(col("call_duration") <= 1)

# Single overall count across all customers.
total_missed_incomplete_calls = missed_incomplete_calls.count()

print(f"{total_missed_incomplete_calls} calls were missed or incomplete")

## Mark devices purchased before 2020 as outdated

In [0]:
from pyspark.sql.functions import col, lit

# Boolean marker: True when Purchase_Date is earlier than 2020-01-01.
purchased_before_2020 = (col("Purchase_Date") < lit("2020-01-01")).cast("boolean")

# Attach the marker, keep the identifying columns, and retain only the rows
# where the marker is true (rows where it is false or null are dropped).
outdated_devices = (
    Dim_Device
    .withColumn("is_outdated", purchased_before_2020)
    .select("device_id", "device_model", "is_outdated")
    .where(col("is_outdated"))
)

display(outdated_devices)

## Categorize devices by age bracket

In [0]:
# `lit` is imported here as well: this cell uses it, and previously it only
# worked when an earlier cell had already imported it into the kernel.
from pyspark.sql.functions import col, lit, when

# Bucket every device by its purchase date using fixed calendar cut-offs:
#   on/after 2023-01-01            -> "0-2 years"
#   2020-01-01 up to 2022-12-31    -> "3-5 years"
#   before 2020-01-01              -> "6+ years"
# The branches are evaluated in order, so the second one only sees dates
# earlier than 2023-01-01.
# There is deliberately no otherwise(): a device with a null Purchase_Date
# matches no branch and gets a null age_bracket, instead of being silently
# reported as "6+ years" as the previous catch-all did.
# NOTE(review): the cut-off dates are hard-coded, so the labels drift as time
# passes - confirm whether they should be derived from the current date.
purchase_date = col("Purchase_Date")

age_brackets = Dim_Device.withColumn(
    "age_bracket",
    when(purchase_date >= lit("2023-01-01"), "0-2 years")
    .when(purchase_date >= lit("2020-01-01"), "3-5 years")
    .when(purchase_date < lit("2020-01-01"), "6+ years"),
)

display(age_brackets.select("device_model", "Purchase_Date", "age_bracket"))