In [None]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, FloatType, IntegerType, DateType
from pyspark.sql import Window as W
import pyspark.sql.functions as F

# Build the Spark session used by every cell below (or pick up an existing one).
spark = (
    SparkSession.builder
    .appName("Library Management Analysis")
    .getOrCreate()
)
# Bare expression: echoes the session object when it is the last line of a notebook cell.
spark

In [None]:
# Column layout of branch.csv: four nullable string columns.
branch_schema = (
    StructType()
    .add("branch_id", StringType(), nullable=True)
    .add("manager_id", StringType(), nullable=True)
    .add("branch_address", StringType(), nullable=True)
    .add("contact_no", StringType(), nullable=True)
)

# Column layout of employees.csv; every column is nullable.
# NOTE(review): salary is read as single-precision FloatType -- confirm that is
# precise enough for the amounts in the file.
employees_schema = (
    StructType()
    .add("emp_id", StringType(), nullable=True)
    .add("emp_name", StringType(), nullable=True)
    .add("position", StringType(), nullable=True)
    .add("salary", FloatType(), nullable=True)
    .add("branch_id", StringType(), nullable=True)
)

# Column layout of books.csv; every column is nullable.
# NOTE(review): rental_price is read as single-precision FloatType -- confirm
# that is precise enough for price aggregation.
books_schema = (
    StructType()
    .add("isbn", StringType(), nullable=True)
    .add("book_title", StringType(), nullable=True)
    .add("category", StringType(), nullable=True)
    .add("rental_price", FloatType(), nullable=True)
    .add("status", StringType(), nullable=True)
    .add("author", StringType(), nullable=True)
    .add("publisher", StringType(), nullable=True)
)

# Column layout of issued_status.csv; every column is nullable.
# NOTE(review): issued_date is typed as DateType -- confirm the date format in
# the file is one the CSV reader parses, otherwise values will not load as dates.
issue_status_schema = (
    StructType()
    .add("issued_id", StringType(), nullable=True)
    .add("issued_member_id", StringType(), nullable=True)
    .add("issued_book_name", StringType(), nullable=True)
    .add("issued_date", DateType(), nullable=True)
    .add("issued_book_isbn", StringType(), nullable=True)
    .add("issued_emp_id", StringType(), nullable=True)
)

# Column layout of return_status.csv; every column is nullable.
# NOTE(review): return_date is typed as DateType -- confirm the date format in
# the file is one the CSV reader parses, otherwise values will not load as dates.
return_status_schema = (
    StructType()
    .add("return_id", StringType(), nullable=True)
    .add("issued_id", StringType(), nullable=True)
    .add("return_book_name", StringType(), nullable=True)
    .add("return_date", DateType(), nullable=True)
    .add("return_book_isbn", StringType(), nullable=True)
)

# Column layout of members.csv; every column is nullable.
# NOTE(review): reg_date is typed as DateType -- confirm the date format in the
# file is one the CSV reader parses, otherwise values will not load as dates.
members_schema = (
    StructType()
    .add("member_id", StringType(), nullable=True)
    .add("member_name", StringType(), nullable=True)
    .add("member_address", StringType(), nullable=True)
    .add("reg_date", DateType(), nullable=True)
)

# Read each CSV file with its explicit schema; the first row of every file is
# treated as a header rather than data.
branch_df = spark.read.schema(branch_schema).option("header", True).csv(
    "/data/Library-System-Management/branch.csv"
)
employees_df = spark.read.schema(employees_schema).option("header", True).csv(
    "/data/Library-System-Management/employees.csv"
)
books_df = spark.read.schema(books_schema).option("header", True).csv(
    "/data/Library-System-Management/books.csv"
)
issue_status_df = spark.read.schema(issue_status_schema).option("header", True).csv(
    "/data/Library-System-Management/issued_status.csv"
)
return_status_df = spark.read.schema(return_status_schema).option("header", True).csv(
    "/data/Library-System-Management/return_status.csv"
)
members_df = spark.read.schema(members_schema).option("header", True).csv(
    "/data/Library-System-Management/members.csv"
)

# Make each DataFrame queryable from Spark SQL under its table name
# (an existing view with the same name is replaced).
for _view_name, _frame in (
    ("branch", branch_df),
    ("employees", employees_df),
    ("books", books_df),
    ("issue_status", issue_status_df),
    ("return_status", return_status_df),
    ("members", members_df),
):
    _frame.createOrReplaceTempView(_view_name)

# Preview the first rows of every registered view as a quick sanity check.
views = ["branch", "employees", "books", "issue_status", "return_status", "members"]
for view in views:
    # DataFrame.show() prints the table itself and returns None, so it must not
    # be embedded in an f-string: that printed the table first and then
    # "<view> \n None". Print the label, then render the table.
    print(view)
    # spark.table() resolves the view by name without string-building SQL.
    spark.table(view).show(20, truncate=False)

In [None]:
# Library System Management -- analysis tasks:
# 1. List members who have issued more than one book (objective: use GROUP BY to find members who have issued more than one book)
# 2. Find total rental income by category
# 3. List members who registered in the last 180 days
# 4. List employees with their branch manager's name and their branch details
# 5. Retrieve the list of books not yet returned