#### Problem statement

1. Read the text file (data.csv) 
2. Create the case Class to Represent the entity
3. Add all the records in to List
4. Perform filter operations based on salary and departments
5. Perform map operations to produce formatted report
6. Perform reduce operation to find total Salary, average Salary, number of employees department wise

NOTE: This code structure prioritizes clarity and readability over strict Scala conventions, with the comment `"We proceed with the logic..."` helping readers follow the program flow.

### 

Function(`getFileContent`) to read file content and return `ReadData`, case class to hold the results

In [1]:
case class ReadData(status: Boolean, message: String, data: List[String])

import scala.io.Source
import java.io.FileNotFoundException

def getFileContent: (String) => ReadData = (fp => {
    try{
        val data = scala.io.Source.fromFile(fp).getLines().toList
        
        if(!data.isEmpty) ReadData(status=true, message="Success", data=data) 
        else ReadData(status=true, message="No data found", data=data)
    }catch {
        case _: FileNotFoundException => ReadData(status=false, message=s"FileNotFound: $fp.", data=List.empty)
        case e: Exception => ReadData(status=false, message=s"Exception: $e.", data=List.empty)
    }
})

defined [32mclass[39m [36mReadData[39m
[32mimport [39m[36mscala.io.Source[39m
[32mimport [39m[36mjava.io.FileNotFoundException[39m
defined [32mfunction[39m [36mgetFileContent[39m

In [2]:
val fp = "data.csv"

val fileContent: ReadData = getFileContent(fp)
if(fileContent.data.isEmpty) println(fileContent.message)
else println("Hooray!! There are contents in the file") // We proceed with the logic calling corresponding functions to fetch stats


Hooray!! There are contents in the file


[36mfp[39m: [32mString[39m = [32m"data.csv"[39m
[36mfileContent[39m: [32mReadData[39m = [33mReadData[39m(
  status = [32mtrue[39m,
  message = [32m"Success"[39m,
  data = [33mList[39m(
    [32m"sno,name,city,salary,department"[39m,
    [32m"1,John Doe,New York,60000,Marketing"[39m,
    [32m"2,Jane Smith,Los Angeles,55000,Finance"[39m,
    [32m"3,Michael Johnson,Chicago,62000,HR"[39m,
    [32m"4,Sarah Lee,San Francisco,58000,Engineering"[39m,
    [32m"5,David Brown,Houston,63000,Operations"[39m,
    [32m"6,Emily Taylor,Miami,54000,Sales"[39m,
    [32m"7,James Wilson,Dallas,59000,Marketing"[39m,
    [32m"8,Emma Anderson,Seattle,61000,Finance"[39m,
    [32m"9,Matthew Martinez,Boston,57000,HR"[39m,
    [32m"10,Olivia Thomas,Atlanta,64000,Engineering"[39m,
    [32m"11,Liam Garcia,Denver,53000,Operations"[39m,
    [32m"12,Sophia Hernandez,Phoenix,60000,Sales"[39m,
    [32m"13,Alexander Lopez,Philadelphia,56000,Marketing"[39m,
    [32m"14,Mia Gon

case class `EmployeeData` represents the csv data as an entity. Format the csv data in terms of `EmployeeData` using `parseToEmployeeData` function

In [3]:
case class EmployeeData(id: Int, name: String, city: String, salary: Int, department: String)

def parseToEmployeeData(str: String): EmployeeData = {
    val elements = str.split(",")
    EmployeeData(elements(0).toInt, elements(1), elements(2), elements(3).toInt, elements(4))
}

val fc = fileContent.data.tail  // remove headers (first element of list: fileContent)
val formattedData: List[EmployeeData] = fc.map(parseToEmployeeData)

defined [32mclass[39m [36mEmployeeData[39m
defined [32mfunction[39m [36mparseToEmployeeData[39m
[36mfc[39m: [32mList[39m[[32mString[39m] = [33mList[39m(
  [32m"1,John Doe,New York,60000,Marketing"[39m,
  [32m"2,Jane Smith,Los Angeles,55000,Finance"[39m,
  [32m"3,Michael Johnson,Chicago,62000,HR"[39m,
  [32m"4,Sarah Lee,San Francisco,58000,Engineering"[39m,
  [32m"5,David Brown,Houston,63000,Operations"[39m,
  [32m"6,Emily Taylor,Miami,54000,Sales"[39m,
  [32m"7,James Wilson,Dallas,59000,Marketing"[39m,
  [32m"8,Emma Anderson,Seattle,61000,Finance"[39m,
  [32m"9,Matthew Martinez,Boston,57000,HR"[39m,
  [32m"10,Olivia Thomas,Atlanta,64000,Engineering"[39m,
  [32m"11,Liam Garcia,Denver,53000,Operations"[39m,
  [32m"12,Sophia Hernandez,Phoenix,60000,Sales"[39m,
  [32m"13,Alexander Lopez,Philadelphia,56000,Marketing"[39m,
  [32m"14,Mia Gonzalez,Detroit,62000,Finance"[39m,
  [32m"15,Ethan Perez,Minneapolis,55000,HR"[39m,
  [32m"16,Isabella Carte

Function(`filterEmployeeData`) to filter employee data based on a parameter and value

In [4]:
def filterEmployeeData(param: String, value: String): List[EmployeeData] = param.toLowerCase match {
    case "salary" => formattedData.filter{ele => ele.salary == value.toInt}
    case "department" => formattedData.filter{ele => ele.department.toLowerCase == value.toLowerCase}
    case _ => {
        println("Filter option not supported")
        List.empty[EmployeeData]
    }
}

val hrList = filterEmployeeData("Department", "HR")
val salaryList = filterEmployeeData("salary", "60000")

println(s"Number of employees as HR: ${hrList.size}") // 8
println(s"Number of employees with salary 60000: ${salaryList.size}") // 5

Number of employees as HR: 8
Number of employees with salary 60000: 5


defined [32mfunction[39m [36mfilterEmployeeData[39m
[36mhrList[39m: [32mList[39m[[32mEmployeeData[39m] = [33mList[39m(
  [33mEmployeeData[39m(
    id = [32m3[39m,
    name = [32m"Michael Johnson"[39m,
    city = [32m"Chicago"[39m,
    salary = [32m62000[39m,
    department = [32m"HR"[39m
  ),
  [33mEmployeeData[39m(
    id = [32m9[39m,
    name = [32m"Matthew Martinez"[39m,
    city = [32m"Boston"[39m,
    salary = [32m57000[39m,
    department = [32m"HR"[39m
  ),
  [33mEmployeeData[39m(
    id = [32m15[39m,
    name = [32m"Ethan Perez"[39m,
    city = [32m"Minneapolis"[39m,
    salary = [32m55000[39m,
    department = [32m"HR"[39m
  ),
  [33mEmployeeData[39m(
    id = [32m21[39m,
    name = [32m"Lucas Rivera"[39m,
    city = [32m"Memphis"[39m,
    salary = [32m59000[39m,
    department = [32m"HR"[39m
  ),
  [33mEmployeeData[39m(
    id = [32m27[39m,
    name = [32m"Madison Flores"[39m,
    city = [32m"Charlotte"[39m,


Function(`generateFormattedReport`) to generate a formatted report for an employee

In [5]:
// Assuming the formatted report is of type: $Name[empId: #id] from $city working in $department department earns $salary
def generateFormattedReport(data: EmployeeData): String = {
    s"${data.name}[empId: ${data.id}] from ${data.city} woring in ${data.department} department earns ${data.salary}"
} 

println(formattedData.map(generateFormattedReport))

List(John Doe[empId: 1] from New York woring in Marketing department earns 60000, Jane Smith[empId: 2] from Los Angeles woring in Finance department earns 55000, Michael Johnson[empId: 3] from Chicago woring in HR department earns 62000, Sarah Lee[empId: 4] from San Francisco woring in Engineering department earns 58000, David Brown[empId: 5] from Houston woring in Operations department earns 63000, Emily Taylor[empId: 6] from Miami woring in Sales department earns 54000, James Wilson[empId: 7] from Dallas woring in Marketing department earns 59000, Emma Anderson[empId: 8] from Seattle woring in Finance department earns 61000, Matthew Martinez[empId: 9] from Boston woring in HR department earns 57000, Olivia Thomas[empId: 10] from Atlanta woring in Engineering department earns 64000, Liam Garcia[empId: 11] from Denver woring in Operations department earns 53000, Sophia Hernandez[empId: 12] from Phoenix woring in Sales department earns 60000, Alexander Lopez[empId: 13] from Philadelphia

defined [32mfunction[39m [36mgenerateFormattedReport[39m

Function(`calculateDepartmentStats`) to calculate employee statistics by department in terms of entity `DepartmentStats`

In [6]:
case class DepartmentStats(department: String, totalSalary: Int, averageSalary: Double, employeesCount: Int)

def calculateDepartmentStats(department: String, employeeData: List[EmployeeData]): DepartmentStats = {
    val employeeCount = employeeData.size
    val totalSalary: Int = employeeData.map(_.salary).reduce(_+_)       // (reduce(_+_) == sum)
    val averageSalary = totalSalary / employeeCount

    DepartmentStats(department, totalSalary, averageSalary, employeeCount)
}

val departmentGrouped = formattedData.groupBy(_.department) // Map[String, List[EmployeeData]]
val departmentStats = departmentGrouped.map{case (k,v) => calculateDepartmentStats(k, v)}

println(departmentStats.map(ele => ele.department -> ele).toMap)

// Department stats for finance
println(departmentStats.filter(_.department.toLowerCase == "finance"))

HashMap(Engineering -> DepartmentStats(Engineering,494000,61750.0,8), Operations -> DepartmentStats(Operations,444000,55500.0,8), Sales -> DepartmentStats(Sales,483000,60375.0,8), Marketing -> DepartmentStats(Marketing,518000,57555.0,9), HR -> DepartmentStats(HR,462000,57750.0,8), Finance -> DepartmentStats(Finance,542000,60222.0,9))
List(DepartmentStats(Finance,542000,60222.0,9))


defined [32mclass[39m [36mDepartmentStats[39m
defined [32mfunction[39m [36mcalculateDepartmentStats[39m
[36mdepartmentGrouped[39m: [32mMap[39m[[32mString[39m, [32mList[39m[[32mEmployeeData[39m]] = [33mHashMap[39m(
  [32m"Engineering"[39m -> [33mList[39m(
    [33mEmployeeData[39m(
      id = [32m4[39m,
      name = [32m"Sarah Lee"[39m,
      city = [32m"San Francisco"[39m,
      salary = [32m58000[39m,
      department = [32m"Engineering"[39m
    ),
    [33mEmployeeData[39m(
      id = [32m10[39m,
      name = [32m"Olivia Thomas"[39m,
      city = [32m"Atlanta"[39m,
      salary = [32m64000[39m,
      department = [32m"Engineering"[39m
    ),
    [33mEmployeeData[39m(
      id = [32m16[39m,
      name = [32m"Isabella Carter"[39m,
      city = [32m"Portland"[39m,
      salary = [32m63000[39m,
      department = [32m"Engineering"[39m
    ),
    [33mEmployeeData[39m(
      id = [32m22[39m,
      name = [32m"Evelyn Scott"[39m