In [2]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName("OnlineBankingAnalysis").getOrCreate()
# Loading loan data
loan_df = spark.read.csv("/content/loan.csv", header=True, inferSchema=True)

# Loading credit data
credit_df = spark.read.csv("/content/credit card.csv", header=True, inferSchema=True)

# Loading transaction data
txn_df = spark.read.csv("/content/txn.csv", header=True, inferSchema=True)

# Display first few rows of each dataset (optional)
loan_df.show(5)
credit_df.show(5)
txn_df.show(5)


+-----------+---+------+------------+--------------+-----------+------+-----------+-------------+-------------+-----------+-------+------------+----------------+------------------+
|Customer_ID|Age|Gender|  Occupation|Marital Status|Family Size|Income|Expenditure|Use Frequency|Loan Category|Loan Amount|Overdue| Debt Record| Returned Cheque| Dishonour of Bill|
+-----------+---+------+------------+--------------+-----------+------+-----------+-------------+-------------+-----------+-------+------------+----------------+------------------+
|    IB14001| 30|  MALE|BANK MANAGER|        SINGLE|          4| 50000|      22199|            6|      HOUSING| 10,00,000 |      5|      42,898|               6|                 9|
|    IB14008| 44|  MALE|   PROFESSOR|       MARRIED|          6| 51000|      19999|            4|     SHOPPING|     50,000|      3|      33,999|               1|                 5|
|    IB14012| 30|FEMALE|     DENTIST|        SINGLE|          3| 58450|      27675|            

In [3]:
# Register DataFrames as temporary views
loan_df.createOrReplaceTempView("loans")
credit_df.createOrReplaceTempView("credit_cards")
txn_df.createOrReplaceTempView("transactions")


**Analyzing Loan Data (loandata.csv):**

In [8]:
# 1. Count the number of loans in each category
spark.sql("SELECT `Loan Category`, COUNT(*) AS num_loans FROM loans GROUP BY `Loan Category`").show()




+------------------+---------+
|     Loan Category|num_loans|
+------------------+---------+
|           HOUSING|       67|
|        TRAVELLING|       53|
|       BOOK STORES|        7|
|       AGRICULTURE|       12|
|         GOLD LOAN|       77|
|  EDUCATIONAL LOAN|       20|
|        AUTOMOBILE|       60|
|          BUSINESS|       24|
|COMPUTER SOFTWARES|       35|
|           DINNING|       14|
|          SHOPPING|       35|
|       RESTAURANTS|       41|
|       ELECTRONICS|       14|
|          BUILDING|        7|
|        RESTAURANT|       20|
|   HOME APPLIANCES|       14|
+------------------+---------+



In [10]:
# 2. Number of People Who Have Taken More Than 1 Lakh Loan
spark.sql("SELECT COUNT(*) FROM loans WHERE `Loan Amount` > 100000").show()

+--------+
|count(1)|
+--------+
|       0|
+--------+



In [11]:
# 3. Number of People with Income Greater Than 60,000
spark.sql("SELECT COUNT(*) FROM loans WHERE `Income` > 60000").show()


+--------+
|count(1)|
+--------+
|     198|
+--------+



In [13]:
# 4. Number of People with 2 or More Returned Cheques and Income Less Than 50,000
spark.sql("SELECT COUNT(*) FROM loans WHERE ` Returned Cheque` >= 2 AND `Income` < 50000").show()


+--------+
|count(1)|
+--------+
|     137|
+--------+



In [15]:
# 5. Number of People with 2 or More Returned Cheques and Are Single
spark.sql("SELECT COUNT(*) FROM loans WHERE ` Returned Cheque` >= 2 AND `Marital Status` = 'Single'").show()


+--------+
|count(1)|
+--------+
|       0|
+--------+



In [16]:
# 6. Number of People with Expenditure Over 50,000 per Month
spark.sql("SELECT COUNT(*) FROM loans WHERE `Expenditure` > 50000").show()


+--------+
|count(1)|
+--------+
|       6|
+--------+



In [21]:
# 7. Number of Members Who Are Eligible for Credit Card
spark.sql("SELECT COUNT(*) FROM loans WHERE `Income` > 50000").show()



+--------+
|count(1)|
+--------+
|     284|
+--------+



**Analyzing Credit Data (credit.csv):**



In [23]:
# 8. Number of Credit Card Users in Spain
spark.sql("SELECT COUNT(*) FROM credit_cards WHERE `Geography` = 'Spain'").show()



+--------+
|count(1)|
+--------+
|    2477|
+--------+



In [25]:
# 9. Number of Members Who Are Eligible and Active in the Bank:
spark.sql("SELECT COUNT(*) FROM credit_cards WHERE `IsActiveMember` = 1 AND `Exited` = 0").show()


+--------+
|count(1)|
+--------+
|    4416|
+--------+



**Analyzing Transaction Data (txn.csv):**

In [28]:
# 10. Maximum Withdrawal Amount:
spark.sql("SELECT MAX(` WITHDRAWAL AMT `) FROM transactions").show()

+---------------------+
|max( WITHDRAWAL AMT )|
+---------------------+
|        4.594475464E8|
+---------------------+



In [29]:
# 11 Minimum Withdrawal Amount of an Account
spark.sql("SELECT MIN(` WITHDRAWAL AMT `) FROM transactions").show()

+---------------------+
|min( WITHDRAWAL AMT )|
+---------------------+
|                 0.01|
+---------------------+



In [30]:
# 12. Maximum Deposit Amount of an Account
spark.sql("SELECT MAX(` DEPOSIT AMT `) FROM transactions").show()


+------------------+
|max( DEPOSIT AMT )|
+------------------+
|           5.448E8|
+------------------+



In [31]:
# 13. Minimum Deposit Amount of an Account
spark.sql("SELECT MIN(` DEPOSIT AMT `) FROM transactions").show()


+------------------+
|min( DEPOSIT AMT )|
+------------------+
|              0.01|
+------------------+



In [33]:
# 14. Sum of Balance in Every Bank Account
spark.sql("SELECT `Account No`, SUM(`BALANCE AMT`) FROM transactions GROUP BY `Account No`").show()


+-------------+--------------------+
|   Account No|    sum(BALANCE AMT)|
+-------------+--------------------+
|409000438611'|-2.49486577068339...|
|     1196711'|-1.60476498101275E13|
|     1196428'| -8.1418498130721E13|
|409000493210'|-3.27584952132095...|
|409000611074'|       1.615533622E9|
|409000425051'|-3.77211841164998...|
|409000405747'|-2.43108047067000...|
|409000362497'| -5.2860004792808E13|
|409000493201'|1.0420831829499985E9|
|409000438620'|-7.12291867951358...|
+-------------+--------------------+



In [34]:
# 15. Number of Transactions on Each Date
spark.sql("SELECT `VALUE DATE`, COUNT(*) AS num_transactions FROM transactions GROUP BY `VALUE DATE`").show()

+----------+----------------+
|VALUE DATE|num_transactions|
+----------+----------------+
| 23-Dec-16|             143|
|  7-Feb-19|              98|
| 21-Jul-15|              80|
|  9-Sep-15|              91|
| 17-Jan-15|              16|
| 18-Nov-17|              53|
| 21-Feb-18|              77|
| 20-Mar-18|              71|
| 19-Apr-18|              71|
| 21-Jun-16|              97|
| 17-Oct-17|             101|
|  3-Jan-18|              70|
|  8-Jun-18|             223|
| 15-Dec-18|              62|
|  8-Aug-16|              97|
| 17-Dec-16|              74|
|  3-Sep-15|              83|
| 21-Jan-16|              76|
|  4-May-18|              92|
|  7-Sep-17|              94|
+----------+----------------+
only showing top 20 rows



In [39]:
# 16. Customers with Withdrawal Amount More Than 1 Lakh
spark.sql("SELECT DISTINCT `Account No` FROM transactions WHERE ` WITHDRAWAL AMT ` > 100000").show()



+-------------+
|   Account No|
+-------------+
|409000438611'|
|     1196711'|
|     1196428'|
|409000493210'|
|409000611074'|
|409000425051'|
|409000405747'|
|409000362497'|
|409000493201'|
|409000438620'|
+-------------+

