In [52]:
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import random
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta


In [77]:
import pprint
import pyspark
import pyspark.sql.functions as F
from pyspark.sql.functions import col
from pyspark.sql.types import StringType, IntegerType, FloatType, DateType

import utils.bronze_layer
import utils.silver_layer
import utils.gold_layer

Set up Spark Session

In [101]:
from pyspark.sql import SparkSession

spark = SparkSession.builder \
      .appName("Loan Default EDA") \
      .master("local[*]") \
      .config("spark.driver.host", "127.0.0.1") \
      .getOrCreate()

spark.sparkContext.setLogLevel("ERROR")
print("Spark session created successfully!")
print(f"Spark version: {spark.version}")



Spark session created successfully!
Spark version: 4.0.1


Loading dataset

In [55]:
# Load Clickstream
df_clickstream=spark.read.option("header","true").option("inferSchema","true").csv("data/feature_clickstream.csv")

# Load attributes
df_attributes = spark.read.option("header","true").option("inferSchema","true").csv("data/features_attributes.csv")

# Load financials
df_financials = spark.read.option("header","true").option("inferSchema","true").csv("data/features_financials.csv")

# Load loan data
df_loans= spark.read.option("header","true").option("inferSchema","true").csv("data/lms_loan_daily.csv")

print("data loaded")

data loaded


General feature exploration

In [56]:
print("-- Clickstream data--")

print(df_clickstream.printSchema())
print(len(df_clickstream.columns))
print(df_clickstream.columns)

-- Clickstream data--
root
 |-- fe_1: integer (nullable = true)
 |-- fe_2: integer (nullable = true)
 |-- fe_3: integer (nullable = true)
 |-- fe_4: integer (nullable = true)
 |-- fe_5: integer (nullable = true)
 |-- fe_6: integer (nullable = true)
 |-- fe_7: integer (nullable = true)
 |-- fe_8: integer (nullable = true)
 |-- fe_9: integer (nullable = true)
 |-- fe_10: integer (nullable = true)
 |-- fe_11: integer (nullable = true)
 |-- fe_12: integer (nullable = true)
 |-- fe_13: integer (nullable = true)
 |-- fe_14: integer (nullable = true)
 |-- fe_15: integer (nullable = true)
 |-- fe_16: integer (nullable = true)
 |-- fe_17: integer (nullable = true)
 |-- fe_18: integer (nullable = true)
 |-- fe_19: integer (nullable = true)
 |-- fe_20: integer (nullable = true)
 |-- Customer_ID: string (nullable = true)
 |-- snapshot_date: date (nullable = true)

None
22
['fe_1', 'fe_2', 'fe_3', 'fe_4', 'fe_5', 'fe_6', 'fe_7', 'fe_8', 'fe_9', 'fe_10', 'fe_11', 'fe_12', 'fe_13', 'fe_14', 'fe_15', 

In [57]:
print("-- Attributes data--")

print(df_attributes.printSchema())
print(len(df_attributes.columns))
print(df_attributes.columns)

-- Attributes data--
root
 |-- Customer_ID: string (nullable = true)
 |-- Name: string (nullable = true)
 |-- Age: string (nullable = true)
 |-- SSN: string (nullable = true)
 |-- Occupation: string (nullable = true)
 |-- snapshot_date: date (nullable = true)

None
6
['Customer_ID', 'Name', 'Age', 'SSN', 'Occupation', 'snapshot_date']


In [58]:
print("-- Finance Data--")

print(df_financials.printSchema())
print(len(df_financials.columns))
print(df_financials.columns)

-- Finance Data--
root
 |-- Customer_ID: string (nullable = true)
 |-- Annual_Income: string (nullable = true)
 |-- Monthly_Inhand_Salary: double (nullable = true)
 |-- Num_Bank_Accounts: integer (nullable = true)
 |-- Num_Credit_Card: integer (nullable = true)
 |-- Interest_Rate: integer (nullable = true)
 |-- Num_of_Loan: string (nullable = true)
 |-- Type_of_Loan: string (nullable = true)
 |-- Delay_from_due_date: integer (nullable = true)
 |-- Num_of_Delayed_Payment: string (nullable = true)
 |-- Changed_Credit_Limit: string (nullable = true)
 |-- Num_Credit_Inquiries: double (nullable = true)
 |-- Credit_Mix: string (nullable = true)
 |-- Outstanding_Debt: string (nullable = true)
 |-- Credit_Utilization_Ratio: double (nullable = true)
 |-- Credit_History_Age: string (nullable = true)
 |-- Payment_of_Min_Amount: string (nullable = true)
 |-- Total_EMI_per_month: double (nullable = true)
 |-- Amount_invested_monthly: string (nullable = true)
 |-- Payment_Behaviour: string (nullable

In [59]:
print("-- Loan Data--")

print(df_loans.printSchema())
print(len(df_loans.columns))
print(df_loans.columns)

-- Loan Data--
root
 |-- loan_id: string (nullable = true)
 |-- Customer_ID: string (nullable = true)
 |-- loan_start_date: date (nullable = true)
 |-- tenure: integer (nullable = true)
 |-- installment_num: integer (nullable = true)
 |-- loan_amt: integer (nullable = true)
 |-- due_amt: double (nullable = true)
 |-- paid_amt: double (nullable = true)
 |-- overdue_amt: double (nullable = true)
 |-- balance: double (nullable = true)
 |-- snapshot_date: date (nullable = true)

None
11
['loan_id', 'Customer_ID', 'loan_start_date', 'tenure', 'installment_num', 'loan_amt', 'due_amt', 'paid_amt', 'overdue_amt', 'balance', 'snapshot_date']


Setup Config


In [60]:
snapshot_date_str='2023-01-01'

start_date_str='2023-01-01'
end_date_str='2025-01-01'

In [61]:
# generate list of dates to process
def generate_first_of_month_dates(start_date_str, end_date_str):
    # Convert the date strings to datetime objects
    start_date = datetime.strptime(start_date_str, "%Y-%m-%d")
    end_date = datetime.strptime(end_date_str, "%Y-%m-%d")
    
    # List to store the first of month dates
    first_of_month_dates = []

    # Start from the first of the month of the start_date
    current_date = datetime(start_date.year, start_date.month, 1)

    while current_date <= end_date:
        # Append the date in yyyy-mm-dd format
        first_of_month_dates.append(current_date.strftime("%Y-%m-%d"))
        
        # Move to the first of the next month
        if current_date.month == 12:
            current_date = datetime(current_date.year + 1, 1, 1)
        else:
            current_date = datetime(current_date.year, current_date.month + 1, 1)

    return first_of_month_dates

dates_str_lst = generate_first_of_month_dates(start_date_str, end_date_str)
print(dates_str_lst)

['2023-01-01', '2023-02-01', '2023-03-01', '2023-04-01', '2023-05-01', '2023-06-01', '2023-07-01', '2023-08-01', '2023-09-01', '2023-10-01', '2023-11-01', '2023-12-01', '2024-01-01', '2024-02-01', '2024-03-01', '2024-04-01', '2024-05-01', '2024-06-01', '2024-07-01', '2024-08-01', '2024-09-01', '2024-10-01', '2024-11-01', '2024-12-01', '2025-01-01']


Build Bronze Table

In [62]:
bronze_lms_directory='datamart/bronze/lms/'

if not os.path.exists(bronze_lms_directory):
    os.makedirs(bronze_lms_directory)

In [63]:
for date_str in dates_str_lst:
    utils.bronze_layer.process_bronze_clickstream(date_str,bronze_lms_directory,spark)
    utils.bronze_layer.process_bronze_attributes(date_str,bronze_lms_directory,spark)
    utils.bronze_layer.process_bronze_financials(date_str,bronze_lms_directory,spark)
    utils.bronze_layer.process_bronze_loan(date_str,bronze_lms_directory,spark)

2023-01-01row count: 8974
saved to: datamart/bronze/lms/bronze_clickstream_daily_2023_01_01.csv
2023-01-01row count: 530
saved to: datamart/bronze/lms/bronze_attributes_daily_2023_01_01.csv
2023-01-01row count: 530
saved to: datamart/bronze/lms/bronze_financials_daily_2023_01_01.csv
2023-01-01row count: 530
saved to: datamart/bronze/lms/bronze_loan_daily_2023_01_01.csv
2023-02-01row count: 8974
saved to: datamart/bronze/lms/bronze_clickstream_daily_2023_02_01.csv
2023-02-01row count: 501
saved to: datamart/bronze/lms/bronze_attributes_daily_2023_02_01.csv
2023-02-01row count: 501
saved to: datamart/bronze/lms/bronze_financials_daily_2023_02_01.csv
2023-02-01row count: 1031
saved to: datamart/bronze/lms/bronze_loan_daily_2023_02_01.csv
2023-03-01row count: 8974
saved to: datamart/bronze/lms/bronze_clickstream_daily_2023_03_01.csv
2023-03-01row count: 506
saved to: datamart/bronze/lms/bronze_attributes_daily_2023_03_01.csv
2023-03-01row count: 506
saved to: datamart/bronze/lms/bronze_fin

Process Silver layer

Attributes Table EDA

In [64]:
from pyspark.sql.functions import col, sum as spark_sum, when

In [65]:
#check missing value
missing_counts = []

for c in df_attributes.columns:
    # Get the data type of the column
    col_type = dict(df_attributes.dtypes)[c]

    if col_type in ['date', 'timestamp']:
        # For date columns, only check for nulls
        missing_count = df_attributes.filter(col(c).isNull()).count()
    else:
        # For other columns, check nulls, empty strings, and underscores
        missing_count = df_attributes.filter(
            col(c).isNull() |
            (col(c) == "") |
            col(c).rlike("^_+$")
        ).count()

    missing_counts.append((c, missing_count))
    print(f"{c:20s}: {missing_count} missing")

total_rows = df_attributes.count()
print(f"\nTotal rows: {total_rows}")

Customer_ID         : 0 missing
Name                : 0 missing
Age                 : 0 missing
SSN                 : 0 missing
Occupation          : 880 missing
snapshot_date       : 0 missing

Total rows: 12500


In [66]:
#check nonnumeric age
non_numeric_count = df_attributes.filter(~col('Age').rlike("[^0-9]")).count()
print('non numeric age:',non_numeric_count)

#Show the non-numeric values
non_numeric = df_attributes.filter(~col('Age').rlike("^[0-9]+$"))
print("Non-numeric Age values:")
non_numeric.select("Age").distinct().show()


non numeric age: 11759
Non-numeric Age values:
+-----+
|  Age|
+-----+
|  31_|
|  22_|
|  37_|
|  54_|
|6408_|
|2329_|
|  41_|
|  38_|
|  19_|
|3843_|
|  52_|
|  25_|
|  46_|
|3834_|
|  47_|
|  24_|
|  30_|
|  51_|
|  50_|
|  16_|
+-----+
only showing top 20 rows


In [67]:
# Show unique occupations
df_attributes.select("Occupation").distinct().show(50, False)

# Count of unique occupations
print(f"Number of unique occupations: {df_attributes.select('Occupation').distinct().count()}")

# Value counts for occupations
df_attributes.groupBy("Occupation").count().orderBy("count", ascending=False).show(20,False)

+-------------+
|Occupation   |
+-------------+
|Scientist    |
|Media_Manager|
|Musician     |
|Lawyer       |
|Teacher      |
|Developer    |
|Writer       |
|Architect    |
|Mechanic     |
|Entrepreneur |
|Journalist   |
|Doctor       |
|Engineer     |
|Accountant   |
|Manager      |
|_______      |
+-------------+

Number of unique occupations: 16
+-------------+-----+
|Occupation   |count|
+-------------+-----+
|_______      |880  |
|Lawyer       |828  |
|Architect    |795  |
|Engineer     |793  |
|Accountant   |791  |
|Scientist    |789  |
|Teacher      |782  |
|Media_Manager|780  |
|Developer    |780  |
|Mechanic     |780  |
|Entrepreneur |776  |
|Journalist   |761  |
|Doctor       |760  |
|Musician     |741  |
|Manager      |736  |
|Writer       |728  |
+-------------+-----+



Check clickstream data

In [68]:
#schema
print(f"Clickstream Rows: {df_clickstream.count():,}")
print(f"Clickstream Columns: {len(df_clickstream.columns)}")
df_clickstream.printSchema()

Clickstream Rows: 215,376
Clickstream Columns: 22
root
 |-- fe_1: integer (nullable = true)
 |-- fe_2: integer (nullable = true)
 |-- fe_3: integer (nullable = true)
 |-- fe_4: integer (nullable = true)
 |-- fe_5: integer (nullable = true)
 |-- fe_6: integer (nullable = true)
 |-- fe_7: integer (nullable = true)
 |-- fe_8: integer (nullable = true)
 |-- fe_9: integer (nullable = true)
 |-- fe_10: integer (nullable = true)
 |-- fe_11: integer (nullable = true)
 |-- fe_12: integer (nullable = true)
 |-- fe_13: integer (nullable = true)
 |-- fe_14: integer (nullable = true)
 |-- fe_15: integer (nullable = true)
 |-- fe_16: integer (nullable = true)
 |-- fe_17: integer (nullable = true)
 |-- fe_18: integer (nullable = true)
 |-- fe_19: integer (nullable = true)
 |-- fe_20: integer (nullable = true)
 |-- Customer_ID: string (nullable = true)
 |-- snapshot_date: date (nullable = true)



In [69]:

# Check min/max values for all features
from pyspark.sql.functions import min as spark_min, max as spark_max

# Get summary stats for all fe_ columns
feature_cols = [col for col in df_clickstream.columns if col.startswith('fe_')]
df_clickstream.select([spark_min(col).alias(f"min_{col}") for col in
feature_cols[:5]]).show()
df_clickstream.select([spark_max(col).alias(f"max_{col}") for col in
feature_cols[:5]]).show()

+--------+--------+--------+--------+--------+
|min_fe_1|min_fe_2|min_fe_3|min_fe_4|min_fe_5|
+--------+--------+--------+--------+--------+
|    -378|    -356|    -399|    -307|    -343|
+--------+--------+--------+--------+--------+

+--------+--------+--------+--------+--------+
|max_fe_1|max_fe_2|max_fe_3|max_fe_4|max_fe_5|
+--------+--------+--------+--------+--------+
|     541|     560|     583|     562|     570|
+--------+--------+--------+--------+--------+



In [70]:
#check missing value

from pyspark.sql.functions import sum as spark_sum, when

missing_summary = df_clickstream.select([
    spark_sum(when(col(c).isNull(), 1).otherwise(0)).alias(c)
    for c in df_clickstream.columns
])
missing_summary.show()

+----+----+----+----+----+----+----+----+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+-------------+
|fe_1|fe_2|fe_3|fe_4|fe_5|fe_6|fe_7|fe_8|fe_9|fe_10|fe_11|fe_12|fe_13|fe_14|fe_15|fe_16|fe_17|fe_18|fe_19|fe_20|Customer_ID|snapshot_date|
+----+----+----+----+----+----+----+----+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+-------------+
|   0|   0|   0|   0|   0|   0|   0|   0|   0|    0|    0|    0|    0|    0|    0|    0|    0|    0|    0|    0|          0|            0|
+----+----+----+----+----+----+----+----+----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+-------------+



In [71]:
#check customers
unique_customers = df_clickstream.select("Customer_ID").distinct().count()
print(f"Unique customers: {unique_customers:,}")

Unique customers: 8,974


Finance data

In [72]:
df_financials.columns

['Customer_ID',
 'Annual_Income',
 'Monthly_Inhand_Salary',
 'Num_Bank_Accounts',
 'Num_Credit_Card',
 'Interest_Rate',
 'Num_of_Loan',
 'Type_of_Loan',
 'Delay_from_due_date',
 'Num_of_Delayed_Payment',
 'Changed_Credit_Limit',
 'Num_Credit_Inquiries',
 'Credit_Mix',
 'Outstanding_Debt',
 'Credit_Utilization_Ratio',
 'Credit_History_Age',
 'Payment_of_Min_Amount',
 'Total_EMI_per_month',
 'Amount_invested_monthly',
 'Payment_Behaviour',
 'Monthly_Balance',
 'snapshot_date']

In [73]:
#identify outliers. 

# Check extreme values in key financial columns
financial_cols = ['Annual_Income', 'Monthly_Inhand_Salary', 'Num_Bank_Accounts',
                   'Num_Credit_Card', 'Num_Credit_Inquiries','Interest_Rate', 'Outstanding_Debt',
                   'Credit_Utilization_Ratio', 'Delay_from_due_date','Total_EMI_per_month']

for col_name in financial_cols:
    print(f"\n=== {col_name} ===")
    df_financials.select(col_name).describe().show()



=== Annual_Income ===
+-------+------------------+
|summary|     Annual_Income|
+-------+------------------+
|  count|             12500|
|   mean|165302.13805472082|
| stddev| 1318317.705695486|
|    min|        10003.495_|
|    max|          99995.22|
+-------+------------------+


=== Monthly_Inhand_Salary ===
+-------+---------------------+
|summary|Monthly_Inhand_Salary|
+-------+---------------------+
|  count|                12500|
|   mean|   4188.5923027131585|
| stddev|   3180.1476109204173|
|    min|    303.6454166666666|
|    max|   15204.633333333331|
+-------+---------------------+


=== Num_Bank_Accounts ===
+-------+------------------+
|summary| Num_Bank_Accounts|
+-------+------------------+
|  count|             12500|
|   mean|          16.93992|
| stddev|114.35081529709485|
|    min|                -1|
|    max|              1756|
+-------+------------------+


=== Num_Credit_Card ===
+-------+------------------+
|summary|   Num_Credit_Card|
+-------+--------------

Process Silver Layer

In [74]:
silver_loan_daily_directory = 'datamart/silver/loan_daily/'
silver_clickstream_directory = 'datamart/silver/clickstream/'
silver_attributes_directory = 'datamart/silver/attributes/'
silver_financials_directory = 'datamart/silver/financials/'

for directory in [silver_loan_daily_directory, silver_clickstream_directory,
                   silver_attributes_directory, silver_financials_directory]:
    if not os.path.exists(directory):
        os.makedirs(directory)

In [117]:
import importlib
import utils.silver_layer
importlib.reload(utils.silver_layer)

<module 'utils.silver_layer' from '/workspaces/Assignment 1/MLE_assignment1/utils/silver_layer.py'>

In [118]:
for date_str in dates_str_lst:
    utils.silver_layer.process_silver_attributes_table(date_str,bronze_lms_directory,silver_attributes_directory,spark)
    utils.silver_layer.process_silver_clickstream_table(date_str,bronze_lms_directory,silver_clickstream_directory,spark)
    utils.silver_layer.process_silver_financials_table(date_str,bronze_lms_directory,silver_financials_directory,spark)
    utils.silver_layer.process_silver_loan_table(date_str,bronze_lms_directory,silver_loan_daily_directory,spark)


loaded from: datamart/bronze/lms/bronze_attributes_daily_2023_01_01.csv rowcount: 530
saved to: datamart/silver/attributes/silver_attributes_2023_01_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2023_01_01.csv row count: 8974


                                                                                

saved to: datamart/silver/clickstream/silver_clickstream_2023_01_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2023_01_01.csv
saved to: datamart/silver/financials/silver_financials_2023_01_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2023_01_01.csv row count: 530


                                                                                

saved to: datamart/silver/loan_daily/silver_loan_daily_2023_01_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2023_02_01.csv rowcount: 501
saved to: datamart/silver/attributes/silver_attributes_2023_02_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2023_02_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2023_02_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2023_02_01.csv
saved to: datamart/silver/financials/silver_financials_2023_02_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2023_02_01.csv row count: 1031
saved to: datamart/silver/loan_daily/silver_loan_daily_2023_02_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2023_03_01.csv rowcount: 506
saved to: datamart/silver/attributes/silver_attributes_2023_03_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2023_03_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_click

                                                                                

saved to: datamart/silver/loan_daily/silver_loan_daily_2023_06_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2023_07_01.csv rowcount: 471
saved to: datamart/silver/attributes/silver_attributes_2023_07_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2023_07_01.csv row count: 8974


                                                                                

saved to: datamart/silver/clickstream/silver_clickstream_2023_07_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2023_07_01.csv
saved to: datamart/silver/financials/silver_financials_2023_07_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2023_07_01.csv row count: 3556
saved to: datamart/silver/loan_daily/silver_loan_daily_2023_07_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2023_08_01.csv rowcount: 481
saved to: datamart/silver/attributes/silver_attributes_2023_08_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2023_08_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2023_08_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2023_08_01.csv
saved to: datamart/silver/financials/silver_financials_2023_08_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2023_08_01.csv row count: 4037
saved to: datamart/silver/loan_daily/silver_loan_daily_2023_08_01.par

                                                                                

saved to: datamart/silver/attributes/silver_attributes_2023_09_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2023_09_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2023_09_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2023_09_01.csv
saved to: datamart/silver/financials/silver_financials_2023_09_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2023_09_01.csv row count: 4491
saved to: datamart/silver/loan_daily/silver_loan_daily_2023_09_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2023_10_01.csv rowcount: 487
saved to: datamart/silver/attributes/silver_attributes_2023_10_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2023_10_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2023_10_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2023_10_01.csv
saved to: datamart/silver/financials/silver_financials_2023_10

                                                                                

saved to: datamart/silver/loan_daily/silver_loan_daily_2024_02_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2024_03_01.csv rowcount: 511
saved to: datamart/silver/attributes/silver_attributes_2024_03_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2024_03_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2024_03_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2024_03_01.csv
saved to: datamart/silver/financials/silver_financials_2024_03_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_03_01.csv row count: 5425
saved to: datamart/silver/loan_daily/silver_loan_daily_2024_03_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2024_04_01.csv rowcount: 513
saved to: datamart/silver/attributes/silver_attributes_2024_04_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2024_04_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_click

                                                                                

saved to: datamart/silver/financials/silver_financials_2024_06_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_06_01.csv row count: 5418
saved to: datamart/silver/loan_daily/silver_loan_daily_2024_06_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2024_07_01.csv rowcount: 505
saved to: datamart/silver/attributes/silver_attributes_2024_07_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2024_07_01.csv row count: 8974


                                                                                

saved to: datamart/silver/clickstream/silver_clickstream_2024_07_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2024_07_01.csv
saved to: datamart/silver/financials/silver_financials_2024_07_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_07_01.csv row count: 5442
saved to: datamart/silver/loan_daily/silver_loan_daily_2024_07_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2024_08_01.csv rowcount: 543


                                                                                

saved to: datamart/silver/attributes/silver_attributes_2024_08_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2024_08_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2024_08_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2024_08_01.csv
saved to: datamart/silver/financials/silver_financials_2024_08_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_08_01.csv row count: 5531


                                                                                

saved to: datamart/silver/loan_daily/silver_loan_daily_2024_08_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2024_09_01.csv rowcount: 493


                                                                                

saved to: datamart/silver/attributes/silver_attributes_2024_09_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2024_09_01.csv row count: 8974


                                                                                

saved to: datamart/silver/clickstream/silver_clickstream_2024_09_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2024_09_01.csv
saved to: datamart/silver/financials/silver_financials_2024_09_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_09_01.csv row count: 5537
saved to: datamart/silver/loan_daily/silver_loan_daily_2024_09_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2024_10_01.csv rowcount: 456
saved to: datamart/silver/attributes/silver_attributes_2024_10_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2024_10_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2024_10_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2024_10_01.csv
saved to: datamart/silver/financials/silver_financials_2024_10_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_10_01.csv row count: 5502
saved to: datamart/silver/loan_daily/silver_loan_daily_2024_10_01.par

                                                                                

saved to: datamart/silver/financials/silver_financials_2024_11_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_11_01.csv row count: 5501


                                                                                

saved to: datamart/silver/loan_daily/silver_loan_daily_2024_11_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2024_12_01.csv rowcount: 515
saved to: datamart/silver/attributes/silver_attributes_2024_12_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2024_12_01.csv row count: 8974
saved to: datamart/silver/clickstream/silver_clickstream_2024_12_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2024_12_01.csv


                                                                                

saved to: datamart/silver/financials/silver_financials_2024_12_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2024_12_01.csv row count: 5531
saved to: datamart/silver/loan_daily/silver_loan_daily_2024_12_01.parquet
loaded from: datamart/bronze/lms/bronze_attributes_daily_2025_01_01.csv rowcount: 526
saved to: datamart/silver/attributes/silver_attributes_2025_01_01.parquet
loaded from: datamart/bronze/lms/bronze_clickstream_daily_2025_01_01.csv row count: 0


                                                                                

saved to: datamart/silver/clickstream/silver_clickstream_2025_01_01.parquet
loaded from: datamart/bronze/lms/bronze_financials_daily_2025_01_01.csv
saved to: datamart/silver/financials/silver_financials_2025_01_01.parquet
loaded from: datamart/bronze/lms/bronze_loan_daily_2025_01_01.csv row count: 5539
saved to: datamart/silver/loan_daily/silver_loan_daily_2025_01_01.parquet
