```{contents}
```

## Data Inspection

In [4]:
import pandas as pd
url = "https://github.com/svgoudar/datasets/blob/main/loan_applications.csv?raw=true"
df = pd.read_csv(url)


### Structural Inspection


In [22]:
results = {}
results["shape"] = df.shape
results["ndim"] = df.ndim
results["size"] = df.size
results["axes"] = df.axes
results["columns"] = df.columns.tolist()
results["index"] = df.index.tolist()[:10]
results

{'shape': (50000, 21),
 'ndim': 2,
 'size': 1050000,
 'axes': [RangeIndex(start=0, stop=50000, step=1),
  Index(['application_id', 'customer_id', 'application_date', 'loan_type',
         'loan_amount_requested', 'loan_tenure_months', 'interest_rate_offered',
         'purpose_of_loan', 'employment_status', 'monthly_income', 'cibil_score',
         'existing_emis_monthly', 'debt_to_income_ratio',
         'property_ownership_status', 'residential_address', 'applicant_age',
         'gender', 'number_of_dependents', 'loan_status', 'fraud_flag',
         'fraud_type'],
        dtype='object')],
 'columns': ['application_id',
  'customer_id',
  'application_date',
  'loan_type',
  'loan_amount_requested',
  'loan_tenure_months',
  'interest_rate_offered',
  'purpose_of_loan',
  'employment_status',
  'monthly_income',
  'cibil_score',
  'existing_emis_monthly',
  'debt_to_income_ratio',
  'property_ownership_status',
  'residential_address',
  'applicant_age',
  'gender',
  'number_of_dep

### Content Overview

In [None]:
results={}
results["head"] = df.head(3).to_dict()
results["tail"] = df.tail(3).to_dict()
results["sample"] = df.sample(3, random_state=42).to_dict()
results["info"] = str(df.info())  # info prints to stdout, capture as string
results["describe"] = df.describe(include="all").to_dict()
results["dtypes"] = df.dtypes.to_dict()
results["nunique"] = df.nunique().to_dict()
results

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 21 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   application_id             50000 non-null  object 
 1   customer_id                50000 non-null  object 
 2   application_date           50000 non-null  object 
 3   loan_type                  50000 non-null  object 
 4   loan_amount_requested      50000 non-null  float64
 5   loan_tenure_months         50000 non-null  int64  
 6   interest_rate_offered      50000 non-null  float64
 7   purpose_of_loan            50000 non-null  object 
 8   employment_status          50000 non-null  object 
 9   monthly_income             50000 non-null  float64
 10  cibil_score                50000 non-null  int64  
 11  existing_emis_monthly      50000 non-null  float64
 12  debt_to_income_ratio       50000 non-null  float64
 13  property_ownership_status  50000 non-null  obj

### Value Inspection

In [28]:
results={}
first_col = df.columns[0]
results["value_counts"] = df[first_col].value_counts().head(5).to_dict()
results["unique"] = df[first_col].unique()[:5].tolist()
results["nunique_col"] = df[first_col].nunique()
results

{'value_counts': {'6aaf792e-6755-408e-a9ef-4d9bf841864b': 1,
  'c8bf0bea-70e6-4870-9125-41b8210c527f': 1,
  '91224cec-3544-4bc7-ac15-a9792da54c02': 1,
  '4efcd02d-4a03-4ab7-9bd1-0ff430493d0c': 1,
  'a61337d4-ba04-4a68-b492-2cb8266e6ed7': 1},
 'unique': ['c8bf0bea-70e6-4870-9125-41b8210c527f',
  '91224cec-3544-4bc7-ac15-a9792da54c02',
  '4efcd02d-4a03-4ab7-9bd1-0ff430493d0c',
  'a61337d4-ba04-4a68-b492-2cb8266e6ed7',
  'a8d1639e-170b-41b2-826a-55c7dae38d16'],
 'nunique_col': 50000}

### Data Quality Checks


In [17]:

results = {}
results["isnull_sum"] = df.isnull().sum().to_dict()
results["notnull_sum"] = df.notnull().sum().to_dict()
results["duplicated_sum"] = df.duplicated().sum()
results["duplicated_rows"] = df[df.duplicated()].head(3).to_dict()
pd.DataFrame(results)

Unnamed: 0,isnull_sum,notnull_sum,duplicated_sum,duplicated_rows
application_id,0,50000,0,{}
customer_id,0,50000,0,{}
application_date,0,50000,0,{}
loan_type,0,50000,0,{}
loan_amount_requested,0,50000,0,{}
loan_tenure_months,0,50000,0,{}
interest_rate_offered,0,50000,0,{}
purpose_of_loan,0,50000,0,{}
employment_status,0,50000,0,{}
monthly_income,0,50000,0,{}



### Statistical Checks


In [31]:
results = {}
results["mean"] = df.mean(numeric_only=True).to_dict()
results["median"] = df.median(numeric_only=True).to_dict()
results["mode"] = df.mode().iloc[0].to_dict()
results["min"] = df.min(numeric_only=True).to_dict()
results["max"] = df.max(numeric_only=True).to_dict()
results["corr"] = df.corr(numeric_only=True).to_dict()
results["var"] = df.var(numeric_only=True).to_dict()
results["std"] = df.std(numeric_only=True).to_dict()
pd.DataFrame(results)

Unnamed: 0,mean,median,mode,min,max,corr,var,std
loan_amount_requested,513913.04,503000.0,100000.0,100000.0,1618000.0,"{'loan_amount_requested': 1.0, 'loan_tenure_mo...",75741960000.0,275212.571219
loan_tenure_months,121.3884,60.0,24.0,12.0,360.0,{'loan_amount_requested': 0.006663320703185639...,14740.49,121.4104
interest_rate_offered,10.528547,10.51,7.0,7.0,19.62,{'loan_amount_requested': -0.00730245995193055...,3.711179,1.926442
monthly_income,50844.94,50300.0,10000.0,10000.0,162000.0,{'loan_amount_requested': -0.00628645429399494...,570003300.0,23874.742237
cibil_score,699.14976,699.0,715.0,476.0,888.0,{'loan_amount_requested': 0.008058566557966971...,2492.039,49.920325
existing_emis_monthly,3066.362,3000.0,0.0,0.0,11400.0,{'loan_amount_requested': 0.005052796337672041...,3598512.0,1896.9745
debt_to_income_ratio,8.573188,5.95,0.0,0.0,102.0,{'loan_amount_requested': 0.004796831954571135...,91.92302,9.587649
applicant_age,43.06088,43.0,48.0,21.0,65.0,{'loan_amount_requested': -0.00697686689550769...,168.2275,12.970254
number_of_dependents,2.01228,2.0,1.0,0.0,4.0,{'loan_amount_requested': 0.002294829283521847...,1.984609,1.408761
fraud_flag,0.02052,0.0,0.0,0.0,1.0,{'loan_amount_requested': -0.00591735482226123...,0.02009933,0.141772
