# Loan prediction analysis

Predict which customers are likely to have their loan applications approved.

## Libraries and loading data

from azureml.core import Workspace, ScriptRunConfig, Environment, Experiment
from azureml.core.runconfig import MpiConfiguration

# Submit `script.py` as a distributed (MPI) run on an Azure ML compute target.
# NOTE(review): none of the later cells visible in this notebook use `ws`,
# `env`, `config` or `run` -- confirm this cell belongs to the loan analysis.

# Obtain the workspace handle via Workspace.from_config().
# Presumably this relies on a workspace config file being discoverable from
# where the notebook runs -- TODO confirm.
ws = Workspace.from_config()

# Look up the compute target by name in the workspace.
# NOTE(review): 'target-name' looks like a placeholder; replace it with the
# name of a real compute target.
target = ws.compute_targets['target-name']

# Fetch the environment registered under the curated name below.
curated_env_name = 'AzureML-PyTorch-1.6-GPU'
env = Environment.get(workspace=ws, name=curated_env_name)

# Experiment handle under which the run is recorded.
# NOTE(review): 'experiment_name' also looks like a placeholder.
exp = Experiment(ws, 'experiment_name')

# MPI settings: 4 processes per node across 2 nodes.
distributed_job_config = MpiConfiguration(process_count_per_node=4, node_count=2)

# Bundle the script, its source directory, the compute target, the environment
# and the MPI settings into a single run configuration.
config = ScriptRunConfig(
    source_directory='.',
    script='script.py',
    compute_target=target,
    environment=env,
    distributed_job_config=distributed_job_config,
)

# Submit the configuration to the experiment, print the run's portal URL,
# then wait for the run to complete with show_output=True.
run = exp.submit(config)
print(run.get_portal_url()) # link to ml.azure.com
run.wait_for_completion(show_output=True)

In [2]:
# Libraries used by the analysis cells.
import pandas as pd
import seaborn as sns
# NOTE(review): numpy is aliased as `num` here rather than the conventional
# `np`; code relying on this cell has to use `num`.
import numpy as num
import matplotlib.pyplot as plt

# IPython magic (not plain Python): selects matplotlib's inline backend so
# figures are shown in the notebook output.
%matplotlib inline
import warnings 
# Installs an 'ignore' filter with no category or module restriction, so every
# warning raised after this point is hidden.
warnings.filterwarnings('ignore')

In [4]:
# Read the test and training tables from CSV files in the working directory
# (test first, then train).
test, train = (pd.read_csv(csv_name) for csv_name in ('test.csv', 'train.csv'))

In [5]:
# Keep separate copies of the freshly loaded frames under *_original names.
test_original, train_original = test.copy(), train.copy()

## EDA

In [6]:
# Show the column labels of the training frame (bare expression, so the
# notebook echoes its value).
train.columns

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

In [9]:
# Show the (rows, columns) dimensions of the training frame.
train.shape

(614, 13)

In [10]:
# Show the dtype pandas assigned to each training column.
train.dtypes

Loan_ID               object
Gender                object
Married               object
Dependents            object
Education             object
Self_Employed         object
ApplicantIncome        int64
CoapplicantIncome    float64
LoanAmount           float64
Loan_Amount_Term     float64
Credit_History       float64
Property_Area         object
Loan_Status           object
dtype: object

In [7]:
# Show the column labels of the test frame; in the recorded output below it
# has the same columns as train except Loan_Status.
test.columns

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area'],
      dtype='object')

In [11]:
# Show the (rows, columns) dimensions of the test frame.
test.shape

(367, 12)

In [12]:
# Show the dtype pandas assigned to each test column.
# NOTE(review): in the recorded outputs CoapplicantIncome is int64 here but
# float64 in train -- keep that in mind if the two frames are combined or
# compared.
test.dtypes

Loan_ID               object
Gender                object
Married               object
Dependents            object
Education             object
Self_Employed         object
ApplicantIncome        int64
CoapplicantIncome      int64
LoanAmount           float64
Loan_Amount_Term     float64
Credit_History       float64
Property_Area         object
dtype: object

### Univariate analysis