## Predict the Loan Status using Logistic Regression in scikit-learn

In [1]:
import pandas as pd

In [2]:
# Read the loan dataset from the local data folder into a DataFrame

df = pd.read_csv(filepath_or_buffer="./data/loan.csv")

In [9]:
# Keep only the predictors of interest together with the Loan_Status target
LoanPrep = df.loc[:, ["Married", "Education", "Self_Employed",
                      "ApplicantIncome", "LoanAmount", "Credit_History",
                      "Loan_Status"]]

# Preview the first rows of the selection
LoanPrep.head()

Unnamed: 0,Married,Education,Self_Employed,ApplicantIncome,LoanAmount,Credit_History,Loan_Status
0,No,Graduate,No,5849,,1.0,Y
1,Yes,Graduate,No,4583,128.0,1.0,N
2,Yes,Graduate,Yes,3000,66.0,1.0,Y
3,Yes,Not Graduate,No,2583,120.0,1.0,Y
4,No,Graduate,No,6000,141.0,1.0,Y


In [10]:
# Count the missing values in each selected column
LoanPrep.isna().sum(axis=0)

Married             3
Education           0
Self_Employed      32
ApplicantIncome     0
LoanAmount         22
Credit_History     50
Loan_Status         0
dtype: int64

In [11]:
# Clean Missing Data - drop every row that has at least one missing value

LoanPrep = LoanPrep.dropna(axis=0, how="any")

In [12]:
# Confirm that no missing values remain after the clean-up
LoanPrep.isna().sum(axis=0)

Married            0
Education          0
Self_Employed      0
ApplicantIncome    0
LoanAmount         0
Credit_History     0
Loan_Status        0
dtype: int64

### Training / Predict Script

In [None]:
# ------------------------------------------------------------------
# Predict the Loan Status using Logistic Regression in scikit-learn
# ------------------------------------------------------------------
# Training script for an Azure ML experiment run. It:
#   1. loads the loan data and keeps the modelling columns,
#   2. drops incomplete rows and one-hot encodes the categoricals,
#   3. trains and evaluates a logistic regression classifier,
#   4. logs the metrics to the run and writes the scored test set
#      to ./outputs/loan_scored.csv.
# ------------------------------------------------------------------

import os

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Import required classes from Azureml
from azureml.core import Run

# Get the context of the experiment run
new_run = Run.get_context()

# No Workspace handle is created here: nothing in this script uses one.
# If one is ever needed inside a submitted run, it can be obtained from
# the run itself via new_run.experiment.workspace.


# -----------------------------------------------------
# Do your stuff here
# -----------------------------------------------------

# Load the data from the local files
df = pd.read_csv("./data/loan.csv")

# Select columns from the dataset
LoanPrep = df[["Married",
               "Education",
               "Self_Employed",
               "ApplicantIncome",
               "LoanAmount",
               "Loan_Amount_Term",
               "Credit_History",
               "Loan_Status"
               ]]

# Clean Missing Data - Drop the rows that contain missing values
LoanPrep = LoanPrep.dropna()


# Create Dummy variables - Not required in designer
# drop_first=True keeps a single indicator per two-level categorical column,
# which is why the target ends up as the one column Loan_Status_Y
LoanPrep = pd.get_dummies(LoanPrep, drop_first=True)

# Create X and Y - Similar to "edit columns" in Train Module
# Y is taken as a 1-D Series (not a one-column DataFrame) because
# scikit-learn estimators expect a 1-D target
Y = LoanPrep['Loan_Status_Y']
X = LoanPrep.drop(['Loan_Status_Y'], axis=1)


# Split Data - X and Y datasets are training and testing sets
# stratify=Y keeps the class proportions the same in both splits
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=1234, stratify=Y)


# Build the Logistic Regression model
lr = LogisticRegression()


# Fit the data to the LogisticRegression object - Train Model
lr.fit(X_train, Y_train)


# Predict the outcome using Test data - Score Model
# Scored Label
Y_predict = lr.predict(X_test)

# Get the probability score - Scored Probabilities
# Column 1 of predict_proba is the probability of the second class
Y_prob = lr.predict_proba(X_test)[:, 1]

# Get Confusion matrix and the accuracy/score - Evaluate
cm = confusion_matrix(Y_test, Y_predict)
score = lr.score(X_test, Y_test)


# -----------------------------------------------------
# Log metrics and Complete an experiment run
# -----------------------------------------------------
# Create the confusion matrix dictionary
cm_dict = {"schema_type": "confusion_matrix",
           "schema_version": "v1",
           "data": {"class_labels": ["N", "Y"],
                    "matrix": cm.tolist()}
           }

# TotalObservations is the row count of the raw file, before cleaning
new_run.log("TotalObservations", len(df))
new_run.log_confusion_matrix("ConfusionMatrix", cm_dict)
new_run.log("Score", score)

# Create the Scored Dataset and upload to outputs
# -----------------------------------------------
# Test data - X_test
# Actual Y - Y_test
# Scored label
# Scored probabilities

# Reset the indexes so the test rows line up positionally with the
# freshly built prediction frames when concatenated side by side
X_test = X_test.reset_index(drop=True)
Y_test = Y_test.reset_index(drop=True)

Y_prob_df = pd.DataFrame(Y_prob, columns=["Scored Probabilities"])
Y_predict_df = pd.DataFrame(Y_predict, columns=["Scored Label"])

scored_dataset = pd.concat([X_test, Y_test, Y_predict_df, Y_prob_df],
                           axis=1)

# Upload the scored dataset
# Make sure the outputs folder exists before writing into it
os.makedirs("./outputs", exist_ok=True)
scored_dataset.to_csv("./outputs/loan_scored.csv",
                      index=False)

# Complete the run
new_run.complete()


### Run / Submit Script

In [None]:

# ------------------------------------------------------------
# Run a script in an Azureml environment
# ------------------------------------------------------------
# This code will submit the script provided in ScriptRunConfig
# and create an Azureml environment on the local machine
# including docker for Azureml
# -------------------------------------------------------------


# Import the Azure Ml Classes
from azureml.core import Environment
from azureml.core.environment import CondaDependencies
from azureml.core import Workspace, Experiment, ScriptRunConfig

# Access the workspace using the configuration stored under ./config
ws = Workspace.from_config("./config")

# Create/access the experiment from workspace
new_experiment = Experiment(workspace=ws,
                            name="Training_Script")

# --------------------------------------------------------
# Create custom environment

myenv = Environment(name="MyEnvironment")

# Create the dependencies object
# The conda package is spelled "scikit-learn" (all lowercase); pandas is
# listed explicitly because the training script imports it directly
myenv_dep = CondaDependencies.create(conda_packages=["scikit-learn", "pandas"])
myenv.python.conda_dependencies = myenv_dep

# Register the environment with the workspace
myenv.register(ws)
# --------------------------------------------------------

# Describe the run: the training script from the current directory,
# executed with the custom environment defined above
script_config = ScriptRunConfig(source_directory=".",
                                script="200 - Training Script.py",
                                environment=myenv)

# Submit a new run using the ScriptRunConfig
new_run = new_experiment.submit(config=script_config)


The default web browser has been opened at https://login.microsoftonline.com/organizations/oauth2/v2.0/authorize. Please continue the login in the web browser. If no web browser is available or if the web browser fails to open, use device code flow with `az login --use-device-code`.


Performing interactive authentication. Please follow the instructions on the terminal.
