In [None]:
import pandas as pd
from azureml.core import Workspace, Dataset

# Attach to the Azure ML workspace from its saved configuration
ws = Workspace.from_config()

# Look up the registered training dataset by name (the name must match the
# dataset's registered name in Azure ML Studio) and load it into pandas
dataset = Dataset.get_by_name(workspace=ws, name='train_data')
data = dataset.to_pandas_dataframe()


In [None]:
# Preview the first 12 rows; a bare last expression uses the notebook's rich
# table display instead of the plain-text dump produced by print()
data.head(12)

In [32]:
# One-hot encode the training frame, dropping the first level of each encoded
# column; the result replaces `data`
data = pd.get_dummies(data=data, drop_first=True)

In [None]:
# Summarize the columns, dtypes and non-null counts of the encoded training frame
data.info()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Target is the loan_status column; features are every other column except the row id
y = data["loan_status"]
X = data.drop(columns=["id", "loan_status"])

# 70/30 hold-out split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# The scaler is instantiated but not applied: the fit/transform step stays disabled
scaler = StandardScaler()
#X_train = scaler.fit_transform(X_train)

# Show the first training row as a plain list, then the feature column names
print(X_train.to_numpy()[0].tolist())
X.columns

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Train a 100-tree random forest with a fixed seed; fit() returns the estimator itself
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the fitted model on the held-out split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy}")

In [None]:
# Disabled hyperparameter search, kept as a bare string so it does not execute
# on Run All (the grid below is 36 candidates x 5 folds). To enable it, remove
# the surrounding triple quotes.
'''
from sklearn.model_selection import GridSearchCV

# Base estimator for the search; a separate name keeps the fitted `model` intact
search_estimator = RandomForestClassifier(random_state=42)

# Define the parameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [10, 20, 30, None],
    'min_samples_split': [2, 5, 10]
}

# Use GridSearchCV for exhaustive hyperparameter tuning
grid_search = GridSearchCV(estimator=search_estimator, param_grid=param_grid, scoring='accuracy', cv=5, n_jobs=-1)

# Fit the model with grid search
grid_search.fit(X_train, y_train)

# Print best parameters and best score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Evaluate the tuned model on the test set
# (assign `model = best_model` afterwards to use the tuned model downstream)
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Test Set Accuracy:", accuracy)
'''

In [None]:
# Fetch the registered 'test_data' dataset from the workspace, load it into
# pandas and preview the first five rows
test_dataset = Dataset.get_by_name(workspace=ws, name='test_data')
test_data = test_dataset.to_pandas_dataframe()
test_data.head(n=5)

In [None]:
# Summarize the columns, dtypes and non-null counts of the raw test frame
test_data.info()

In [None]:
# One-hot encode the test frame with the same settings used for the training
# frame, then summarize the resulting columns
test_data = pd.get_dummies(data=test_data, drop_first=True)
test_data.info()

In [40]:
# Give the test frame exactly the training feature columns, in training order
X_train_columns = X_train.columns

# reindex keeps the columns that already exist, creates any training column the
# test frame lacks filled with 0 (or an appropriate value), and drops the rest
test_data = test_data.reindex(columns=X_train_columns, fill_value=0)
#test_data = scaler.transform(test_data)

In [41]:
# Predict loan_status for the aligned test frame with the fitted classifier.
# NOTE(review): the deployment section later rebinds `model` to the registered
# Azure ML model, so this cell must run before that one.
y_test_pred = model.predict(test_data)

In [None]:
# Creating a Kaggle submission file

# `test_data` was cut down to the model's feature columns during alignment, and
# those exclude 'id', so the identifiers are re-read from the registered test
# dataset. Assumes the dataset returns rows in the same order as when
# `test_data` was first loaded -- TODO confirm.
test_ids = test_dataset.to_pandas_dataframe()['id']

# Every prediction must pair with exactly one identifier
assert len(test_ids) == len(y_test_pred), "id / prediction count mismatch"

submission = pd.DataFrame({
    'id': test_ids.to_numpy(),    # Replace 'id' with the actual identifier column name if different
    'loan_status': y_test_pred,   # Use appropriate name for the prediction column if specified by Kaggle
})

# Save to CSV
submission.to_csv("submission.csv", index=False)


In [None]:
# From here on we work towards deploying the model

import joblib
from azureml.core import Model, Workspace

# Serialize the trained estimator to a local pickle file
model_file = "loan_approval_model.pkl"
joblib.dump(model, model_file)

# Upload that file to the workspace under the name "loan_approval_model".
# NOTE: `model` is rebound here from the trained estimator to the registered
# Azure ML model returned by Model.register.
ws = Workspace.from_config()
model = Model.register(
    model_name="loan_approval_model",  # Name to register the model under
    model_path=model_file,             # Path to the saved model file
    workspace=ws,
)


In [44]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Conda specification for the scoring environment: Python 3.8 plus the pip packages below
dependencies = CondaDependencies.create(
    pip_packages=["scikit-learn", "pandas", "joblib", "azureml-core", "azureml-defaults"],
    python_version="3.8",
)

# Named environment that carries that specification
env = Environment(name="loan-approval-env")
env.python.conda_dependencies = dependencies


In [45]:
from azureml.core.model import InferenceConfig

# Pair the scoring entry script with the environment it should run in
inference_config = InferenceConfig(environment=env, entry_script="score.py")


In [46]:
from azureml.core.webservice import AciWebservice

# Container instance sizing for the service: 1 CPU core and 1 GB of memory
aci_config = AciWebservice.deploy_configuration(memory_gb=1, cpu_cores=1)


In [None]:
from azureml.core.model import Model

# Deploy the registered model (`model` as rebound by the registration cell)
# as a web service named "loan-approval-service"
service = Model.deploy(
    deployment_config=aci_config,
    inference_config=inference_config,
    models=[model],
    name="loan-approval-service",
    workspace=ws,
)

# Block until the deployment finishes, streaming its progress, then report the result
service.wait_for_deployment(show_output=True)
print(f"Service state: {service.state}")
print(f"Scoring URI: {service.scoring_uri}")


In [None]:
import requests
import json

# Seconds to wait for the scoring service before giving up, so the cell
# cannot hang indefinitely on an unresponsive endpoint
REQUEST_TIMEOUT_S = 60

# Take the training row at position 2 as the sample and wrap it in a JSON
# payload (presumably the {"data": [...]} shape score.py expects -- verify
# against the entry script)
sample_rows = X_train.values[2:3].tolist()
sample_data = json.dumps({"data": sample_rows})

headers = {"Content-Type": "application/json"}

# Send a request to the service
response = requests.post(
    service.scoring_uri,
    data=sample_data,
    headers=headers,
    timeout=REQUEST_TIMEOUT_S,
)
print(response.json())

# Compare with the true label at the same position; .iloc makes the
# positional slice explicit
print("actual:", y_train.iloc[2:3].tolist())
print(sample_rows)

In [None]:
# Show the deployed service's scoring endpoint again for reference
print(f"Scoring URI: {service.scoring_uri}")