In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import joblib

# Read the raw tenant risk records from disk
tenant_risk_df = pd.read_csv('./sklearn-custom-scaling-model/tenant_risk_data.csv')

# Build one indicator column per distinct 'Employment Status' value
employment_status_encoded = pd.get_dummies(tenant_risk_df['Employment Status'])

# Swap the categorical column for its indicators, cast to integer 1/0,
# appended after the remaining original columns
tenant_risk_df = pd.concat(
    [
        tenant_risk_df.drop(columns='Employment Status'),
        employment_status_encoded.astype(int),
    ],
    axis=1,
)

# Model inputs: the numeric columns followed by the employment indicators
numeric_features = [
    'Age',
    'Annual Income',
    'Credit Score',
    'Years at Current Residence',
    'Number of Defaults',
    'Loan Amount',
]
features_to_standardize = [*numeric_features, *employment_status_encoded.columns]
X = tenant_risk_df[features_to_standardize]
y = tenant_risk_df['Risk Category']

# Hold out 20% of the rows for evaluation (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply
# the same transformation to both partitions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a 100-tree random forest on the scaled training data
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train_scaled, y_train)

# Score the classifier on the held-out rows
predictions = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, predictions)
print(f'Model Accuracy: {accuracy:.2f}')

# Persist both artifacts; inference needs the scaler as well as the model
model_filename = './sklearn-custom-scaling-model/tenant_risk_model.joblib'
scaler_filename = './sklearn-custom-scaling-model/scaler.joblib'
for artifact, destination in ((model, model_filename), (scaler, scaler_filename)):
    joblib.dump(artifact, destination)

print(f"Model saved as: {model_filename}")
print(f"Scaler saved as: {scaler_filename}")


Model Accuracy: 1.00
Model saved as: ./sklearn-custom-scaling-model/tenant_risk_model.joblib
Scaler saved as: ./sklearn-custom-scaling-model/scaler.joblib


<h1>How to Load and Use the Saved Model and Scaler</h1>
<h2>To load the saved scaler and model and use them for predictions, run the following code:</h2>

In [32]:
import joblib
import pandas as pd

# Load the saved model and scaler
model_filename = './sklearn-custom-scaling-model/tenant_risk_model.joblib'
scaler_filename = './sklearn-custom-scaling-model/scaler.joblib'

loaded_model = joblib.load(model_filename)
loaded_scaler = joblib.load(scaler_filename)

# The scaler was fitted on a DataFrame, so it records the exact feature
# columns (and their order) it expects. Requires scikit-learn >= 1.0, which
# is the first version to set `feature_names_in_`.
feature_names = list(loaded_scaler.feature_names_in_)

# Numeric inputs used at training time; every other fitted feature is an
# 'Employment Status' indicator column.
numeric_features = ['Age', 'Annual Income', 'Credit Score',
                    'Years at Current Residence', 'Number of Defaults', 'Loan Amount']
employment_columns = [name for name in feature_names if name not in numeric_features]

# Load new data for predictions (this example re-uses the training CSV)
raw_data = pd.read_csv('./sklearn-custom-scaling-model/tenant_risk_data.csv')

# One-hot encode the 'Employment Status' feature, aligned to the training
# categories: a category absent from this data becomes an all-zero column,
# and a category unseen during training is dropped.
new_employment_status_encoded = (
    pd.get_dummies(raw_data['Employment Status'])
    .reindex(columns=employment_columns, fill_value=0)
    .astype(int)
)
new_data = pd.concat(
    [raw_data.drop(columns='Employment Status'), new_employment_status_encoded],
    axis=1,
)

# Extract exactly the fitted features, in the fitted order. This ignores
# extras such as the CSV index column ('Unnamed: 0') or a 'Risk Category'
# label, and raises a KeyError if a required numeric column is missing.
X_new = new_data[feature_names]

# Scale new data using the loaded scaler
X_new_scaled = loaded_scaler.transform(X_new)

# Make predictions with the loaded model
predictions = loaded_model.predict(X_new_scaled)

# Combine the input data with the predictions
results_df = new_data.copy()  # Copy the input data
results_df['Predicted Risk Category'] = predictions  # Add predictions as a new column

# Display the first few rows of the table
results_df.head()



Unnamed: 0,Age,Annual Income,Credit Score,Years at Current Residence,Number of Defaults,Loan Amount,Risk Category,employed,self-employed,unemployed,Predicted Risk Category
0,56,53339,544,17,1,48512,E,0,0,1,E
1,69,85673,711,15,0,9591,B,0,1,0,B
2,46,40320,602,12,2,41153,D,0,1,0,D
3,32,59228,751,5,4,24166,E,0,0,1,E
4,60,126058,768,19,1,21889,B,0,1,0,B


<h1> Build the container </h1>