In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Define a more realistic dataset for credit score prediction
# Features: Payment history (%), Credit utilization (%), Credit history length (years), Types of credit, New credit inquiries
# Labels: Ground truth credit scores (synthetic for demonstration purposes)
data = [
    [95, 30, 10, 3, 2, 750],
    [85, 45, 8, 2, 5, 700],
    [65, 70, 5, 1, 10, 600],
    [90, 40, 15, 4, 3, 720],
    [80, 50, 7, 2, 7, 680],
    [99, 20, 20, 5, 1, 800],
    [70, 60, 4, 1, 9, 640],
    [88, 35, 12, 3, 2, 730],
    [55, 85, 3, 1, 12, 580],
    [92, 25, 18, 4, 4, 760]
]

# Split features and labels
features = np.array([d[:-1] for d in data], dtype=np.float32)
labels = np.array([d[-1] for d in data], dtype=np.float32).reshape(-1, 1)

# Convert to PyTorch tensors
X = torch.tensor(features)
y = torch.tensor(labels)

# Define a simpler linear regression model without a hidden layer
class SimpleCreditScoreModel(nn.Module):
    def __init__(self):
        super(SimpleCreditScoreModel, self).__init__()
        self.linear = nn.Linear(5, 1)  # Direct mapping from 5 features to 1 output

    def forward(self, x):
        return self.linear(x)

# Initialize the linear model, loss function, and optimizer
model = SimpleCreditScoreModel()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001)

# Training loop for the simpler linear model
epochs = 2000
for epoch in range(epochs):
    # Forward pass
    outputs = model(X)
    loss = criterion(outputs, y)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss occasionally
    if (epoch + 1) % 200 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Get the weights and biases of the linear layer to estimate feature importance
weights = model.linear.weight.data
bias = model.linear.bias.data

# Feature names for clarity
feature_names = [
    "Payment History", 
    "Credit Utilization", 
    "Credit History Length", 
    "Types of Credit", 
    "New Credit Inquiries"
]

# Print the equation for interpretability
print("\nEquation for Feature Influence on Credit Score:")

# Construct the equation string for a linear relationship
equation_terms = []
for i, (weight, feature_name) in enumerate(zip(weights[0], feature_names)):
    equation_terms.append(f"({weight.item():.4f}) * {feature_name}")

equation = " + ".join(equation_terms) + f" + Bias({bias.item():.4f})"
print("Credit Score Estimate = " + equation)

# Compute percentage contribution of each feature
absolute_weights = torch.abs(weights[0])  # Take absolute values to consider magnitude only
total_weight = torch.sum(absolute_weights)
percent_contributions = (absolute_weights / total_weight) * 100  # Calculate percentage contributions

# Print the percentage contribution of each feature
print("\nPercentage Contribution of Each Feature to the Credit Score Estimate:")
for i, (feature_name, contribution) in enumerate(zip(feature_names, percent_contributions)):
    print(f"{feature_name}: {contribution.item():.2f}%")


Epoch [200/2000], Loss: 232.8296
Epoch [400/2000], Loss: 221.6768
Epoch [600/2000], Loss: 214.2359
Epoch [800/2000], Loss: 208.9097
Epoch [1000/2000], Loss: 204.8319
Epoch [1200/2000], Loss: 201.5251
Epoch [1400/2000], Loss: 198.7249
Epoch [1600/2000], Loss: 196.2827
Epoch [1800/2000], Loss: 194.1098
Epoch [2000/2000], Loss: 192.1536

Equation for Feature Influence on Credit Score:
Credit Score Estimate = (6.9913) * Payment History + (1.7298) * Credit Utilization + (2.9785) * Credit History Length + (0.1525) * Types of Credit + (2.2867) * New Credit Inquiries + Bias(0.2737)

Percentage Contribution of Each Feature to the Credit Score Estimate:
Payment History: 49.45%
Credit Utilization: 12.23%
Credit History Length: 21.07%
Types of Credit: 1.08%
New Credit Inquiries: 16.17%
