In [1]:
import numpy as np
import pandas as pd


In [13]:
from azureml.core import Workspace

# Connect to the Azure ML workspace via Workspace.from_config(); in the
# recorded run this step prompted an interactive browser login.
ws = Workspace.from_config()
print(f"Workspace name: {ws.name}")
print(f"Region: {ws.location}")


  import pkg_resources


Performing interactive authentication. Please follow the instructions on the terminal.


The default web browser has been opened at https://login.microsoftonline.com/organizations/oauth2/v2.0/authorize. Please continue the login in the web browser. If no web browser is available or if the web browser fails to open, use device code flow with `az login --use-device-code`.


Interactive authentication successfully completed.
Workspace name: creditbridge-ml-01
Region: centralindia


In [2]:
# Seed NumPy's global RNG so the synthetic dataset below is reproducible.
np.random.seed(42)

# Number of synthetic rows to generate.
n = 1000

# One length-n array per feature. The arrays are drawn from the global RNG in
# the order written here, so reordering the entries changes the values
# produced for a given seed.
data = {
    # Uniform floats in [500, 2500).
    "avg_daily_income": np.random.uniform(500, 2500, n),
    # Uniform floats in [50, 800).
    "income_std_dev": np.random.uniform(50, 800, n),
    # Uniform floats in [0.3, 1.0).
    "active_days_ratio": np.random.uniform(0.3, 1.0, n),
    # Integers 0..14: randint's upper bound is exclusive.
    # NOTE(review): confirm 15 was not meant to be included.
    "max_income_gap": np.random.randint(0, 15, n),
    # Integers 1..35: randint's upper bound is exclusive.
    # NOTE(review): confirm 36 was not meant to be included.
    "tenure_months": np.random.randint(1, 36, n),
    # Uniform floats in [-0.3, 0.3). compute_credit_score in this notebook does
    # not read this column, so it carries no signal for the target.
    "income_trend": np.random.uniform(-0.3, 0.3, n),
}

# Assemble the feature table and preview the first rows.
df = pd.DataFrame(data)
df.head()


Unnamed: 0,avg_daily_income,income_std_dev,active_days_ratio,max_income_gap,tenure_months,income_trend
0,1249.080238,188.849697,0.483194,2,13,0.071166
1,2401.428613,456.425711,0.472885,8,32,0.041131
2,1963.987884,704.709377,0.934378,4,26,-0.264494
3,1697.316968,599.168665,0.474682,7,22,0.079682
4,812.037281,654.920861,0.490365,6,4,0.161352


In [3]:
def compute_credit_score(row):
    """Compute the rule-based credit reliability score for one worker.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series or a dict)
        Must provide "avg_daily_income", "income_std_dev",
        "active_days_ratio", "max_income_gap" and "tenure_months".

    Returns
    -------
    number
        Weighted score scaled to 0-100 and clamped to that range.

    Notes
    -----
    The income-variation ratio (std / avg) is only computed when the average
    income is positive and the standard deviation is non-negative. Any other
    input (zero or negative average, negative or NaN values) is treated as
    maximally volatile (ratio 1.0) instead of dividing by zero or letting a
    NaN slip through the min/max clamps and come out as a perfect score.
    """
    avg = row["avg_daily_income"]
    std = row["income_std_dev"]

    # Comparisons with NaN are False, so NaN inputs also take the fallback.
    variation = std / avg if (avg > 0 and std >= 0) else 1.0

    income_consistency = max(0, min(1, 1 - variation))
    work_consistency = row["active_days_ratio"]
    volatility_penalty = min(variation, 1)
    # Tenure saturates after one year.
    tenure_score = min(row["tenure_months"] / 12, 1)
    # Flat penalty once the longest income gap exceeds a week.
    gap_penalty = 1 if row["max_income_gap"] > 7 else 0

    raw_score = (
        0.30 * income_consistency +
        0.25 * work_consistency +
        0.20 * (1 - volatility_penalty) +
        0.15 * tenure_score -
        0.10 * gap_penalty
    )

    return max(0, min(100, raw_score * 100))

# Score every row with the rule-based formula and attach it as the target column.
scores = df.apply(compute_credit_score, axis=1)
df["credit_reliability_score"] = scores

# Persist the labelled dataset without the index column.
df.to_csv("creditbridge_training_data.csv", index=False)

df.head()

Unnamed: 0,avg_daily_income,income_std_dev,active_days_ratio,max_income_gap,tenure_months,income_trend,credit_reliability_score
0,1249.080238,188.849697,0.483194,2,13,0.071166,69.520299
1,2401.428613,456.425711,0.472885,8,32,0.041131,57.318917
2,1963.987884,704.709377,0.934378,4,26,-0.264494,70.418678
3,1697.316968,599.168665,0.474682,7,22,0.079682,59.216594
4,812.037281,654.920861,0.490365,6,4,0.161352,26.933332


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import joblib

# Separate the target column from the feature columns.
target_col = "credit_reliability_score"
y = df[target_col]
X = df.drop(columns=[target_col])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Baseline model: linear regression on the raw features (fit returns the estimator).
model = LinearRegression().fit(X_train, y_train)
joblib.dump(model, "creditbridge_model.pkl")

print("Model trained and saved.")


Model trained and saved.


In [6]:
# First held-out row, kept as a one-row DataFrame so predict receives 2-D input.
sample_df = X_test.head(1)
(predicted,) = model.predict(sample_df)

print(f"Predicted score: {round(predicted, 2)}")


Predicted score: 36.16


In [9]:
# Display the one-row feature frame that was passed to model.predict.
sample_df

Unnamed: 0,avg_daily_income,income_std_dev,active_days_ratio,max_income_gap,tenure_months,income_trend
521,1261.781713,750.82702,0.394105,2,4,-0.099803


In [10]:
pip install lightgbm


Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ------------------------------------ --- 1.3/1.5 MB 33.8 MB/s eta 0:00:01
   ---------------------------------------- 1.5/1.5 MB 6.9 MB/s  0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-4.6.0
Note: you may need to restart the kernel to use updated packages.


In [11]:
from lightgbm import LGBMRegressor
from sklearn.model_selection import train_test_split
import joblib

# Separate the target from the features and make the seeded 80/20 split.
target_col = "credit_reliability_score"
y = df[target_col]
X = df.drop(columns=[target_col])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Hyperparameters for the LightGBM regressor.
lgbm_params = {
    "n_estimators": 200,
    "learning_rate": 0.05,
    "max_depth": 4,
    "random_state": 42,
}

# fit returns the fitted estimator, so construction and training chain together.
lgbm_model = LGBMRegressor(**lgbm_params).fit(X_train, y_train)

joblib.dump(lgbm_model, "creditbridge_lgbm_model.pkl")

print("LightGBM model trained.")


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000191 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1071
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 6
[LightGBM] [Info] Start training from score 57.070258
LightGBM model trained.


In [12]:
from sklearn.metrics import mean_absolute_error

# Predict the held-out rows with both fitted models.
lr_pred = model.predict(X_test)
lgbm_pred = lgbm_model.predict(X_test)

# Report mean absolute error per model, linear baseline first.
for label, predictions in (("Linear", lr_pred), ("LightGBM", lgbm_pred)):
    print(f"{label} MAE:", mean_absolute_error(y_test, predictions))


Linear MAE: 4.496074057168729
LightGBM MAE: 1.419479703862673


In [14]:
from azureml.core import Model

# Register the saved LightGBM artifact with the Azure ML workspace.
# The result is bound to `registered_model` rather than `model`, so the fitted
# LinearRegression stored in `model` is not overwritten and the earlier
# prediction/evaluation cells still work if they are re-run after this one.
registered_model = Model.register(
    workspace=ws,
    model_name="creditbridge-credit-score-lgbm",
    model_path="creditbridge_lgbm_model.pkl",
    description="LightGBM-based credit reliability model for gig workers using income consistency and work behavior",
    tags={
        "project": "CreditBridge",
        "competition": "Imagine Cup 2026",
        "model_type": "LightGBM",
        "mvp": "true"
    }
)

print("Model registered successfully")
print("Name:", registered_model.name)
print("Version:", registered_model.version)


Registering model creditbridge-credit-score-lgbm
Model registered successfully
Name: creditbridge-credit-score-lgbm
Version: 1


In [24]:
def generate_reasons(row):
    """List the human-readable risk factors that apply to one worker.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series or a dict)
        Must provide "avg_daily_income", "income_std_dev",
        "active_days_ratio", "tenure_months" and "max_income_gap".

    Returns
    -------
    list of str
        One message per triggered rule, in rule order. When no rule fires,
        a single message describing a stable profile.
    """
    avg = row["avg_daily_income"]
    std = row["income_std_dev"]

    # The volatility ratio is only evaluated for a positive average income.
    volatile = avg > 0 and (std / avg) > 0.5

    rules = [
        (volatile, "High income volatility"),
        (row["active_days_ratio"] < 0.6, "Inconsistent work activity"),
        (row["tenure_months"] < 6, "Short work history"),
        (row["max_income_gap"] > 7, "Long gaps between income days"),
    ]

    triggered = [message for fired, message in rules if fired]
    return triggered or ["Stable income and consistent work pattern"]

def risk_band(score):
    """Translate a credit reliability score into its risk band label.

    Scores of 75 or more are "Low Risk", scores from 50 up to (but not
    including) 75 are "Medium Risk", and anything else is "High Risk".
    """
    # Thresholds are checked from the highest floor down; the first match wins.
    for floor, label in ((75, "Low Risk"), (50, "Medium Risk")):
        if score >= floor:
            return label
    return "High Risk"


In [26]:
def get_credit_decision(row, model):
    """Score one worker with a fitted model and collect the explanations.

    Parameters
    ----------
    row : pandas.Series
        Feature values for one worker; must contain every name listed in
        ``model.feature_names_in_`` plus the fields generate_reasons reads.
        Extra entries are ignored for prediction.
    model : fitted estimator
        Must expose ``feature_names_in_`` and ``predict``.

    Returns
    -------
    tuple
        ``(score, reasons)`` where ``score`` is the prediction rounded to two
        decimals as a built-in ``float`` and ``reasons`` is the list returned
        by ``generate_reasons(row)``.
    """
    feature_names = model.feature_names_in_

    # One-row frame restricted to, and ordered by, the model's recorded feature names.
    X = pd.DataFrame([row[feature_names]], columns=feature_names)

    # Convert the NumPy scalar to a built-in float so the response payload has a
    # stable repr and serializes cleanly whatever dtype the model predicts in.
    score = float(model.predict(X)[0])
    reasons = generate_reasons(row)

    return round(score, 2), reasons
def creditbridge_response(row, model):
    """Build the response payload for one worker.

    Parameters
    ----------
    row : pandas.Series
        Feature values for one worker, forwarded to get_credit_decision.
    model : fitted estimator
        Model used by get_credit_decision to produce the score.

    Returns
    -------
    dict
        Keys "credit_reliability_score", "risk_band" and "key_reasons".
    """
    score, reasons = get_credit_decision(row, model)
    band = risk_band(score)

    response = {
        "credit_reliability_score": score,
        "risk_band": band,
        "key_reasons": reasons,
    }
    return response


In [27]:
# Demo: run the first row of the dataset through the LightGBM model and show
# the assembled response payload.
sample = df.iloc[0]
result = creditbridge_response(row=sample, model=lgbm_model)

result


{'credit_reliability_score': 69.16,
 'risk_band': 'Medium Risk',
 'key_reasons': ['Inconsistent work activity']}