In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

# Build an integer point-based risk score for the binary CACS outcome:
# standardize the predictors, fit a logistic regression, round the scaled
# coefficients to integer weights, and score every row of the dataset.

# Configuration: values a reader may want to tune, kept in one place.
INPUT_PATH = "CACS_dataset_with_binary.xlsx"
OUTPUT_PATH = "CACS_risk_score_output.xlsx"
ID_COLUMN = 'id'
SCALE_FACTOR = 10  # coefficients are multiplied by this before rounding to integer points

# 1. Load dataset
df = pd.read_excel(INPUT_PATH)

# 2. Feature & target
features = ['age', 'HDL', 'CRP', 'NLR', 'BMI', 'SBP', 'LDL', 'DBP', 'TC', 'TG', 'eGFR']
target = 'CACS_binary'

# Fail before any fitting if a column used further down is absent
# (the id column is otherwise only touched at the save step).
missing_columns = [col for col in [ID_COLUMN, target, *features] if col not in df.columns]
if missing_columns:
    raise KeyError(f"Columns missing from {INPUT_PATH}: {missing_columns}")

X = df[features]
y = df[target]

# LogisticRegression refuses NaN input, so the model is fitted on complete
# cases only. All rows are still kept in `df` and in the saved output.
complete_rows = (X.notna().all(axis=1) & y.notna()).to_numpy()
n_incomplete = int((~complete_rows).sum())
if n_incomplete:
    print(
        f"{n_incomplete} row(s) with a missing feature or target value are excluded "
        "from model fitting; rows with a missing feature get a NaN risk_score."
    )

# 3. Standardize
# StandardScaler disregards NaN when fitting and passes it through on transform.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 4. Train logistic regression (complete cases only)
model = LogisticRegression(max_iter=1000)
model.fit(X_scaled[complete_rows], y[complete_rows])

# 5. Coefficients & score conversion
coefs = model.coef_[0]
score_weights = np.round(coefs * SCALE_FACTOR).astype(int)

# 6. Create risk score
# The integer weights are applied to the standardized values, not the raw
# measurements; a NaN in any feature propagates to that row's score.
risk_score = (X_scaled * score_weights).sum(axis=1)

# 7. Attach score to original data
df['risk_score'] = risk_score

# 8. Save result
df[[ID_COLUMN, 'risk_score', target] + features].to_excel(OUTPUT_PATH, index=False)

# Optional: weight table (last expression, so the notebook renders it)
pd.DataFrame({'Variable': features, 'Coefficient': coefs, 'ScoreWeight': score_weights})


Unnamed: 0,Variable,Coefficient,ScoreWeight
0,age,0.299006,3
1,HDL,-0.492865,-5
2,CRP,0.635233,6
3,NLR,0.222415,2
4,BMI,0.094777,1
5,SBP,0.086245,1
6,LDL,-0.558608,-6
7,DBP,0.144234,1
8,TC,0.505563,5
9,TG,-0.760106,-8
