In [1]:
import os
import random
import sys
import time

import json

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

from utils.direct_dataset import DirectDataset

# Path of the JSON configuration file read below.
cfg_fname = './cfgs/nonfat_cfg.json'

# Read the configuration inside a context manager so the file handle is
# closed as soon as parsing finishes instead of lingering in the kernel.
with open(cfg_fname, 'r') as cfg_file:
    this_cfg = json.load(cfg_file)

print(this_cfg)

# Build the train / validation / test datasets. The three splits differ only
# in their scan list and case count; every other argument is shared.
common_args = dict(
    h5_dir=this_cfg['h5_dir'],
    tgtlist='./data/nonfat_reference.xlsx',
    imsize=None,
    scouts_range=this_cfg['scouts_range'],
)

trainset = DirectDataset(scanlist=this_cfg['train_file'], casenum=this_cfg['train_num'], **common_args)

valset = DirectDataset(scanlist=this_cfg['val_file'], casenum=this_cfg['val_num'], **common_args)

testset = DirectDataset(scanlist=this_cfg['test_file'], casenum=this_cfg['test_num'], **common_args)

# Per-case fields copied out of each dataset item, in the order they are stored.
FEATURE_KEYS = ('Size', 'Weight', 'torso_wt', 'lean_torso_wt', 'subcutaneous_fat')


def _collect_features(dataset, keys=FEATURE_KEYS):
    """Gather the requested per-case fields from an indexable dataset.

    Each item is fetched exactly once via ``dataset[ii]`` and only the
    requested fields are retained, so the rest of the item can be released
    before the next one is loaded.
    (Presumably every ``dataset[ii]`` access re-reads the case from disk --
    TODO confirm against DirectDataset.__getitem__.)

    Args:
        dataset: object supporting ``len(dataset)`` and ``dataset[ii]`` where
            each item is a mapping containing every name in ``keys``.
        keys: field names to extract from each item.

    Returns:
        dict mapping each key to a list of values, one entry per dataset item
        in index order.
    """
    features = {key: [] for key in keys}
    for ii in range(len(dataset)):
        sample = dataset[ii]
        for key in keys:
            features[key].append(sample[key])
    return features


train_ft = _collect_features(trainset)

test_ft = _collect_features(testset)

{'h5_dir': '/data1/AEC_SharedFiles/Stanford_Data/h5s_dosenoise', 'train_file': './data/train_120kVp[CAP]_20240121_20more.xlsx', 'val_file': './data/val_120kVp[CAP].xlsx', 'test_file': './data/test_120kVp[CAP].xlsx', 'train_num': 'all', 'val_num': 'all', 'test_num': 'all', 'ft_arc': 'resnet18', 'save_dir': './wts_20more', 'max_epoch': 1000, 'protocol': '120kVp[CAP]', 'scouts_range': [0, 800]}


In [6]:
# univariate fitting: degree-2 polynomial regression of lean_torso_wt on Weight
degree = 2

X_train = np.array(train_ft['Weight']).reshape(-1, 1)
y_train = np.array(train_ft['lean_torso_wt'])

# Keep only training cases whose Weight is non-zero. The mask is computed once
# from the single feature column and applied to both arrays so they stay
# aligned; indexing the column (rather than squeeze()) also works when there
# is only one training case.
has_weight = X_train[:, 0] != 0
y_train = y_train[has_weight]
X_train = X_train[has_weight]

# Create polynomial features (fitted on the training data only)
poly = PolynomialFeatures(degree=degree, include_bias=False)
X_poly = poly.fit_transform(X_train)

# Fit the polynomial regression model
model = LinearRegression()
model.fit(X_poly, y_train)

# predict on test data
X_test = np.array(test_ft['Weight']).reshape(-1, 1)
y_test = np.array(test_ft['lean_torso_wt'])

# NOTE(review): test cases are not filtered for zero Weight the way training
# cases are, so a test case with Weight == 0 is predicted as the model
# intercept. Rows are deliberately kept so y_pred stays aligned with testset;
# confirm whether such cases should be excluded from evaluation.

# Apply the already-fitted transformer; the test data must not refit it.
y_pred = model.predict(poly.transform(X_test))

In [7]:
# Print every test-set prediction on its own line, in test-set order.
for prediction in y_pred:
    print(prediction)

27.927514498753208
32.95632141462099
24.218320865947003
29.443977723202806
28.982819988582122
34.051988090302146
24.538135180837912
26.870888356783237
28.445269626636893
3.3264438010344826
22.267588180908955
31.552726914350625
31.5710999326073
27.53414798916775
35.3892213095334
