In [196]:
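# Tree-Ensemble Regression for Spine Texture Analysis
# This script fits a tree-ensemble regressor on spine texture data (gradient boosting is the active model; random
# forest is kept as a commented-out alternative), predicting age from the texture, edge, and shape scores,
# analyzing the relationship between age and those scores, and visualizing the results.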
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import OneHotEncoder
#from xgboost import XGBClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import seaborn as sns
# TODO: set the seaborn style for better aesthetics (e.g. sns.set_theme()); no style call follows this comment yet


In [197]:
# Load the spine texture measurements into `df` and print the whole table.
# NOTE(review): this is an absolute path on one machine; the cell only runs where that file exists.
csv_path = '/Users/krishshah/Desktop/Krish_Sky_Long_AI/spine_texture_analysis.csv'
df = pd.read_csv(csv_path)
print(df)

    age sex  texture_score  edge_score  shape_score image_angle
0     6   F       5.428603    0.796251     4.701439          AP
1    10   F       5.569553    0.583567     5.067939          AP
2    13   F       5.452931    0.719179     4.386277          AP
3    25   F       4.322031    0.330064     4.552385          AP
4    31   F       5.805200    0.748549     4.920593          AP
5    37   F       6.199163    0.342327     5.128160          AP
6    38   F       4.277627    0.114491     5.118457          AP
7    63   F       4.546191    0.126566     5.264554          AP
8    79   F       4.763363    0.207147     4.971540          AP
9     6   M       5.676108    0.702496     1.095745          AP
10   20   M       5.291117    0.543630     6.464209          AP
11   22   M       4.174701    0.239207     4.984199          AP


In [198]:
# Dataframe cleaning and encoding.
# Result: `df` is rebuilt as [one-hot encoded sex column(s)] + [every other column],
# including a derived `age_group` column.

df = df.dropna()  # Drop rows with missing values

# Bucket age into 5-year-wide groups via floor division (e.g. ages 10-14 -> group 2).
# An earlier variant used pd.cut with 10-year bins and string labels instead.
df['age_group'] = df['age'] // 5

# One-hot encode `sex`; drop='if_binary' keeps a single column when there are exactly two categories.
drop_binary_enc = OneHotEncoder(drop='if_binary', sparse_output=False)
X_encoded = drop_binary_enc.fit_transform(df[['sex']])

# Give the encoded frame df's own index. pd.concat(axis=1) aligns rows by index label, so a
# fresh 0..n-1 index would pair encoded values with the wrong rows (and create NaN-filled rows)
# whenever dropna() above removed a row and left gaps in df's index.
X_encoded_df = pd.DataFrame(
    X_encoded,
    columns=drop_binary_enc.get_feature_names_out(['sex']),
    index=df.index,
)

# Both frames now share one index, so the concat introduces no NaNs and no second dropna is needed.
X_final = pd.concat([X_encoded_df, df.drop(columns='sex')], axis=1)
df = X_final
df

Unnamed: 0,sex_M,age,texture_score,edge_score,shape_score,image_angle,age_group
0,0.0,6,5.428603,0.796251,4.701439,AP,1
1,0.0,10,5.569553,0.583567,5.067939,AP,2
2,0.0,13,5.452931,0.719179,4.386277,AP,2
3,0.0,25,4.322031,0.330064,4.552385,AP,5
4,0.0,31,5.8052,0.748549,4.920593,AP,6
5,0.0,37,6.199163,0.342327,5.12816,AP,7
6,0.0,38,4.277627,0.114491,5.118457,AP,7
7,0.0,63,4.546191,0.126566,5.264554,AP,12
8,0.0,79,4.763363,0.207147,4.97154,AP,15
9,1.0,6,5.676108,0.702496,1.095745,AP,1


In [199]:
# Build the model inputs as plain NumPy arrays.
# Features are whatever columns remain after removing the target (`age`), its derived
# bucket (`age_group`), and the `sex_M` / `image_angle` columns; the target is `age`.
excluded_columns = ['age', 'age_group', 'sex_M', 'image_angle']
feature_frame = df.drop(columns=excluded_columns)
X = feature_frame.values  # Features
y = df['age'].values  # Target

X

array([[5.428603  , 0.79625076, 4.7014394 ],
       [5.569553  , 0.5835672 , 5.067939  ],
       [5.452931  , 0.71917856, 4.3862767 ],
       [4.322031  , 0.330064  , 4.5523853 ],
       [5.8052    , 0.74854934, 4.9205933 ],
       [6.1991634 , 0.34232706, 5.12816   ],
       [4.277627  , 0.11449142, 5.118457  ],
       [4.5461907 , 0.12656587, 5.264554  ],
       [4.7633634 , 0.20714706, 4.9715405 ],
       [5.6761084 , 0.7024957 , 1.0957452 ],
       [5.2911167 , 0.54362965, 6.464209  ],
       [4.174701  , 0.23920736, 4.984199  ]])

In [200]:
# Standardize the feature matrix column by column.
# NOTE(review): the scaler's statistics are learned from every row of X, so any rows later
# held out of X_scaled as a test set have already contributed to the scaling.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [201]:
# Split first, then scale. The scaler is fit on the training rows only, so the held-out rows'
# mean/variance never influence preprocessing (avoids train/test leakage). The same
# test_size and random_state are kept, so the same rows land in each partition as before.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

In [202]:
# Model Training
from sklearn.linear_model import LogisticRegression
# NOTE(review): the original note on this import read "gives the highest f1 score"; F1 is a
# classification metric, so it presumably referred to one of the classifiers — confirm.
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier, RandomForestRegressor

# Active model: gradient-boosted regression trees with a fixed seed.
gbr_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 5,
    'random_state': 42,
}
model = GradientBoostingRegressor(**gbr_params)

# Commented-out alternatives (swap in for the assignment above):
#model = LogisticRegression(random_state=42, max_iter=1000) # Logistic Regression
#model = RandomForestRegressor(n_estimators=100, random_state=42)
#model = RandomForestClassifier(random_state=42)  # Random Forest Classifier

# Fit on the training partition; as the cell's last expression this also displays the estimator.
model.fit(X_train, y_train)

0,1,2
,loss,'squared_error'
,learning_rate,0.1
,n_estimators,100
,subsample,1.0
,criterion,'friedman_mse'
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_depth,5
,min_impurity_decrease,0.0


In [203]:
# Model Evaluation
from sklearn.metrics import accuracy_score

# Predict the target for the held-out rows, then score the predictions with
# mean absolute error and mean squared error (in that order).
y_pred = model.predict(X_test)
mae, mse = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error)
)

# Classification-style reports, left disabled while the active model is a regressor.
#print("Accuracy:", accuracy_score(y_test, y_pred))
#print(classification_report(y_test, y_pred))

In [204]:
# Report both held-out error metrics on a single line.
print('MAE: {}, MSE: {}'.format(mae, mse))

MAE: 7.6145544495276, MSE: 59.66864784379078
