In [1]:
import pydicom
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

import pandas as pd
from tqdm import tqdm
import cv2
import numpy as np

In [2]:
data_read=pd.read_hdf('Champ_dic copy.h5',key='data')
im_feat=pd.read_hdf('torch_image_feats.h5',key='data')
new_df=im_feat.reset_index().rename(columns={'index':'id'})
full_feats = data_read.merge(new_df, on='id', how='left')

In [3]:
X=full_feats.drop(columns=['id','pass/fail','pass rate', 'distanceTA', 'doseTA'])
y=[1 if rate>=97 else 0 for rate in data_read['pass rate']]

In [8]:
X=X.to_numpy()
np.unique(y,return_counts=True)

(array([0, 1]), array([115, 402]))

In [9]:
from imblearn.over_sampling import SMOTE  # Imbalanced class handling
smote = SMOTE(random_state=42)  # Set random state for reproducibility

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [10]:
scaler = StandardScaler()
X_resampled, y_train = smote.fit_resample(X, y)
X_train = scaler.fit_transform(X_resampled)
X_test = scaler.transform(X_test)
SVCmodel = SVC(kernel='rbf',C=10,gamma=0.01)
SVCmodel.fit(X_train, y_train)

# Make predictions
y_pred = SVCmodel.predict(X_test)

# Evaluate the model

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'Predicted values: {y_pred}')
print(f'Actual values: {y_test}')
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")
report = classification_report(y_test, y_pred)
print(report)



# Generate a confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

Mean Squared Error: 0.0
Predicted values: [1 1 1 1 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 1 1 1
 0 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 0 0 1 1 0 1 1 1 1 0 0 1 1 1 0 1 1 1
 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 0 1 1 1 0 1 1 0 1 1 1 0 0 0 1]
Actual values: [1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1]
Accuracy: 100.00%
Precision: 100.00%
Recall: 100.00%
F1 Score: 100.00%
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        27
           1       1.00      1.00      1.00        77

    accuracy                           1.00       104
   macro avg       1.00      1.00      1.00       104
weighted avg       1.00      1.00      1.00       104

Confusion Matrix:
[[

In [11]:
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [i for i in range(5,50,5)], 'gamma': [i for i in np.arange(0.1,10,0.05)]}
grid_search = GridSearchCV(estimator=SVCmodel, param_grid=param_grid, scoring='f1_macro', cv=5)
grid_search.fit(X_train, y_train)
# Best parameters found
best_params = grid_search.best_params_

# Best score achieved on validation set
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

Best Parameters: {'C': 20, 'gamma': 0.15000000000000002}
Best Score: 0.8362398133691842


In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE  
smote = SMOTE(random_state=42)  
X_resampled, y_resampled = smote.fit_resample(X, y)

# Now X_resampled and y_resampled have a balanced class distribution
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)


In [25]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print(accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.92      0.90        75
           1       0.93      0.90      0.91        86

    accuracy                           0.91       161
   macro avg       0.91      0.91      0.91       161
weighted avg       0.91      0.91      0.91       161

0.906832298136646


In [26]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Random Forest Classifier instance
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Print the classification report
print(classification_report(y_test, y_pred))

# Print the accuracy score
print('Accuracy:', accuracy_score(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.39      0.29      0.33        24
           1       0.80      0.86      0.83        80

    accuracy                           0.73       104
   macro avg       0.60      0.58      0.58       104
weighted avg       0.71      0.73      0.72       104

Accuracy: 0.7307692307692307
