<a href="https://colab.research.google.com/github/tahahamdii/Medical-Chatbot-Llama2/blob/main/treatment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np

# Define ranges for input parameters
tumor_locations = ['superficial', 'deep']
ages = np.random.randint(18, 85, size=2000)  # Random ages between 18 and 85
sexes = np.random.choice(['male', 'female'], size=2000)

# Redefine the tumor size categories based on the provided guidelines
tumor_size_values_new = {
    'very_small': (0.1, 0.9),   # Very small tumors: a few millimeters to less than 1 cm
    'medium': (1.0, 5.0),       # Medium tumors: 1 to 5 cm
    'large': (5.1, 10.0),       # Large tumors: more than 5 cm
    'very_large': (10.1, 30.0)  # Very large tumors: several tens of centimeters
}

# Update the function to reflect new tumor size categories
def determine_parameters_with_new_size(tumor_size_cm, location, age, sex):
    # Set base values
    intensity = 0
    frequency = 0
    duration = 0

    # Determine size category
    if tumor_size_cm < 1.0:
        intensity = np.random.uniform(5, 8)
        duration = np.random.uniform(5, 10)
    elif 1.0 <= tumor_size_cm <= 5.0:
        intensity = np.random.uniform(8, 12)
        duration = np.random.uniform(10, 20)
    elif 5.0 < tumor_size_cm <= 10.0:
        intensity = np.random.uniform(12, 16)
        duration = np.random.uniform(20, 30)
    else:
        intensity = np.random.uniform(16, 20)
        duration = np.random.uniform(30, 40)

    # Adjust frequency based on location
    if location == 'superficial':
        frequency = np.random.uniform(1, 3)
    else:  # deep
        frequency = np.random.uniform(0.5, 1)

    # Further adjustments based on age and sex
    if age > 50:
        intensity *= 0.9  # Reduce intensity slightly for older patients
        duration *= 0.9  # Reduce duration slightly for older patients

    if sex == 'male':
        intensity *= 1.1  # Slightly higher intensity for male patients

    return intensity, frequency, duration

# Generate dataset with updated tumor size categories
data_with_new_size_cm = []
for _ in range(2000):
    tumor_size_category = np.random.choice(list(tumor_size_values_new.keys()))
    tumor_size_cm = np.random.uniform(*tumor_size_values_new[tumor_size_category])
    location = np.random.choice(tumor_locations)
    age = np.random.randint(18, 85)
    sex = np.random.choice(['male', 'female'])
    intensity, frequency, duration = determine_parameters_with_new_size(tumor_size_cm, location, age, sex)
    data_with_new_size_cm.append([tumor_size_cm, location, age, sex, intensity, frequency, duration])

# Create DataFrame with new tumor size categories
df_with_new_size_cm = pd.DataFrame(data_with_new_size_cm, columns=['tumor_size_cm', 'tumor_location', 'age', 'sex', 'intensity', 'frequency', 'duration'])

# Mapping tumor locations and sexes to integers
location_map = {'superficial': 0, 'deep': 1}
sex_map = {'male': 0, 'female': 1}

# Apply the mapping to the dataset
df_with_new_size_cm['tumor_location'] = df_with_new_size_cm['tumor_location'].map(location_map)
df_with_new_size_cm['sex'] = df_with_new_size_cm['sex'].map(sex_map)

# Save to CSV
df_with_new_size_cm.to_csv('Ultrasound_Treatment_Parameters_Dataset_Encoded.csv', index=False)

df_with_new_size_cm.head()


Unnamed: 0,tumor_size_cm,tumor_location,age,sex,intensity,frequency,duration
0,2.033987,1,31,0,10.356695,0.544624,16.187023
1,27.53004,1,65,1,14.499102,0.776775,32.818988
2,16.034069,1,66,0,19.604619,0.684721,34.491453
3,11.216021,1,56,1,15.330424,0.742391,32.693242
4,3.238718,1,67,1,7.701371,0.541171,9.478668


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [14]:
!pip install scikit-learn



In [18]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset from the CSV file
df = pd.read_csv('Ultrasound_Treatment_Parameters_Dataset_Encoded.csv')

# Split the data into train and test sets
train_data, test_data = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)

# Extract the input features and target columns
input_features = df.columns[1:-3]  # All columns except the last 3
target_columns = df.columns[-3:]  # The last 3 columns

# Split the data into inputs (X) and targets (y) for training and testing
X_train = train_data[input_features].values
Y_train = train_data[target_columns].values
X_test = test_data[input_features].values
Y_test = test_data[target_columns].values
# Assuming you have already loaded your data into X and Y


In [26]:
from sklearn.model_selection import RandomizedSearchCV

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Hyperparameter tuning for Random Forest
rf_param_grid = {
    'n_estimators': [50, 100, 200],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Hyperparameter tuning for Gradient Boosting
gb_param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 4, 5],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

best_rf_models = []
best_gb_models = []

for i in range(Y_train.shape[1]):
    # Random Forest
    rf = RandomForestRegressor(random_state=42)
    rf_random = RandomizedSearchCV(estimator=rf, param_distributions=rf_param_grid, n_iter=100, cv=3, verbose=2, random_state=42, n_jobs=-1)
    rf_random.fit(X_train, Y_train[:, i])
    best_rf = rf_random.best_estimator_
    best_rf_models.append(best_rf)

    # Gradient Boosting
    gb = GradientBoostingRegressor(random_state=42)
    gb_random = RandomizedSearchCV(estimator=gb, param_distributions=gb_param_grid, n_iter=100, cv=3, verbose=2, random_state=42, n_jobs=-1)
    gb_random.fit(X_train, Y_train[:, i])
    best_gb = gb_random.best_estimator_
    best_gb_models.append(best_gb)

# Make predictions with the tuned models
rf_predictions = np.zeros(Y_test.shape)
gb_predictions = np.zeros(Y_test.shape)
rf_mae_list = []
gb_mae_list = []

for i in range(Y_test.shape[1]):
    rf_predictions[:, i] = best_rf_models[i].predict(X_test)
    gb_predictions[:, i] = best_gb_models[i].predict(X_test)
    rf_mae_list.append(mean_absolute_error(Y_test[:, i], rf_predictions[:, i]))
    gb_mae_list.append(mean_absolute_error(Y_test[:, i], gb_predictions[:, i]))

rf_mae_overall = np.mean(rf_mae_list)
gb_mae_overall = np.mean(gb_mae_list)

print(f'Tuned Random Forest Overall MAE: {rf_mae_overall}')
print(f'Tuned Gradient Boosting Overall MAE: {gb_mae_overall}')

# Average predictions for ensemble
ensemble_predictions = (rf_predictions + gb_predictions) / 2
ensemble_mae_list = [mean_absolute_error(Y_test[:, i], ensemble_predictions[:, i]) for i in range(Y_test.shape[1])]
ensemble_mae_overall = np.mean(ensemble_mae_list)
print(f'Ensemble Overall MAE: {ensemble_mae_overall}')

Fitting 3 folds for each of 100 candidates, totalling 300 fits


  pid = os.fork()
120 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 96, in validate_parameter_constraints
    raise Invalid

Fitting 3 folds for each of 100 candidates, totalling 300 fits
Fitting 3 folds for each of 100 candidates, totalling 300 fits


120 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sk

Fitting 3 folds for each of 100 candidates, totalling 300 fits
Fitting 3 folds for each of 100 candidates, totalling 300 fits


120 fits failed out of a total of 300.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
120 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 638, in _validate_params
    validate_parameter_constraints(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/utils/_param_validation.py", line 96, in validate_parameter_constraints
    raise InvalidParameterError(
sk

Fitting 3 folds for each of 100 candidates, totalling 300 fits
Tuned Random Forest Overall MAE: 4.439610652317109
Tuned Gradient Boosting Overall MAE: 4.3441482564041465
Ensemble Overall MAE: 4.362839348711907


In [27]:
# Define the ANN model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(8, activation='relu'),
    Dense(Y_train.shape[1])
])

# Compile the model with Mean Absolute Error as a metric
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [28]:
# Print the model summary
model.summary()

# Define early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(X_train, Y_train, epochs=300, batch_size=32, validation_split=0.2 )


Epoch 1/300
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - loss: 210.9162 - mean_absolute_error: 10.7732 - val_loss: 190.7628 - val_mean_absolute_error: 9.8124
Epoch 2/300
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 159.8921 - mean_absolute_error: 8.6893 - val_loss: 82.5983 - val_mean_absolute_error: 5.6136
Epoch 3/300
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 70.8070 - mean_absolute_error: 5.6320 - val_loss: 51.8328 - val_mean_absolute_error: 4.8020
Epoch 4/300
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 56.7732 - mean_absolute_error: 5.2863 - val_loss: 46.4726 - val_mean_absolute_error: 4.6145
Epoch 5/300
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 48.6473 - mean_absolute_error: 4.8425 - val_loss: 45.1992 - val_mean_absolute_error: 4.6006
Epoch 6/300
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

NameError: name 'X' is not defined