In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, LeaveOneOut, KFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [None]:

# Load the pickled table and keep only the rows without any missing value.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
data = pd.read_pickle('../create_SL_data/data_18months_levels.pkl').dropna()

data

In [None]:
# Show the distinct patient identifiers present in `data`.
data['patient_number'].unique()

In [None]:
# Target: the BCRABL column.
y = data['BCRABL']
# Features: every column except the target and the patient identifier.
X = data.drop(columns=['BCRABL', 'patient_number'])

# Patient identifiers, row-aligned with X and y.
patient_numbers = data['patient_number']

## Leave-one-out nested cross-validation

In [None]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import LeaveOneOut, GridSearchCV
from sklearn.metrics import mean_squared_error

# Candidate regressors and the hyperparameter grid searched for each one.
# An empty grid means the model is fitted directly, without an inner search.
models = {
    'RandomForest': {
        'model': RandomForestRegressor(random_state=42),
        'params': {'n_estimators': [50, 100, 200], 'max_depth': [3, 5, 10, 50]}
    },
    'KNN': {
        'model': KNeighborsRegressor(),
        'params': {'n_neighbors': [3, 5, 7, 10], 'weights': ['uniform', 'distance']}
    },
    'LinearRegression': {
        'model': LinearRegression(),
        'params': {}
    }
}

# Nested cross-validation: the outer loop measures the error on one held-out
# row, the inner loop picks hyperparameters using the outer-training rows only.
# NOTE(review): LeaveOneOut splits by row. If a patient can contribute more
# than one row, a group-aware splitter keyed on `patient_numbers` would be
# needed to avoid leakage - confirm.
outer_cv = LeaveOneOut()
inner_cv = LeaveOneOut()
n_outer_folds = outer_cv.get_n_splits(X)

# One row of Random Forest feature importances per outer fold.
feature_importances = {
    name: pd.DataFrame(np.zeros((n_outer_folds, X.shape[1])), columns=X.columns)
    for name in models if name == 'RandomForest'
}
scores = {name: {'mse_scores': [], 'rmse_scores': [], 'best_params': []} for name in models}

for fold_idx, (train_idx, test_idx) in enumerate(outer_cv.split(X)):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    for name, config in models.items():
        if config['params']:
            # Inner search; GridSearchCV refits the winning configuration on
            # the whole outer-training set and exposes it as best_estimator_.
            grid_search = GridSearchCV(config['model'], config['params'], cv=inner_cv, scoring='neg_mean_squared_error')
            grid_search.fit(X_train, y_train)
            best_model = grid_search.best_estimator_
            scores[name]['best_params'].append(grid_search.best_params_)
        else:
            # Nothing to tune: fit the configured estimator directly.
            best_model = config['model']
            best_model.fit(X_train, y_train)
            scores[name]['best_params'].append({})

        y_pred = best_model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)

        scores[name]['mse_scores'].append(mse)
        scores[name]['rmse_scores'].append(rmse)

        if name == 'RandomForest':
            feature_importances[name].iloc[fold_idx, :] = best_model.feature_importances_

    print(f'Fold {fold_idx + 1} completed')

results = {}
for name in models:
    average_mse = np.mean(scores[name]['mse_scores'])
    # RMSE over all held-out rows is the root of the mean squared error.
    # Averaging the per-fold roots instead would give the mean absolute error,
    # because every outer test fold holds exactly one row.
    average_rmse = np.sqrt(average_mse)
    mean_abs_error = np.mean(scores[name]['rmse_scores'])

    # Report the hyperparameter combination chosen most often across the outer
    # folds rather than whatever the last fold happened to select. Ties go to
    # the combination that was selected first.
    selected = [tuple(sorted(params.items())) for params in scores[name]['best_params']]
    most_selected = dict(max(dict.fromkeys(selected), key=selected.count))

    results[name] = {
        'Average MSE': average_mse,
        'Average RMSE': average_rmse,
        'Mean Absolute Error': mean_abs_error,
        'Best Parameters': most_selected
    }

    if name == 'RandomForest':
        # Importances averaged over the outer folds, largest first.
        mean_fi = feature_importances[name].mean(axis=0).sort_values(ascending=False)
        results[name]['Feature Importances'] = mean_fi

for name, result in results.items():
    print(f"{name} Results:")
    for key, value in result.items():
        print(f"{key}: {value}")


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor

# Fit both regressors on every available row with fixed hyperparameters.
# NOTE(review): the hyperparameters presumably come from the cross-validation
# output - confirm.
model1 = RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42).fit(X, y)
model2 = KNeighborsRegressor(n_neighbors=10, weights='uniform')

model2.fit(X, y)

In [None]:
# Mean of the target, as a reference point for the error metrics.
y.mean()

In [None]:
# Summary statistics of the target.
y.describe()

In [None]:
# R^2 of the Random Forest on the rows it was fitted on (an in-sample score).
model1.score(X, y)

In [None]:
# R^2 of the KNN regressor on the rows it was fitted on (an in-sample score).
model2.score(X, y)

In [None]:
# Fit a Random Forest on all rows; the configuration is identical to `model1`.
model = RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42)
model.fit(X, y)

In [None]:
# Do not truncate long feature names when the table is displayed.
pd.set_option('display.max_colwidth', None)

importances = model.feature_importances_

# NOTE: this rebinds `feature_importances`, which the cross-validation cell
# used for a dict of per-fold tables, to the fitted model's importance array.
feature_importances = model.feature_importances_
features_df = pd.DataFrame({
    'Feature': X.columns,
    'Importance': feature_importances
}).sort_values(by='Importance', ascending=False)

# A dangling, indented fragment ("'176Yb_pS6 S240244',") used to follow this
# expression. It raised an IndentationError that stopped the whole cell from
# running, so it was removed.
features_df.head(10)

In [None]:
# Load the second pool.
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
data_pool2 = pd.read_pickle('../create_SL_data_pool2/data_18months_levels_an_pool2.pkl')

# Patient identifiers of pool 2; predictions are attached to this frame later.
pool2_patients = pd.DataFrame(data_pool2['patient_number'])

# Same feature/target split as for the first pool.
y_test = data_pool2['BCRABL']
X_test = data_pool2.drop(columns=['BCRABL', 'patient_number'])

y_test

In [None]:
# Random Forest predictions for the pool-2 rows.
model.predict(X_test)

In [None]:
# Attach the prediction and the month it refers to, and make the identifier an
# integer so that it can be matched against the response table.
# `model1` has the same configuration and training data as `model`.
pool2_patients = pool2_patients.assign(
    pred=model1.predict(X_test),
    month=18,
    patient_number=pool2_patients['patient_number'].astype(int),
)

pool2_patients

In [None]:
response = pd.read_csv('../response/responses_all.csv')

patient_numbers_pool2 = data_pool2['patient_number'].astype(int)

# Keep the response rows of the pool-2 patients only.
# NOTE(review): the name says "pool1" although the filter uses the pool-2
# identifiers; it is kept because the merge below refers to it.
response_pool1 = response.loc[response['patient_number'].isin(patient_numbers_pool2)]

response_pool1

In [None]:
# Outer join: keeps every measured response row and every prediction row,
# matching them on patient and month.
merged_data = response_pool1.merge(
    pool2_patients,
    how='outer',
    on=['patient_number', 'month'],
)
merged_data


In [None]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.express as px

colors = px.colors.qualitative.Plotly
fig = go.Figure()

# One colour per patient; the palette is reused cyclically when the patients
# outnumber the colours.
for idx, patient in enumerate(merged_data['patient_number'].unique()):
    patient_color = colors[idx % len(colors)]
    patient_data = merged_data[merged_data['patient_number'] == patient]
    months = patient_data['month']

    # Measured values over time.
    actual_trace = go.Scatter(
        x=months, y=patient_data['BCRABL'],
        mode='lines+markers', name=f'Patient {patient} Actual',
        line=dict(color=patient_color),
        marker=dict(color=patient_color)
    )
    fig.add_trace(actual_trace)

    predicted_data = patient_data[months == 18]
    if predicted_data.empty:
        continue

    # Larger marker for the value predicted at month 18.
    fig.add_trace(go.Scatter(
        x=[18], y=predicted_data['pred'],
        mode='markers', name=f'Patient {patient} Predicted',
        marker=dict(color=patient_color, size=10),
        showlegend=False
    ))

    pred_value = predicted_data['pred'].values[0]
    dashed = dict(dash='dash', color=patient_color)

    # Dashed connectors from the neighbouring measurements to the prediction.
    before_pred = patient_data[months < 18].tail(1)
    after_pred = patient_data[months > 18].head(1)

    if not before_pred.empty:
        fig.add_trace(go.Scatter(
            x=[before_pred['month'].values[0], 18],
            y=[before_pred['BCRABL'].values[0], pred_value],
            mode='lines', line=dashed,
            showlegend=False
        ))
    if not after_pred.empty:
        fig.add_trace(go.Scatter(
            x=[18, after_pred['month'].values[0]],
            y=[pred_value, after_pred['BCRABL'].values[0]],
            mode='lines', line=dashed,
            showlegend=False
        ))

fig.update_layout(
    yaxis_type='log',
    yaxis_title='BCR-ABL%',
    xaxis_title='Test Time (Months)',
    title='Predicted BCR::ABL% at 18 months and actual values'
)

# Bridge missing measurements instead of breaking the lines.
fig.update_traces(connectgaps=True)

# Horizontal reference line at 0.01.
fig.add_hline(y=0.01)

fig.show()



In [None]:
# Prediction frame for the first pool: integer patient id, predicted value and
# the month the prediction refers to.
# NOTE(review): `model1` was fitted on these same rows, so these are in-sample
# predictions.
pool1_patients = pd.DataFrame(data['patient_number']).assign(
    patient_number=lambda frame: frame['patient_number'].astype(int),
    pred=model1.predict(X),
    month=18,
)

pool1_patients

In [None]:
# Outer join of all measured responses with the pool-1 predictions, matched on
# patient and month.
merged_data = pd.merge(response, pool1_patients, on=['patient_number', 'month'], how='outer')

# Replace exact zeros in BCRABL by 0.001.
# NOTE(review): presumably so that these points stay drawable on the
# log-scaled axis used by the plots - confirm.
merged_data['BCRABL'] = merged_data['BCRABL'].replace(0, 0.001)

# Display the frame last: a bare expression only renders when it is the final
# statement of the cell, and this way it shows the adjusted values.
merged_data

In [None]:
# One frame per batch.
# NOTE(review): rows whose `batch` is missing match none of the four filters
# and are therefore left out of every batch frame.
merged_data_1, merged_data_2, merged_data_3, merged_data_4 = (
    merged_data[merged_data['batch'] == batch_id] for batch_id in (1, 2, 3, 4)
)

In [None]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.express as px

colors = px.colors.qualitative.Plotly  

fig = go.Figure()

for idx, patient in enumerate(merged_data_1['patient_number'].unique()):
    patient_data = merged_data_1[merged_data_1['patient_number'] == patient]
    patient_color = colors[idx % len(colors)]  

    fig.add_trace(go.Scatter(
        x=patient_data['month'], y=patient_data['BCRABL'],
        mode='lines+markers', name=f'Patient {patient} Actual',
        line=dict(color=patient_color),  
        marker=dict(color=patient_color) 
    ))
    

    predicted_data = patient_data[patient_data['month'] == 18]
    if not predicted_data.empty:
        fig.add_trace(go.Scatter(
            x=[18], y=predicted_data['pred'],
            mode='markers', name=f'Patient {patient} Predicted',
            marker=dict(color=patient_color, size=10),  
            showlegend=False 
        ))
        
        before_pred = patient_data[patient_data['month'] < 18].tail(1)
        after_pred = patient_data[patient_data['month'] > 18].head(1)
        
        if not before_pred.empty:
            fig.add_trace(go.Scatter(
                x=[before_pred['month'].values[0], 18],
                y=[before_pred['BCRABL'].values[0], predicted_data['pred'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False  
            ))
        if not after_pred.empty:
            fig.add_trace(go.Scatter(
                x=[18, after_pred['month'].values[0]],
                y=[predicted_data['pred'].values[0], after_pred['BCRABL'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False  
            ))

fig.update_layout(
    title='Predicted BCR::ABL% at 18 months and actual values Batch 1',
    xaxis_title='Test Time (Months)',
    yaxis_title='BCR-ABL%',
    yaxis_type='log'  
)

fig.update_traces(connectgaps=True)


fig.add_hline(y=0.01)

fig.show()


In [None]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.express as px

colors = px.colors.qualitative.Plotly  

fig = go.Figure()

for idx, patient in enumerate(merged_data_2['patient_number'].unique()):
    patient_data = merged_data_2[merged_data_2['patient_number'] == patient]
    patient_color = colors[idx % len(colors)]  

    fig.add_trace(go.Scatter(
        x=patient_data['month'], y=patient_data['BCRABL'],
        mode='lines+markers', name=f'Patient {patient} Actual',
        line=dict(color=patient_color),  
        marker=dict(color=patient_color) 
    ))
    
    predicted_data = patient_data[patient_data['month'] == 18]
    if not predicted_data.empty:
        fig.add_trace(go.Scatter(
            x=[18], y=predicted_data['pred'],
            mode='markers', name=f'Patient {patient} Predicted',
            marker=dict(color=patient_color, size=10),
            showlegend=False  
        ))
        
        before_pred = patient_data[patient_data['month'] < 18].tail(1)
        after_pred = patient_data[patient_data['month'] > 18].head(1)
        
        if not before_pred.empty:
            fig.add_trace(go.Scatter(
                x=[before_pred['month'].values[0], 18],
                y=[before_pred['BCRABL'].values[0], predicted_data['pred'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False  
            ))
        if not after_pred.empty:
            fig.add_trace(go.Scatter(
                x=[18, after_pred['month'].values[0]],
                y=[predicted_data['pred'].values[0], after_pred['BCRABL'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False  
            ))

fig.update_layout(
    title='Predicted BCR::ABL% at 18 months and actual values Batch 2',
    xaxis_title='Test Time (Months)',
    yaxis_title='BCR-ABL%',
    yaxis_type='log'  
)

fig.update_traces(connectgaps=True)

fig.add_hline(y=0.01)

fig.show()


In [None]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.express as px

colors = px.colors.qualitative.Plotly  

fig = go.Figure()

for idx, patient in enumerate(merged_data_3['patient_number'].unique()):
    patient_data = merged_data_3[merged_data_3['patient_number'] == patient]
    patient_color = colors[idx % len(colors)]  

    fig.add_trace(go.Scatter(
        x=patient_data['month'], y=patient_data['BCRABL'],
        mode='lines+markers', name=f'Patient {patient} Actual',
        line=dict(color=patient_color),  
        marker=dict(color=patient_color)  
    ))
    
    predicted_data = patient_data[patient_data['month'] == 18]
    if not predicted_data.empty:
        fig.add_trace(go.Scatter(
            x=[18], y=predicted_data['pred'],
            mode='markers', name=f'Patient {patient} Predicted',
            marker=dict(color=patient_color, size=10),  
            showlegend=False  
        ))
        
        
        before_pred = patient_data[patient_data['month'] < 18].tail(1)
        after_pred = patient_data[patient_data['month'] > 18].head(1)
        
        if not before_pred.empty:
            fig.add_trace(go.Scatter(
                x=[before_pred['month'].values[0], 18],
                y=[before_pred['BCRABL'].values[0], predicted_data['pred'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False  
            ))
        if not after_pred.empty:
            fig.add_trace(go.Scatter(
                x=[18, after_pred['month'].values[0]],
                y=[predicted_data['pred'].values[0], after_pred['BCRABL'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False 
            ))

# Update layout
fig.update_layout(
    title='Predicted BCR::ABL% at 18 months and actual values Batch 3',
    xaxis_title='Test Time (Months)',
    yaxis_title='BCR-ABL%',
    yaxis_type='log' 
)

fig.update_traces(connectgaps=True)

fig.add_hline(y=0.01)

fig.show()


In [None]:
import plotly.graph_objects as go
import pandas as pd
import numpy as np
import plotly.express as px

colors = px.colors.qualitative.Plotly  

fig = go.Figure()

for idx, patient in enumerate(merged_data_4['patient_number'].unique()):
    patient_data = merged_data_4[merged_data_4['patient_number'] == patient]
    patient_color = colors[idx % len(colors)]  

    fig.add_trace(go.Scatter(
        x=patient_data['month'], y=patient_data['BCRABL'],
        mode='lines+markers', name=f'Patient {patient} Actual',
        line=dict(color=patient_color), 
        marker=dict(color=patient_color)  
        ))
    
    predicted_data = patient_data[patient_data['month'] == 18]
    if not predicted_data.empty:
        fig.add_trace(go.Scatter(
            x=[18], y=predicted_data['pred'],
            mode='markers', name=f'Patient {patient} Predicted',
            marker=dict(color=patient_color, size=10),  
            showlegend=False  
        ))
        
        before_pred = patient_data[patient_data['month'] < 18].tail(1)
        after_pred = patient_data[patient_data['month'] > 18].head(1)
        
        if not before_pred.empty:
            fig.add_trace(go.Scatter(
                x=[before_pred['month'].values[0], 18],
                y=[before_pred['BCRABL'].values[0], predicted_data['pred'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False  
            ))
        if not after_pred.empty:
            fig.add_trace(go.Scatter(
                x=[18, after_pred['month'].values[0]],
                y=[predicted_data['pred'].values[0], after_pred['BCRABL'].values[0]],
                mode='lines', line=dict(dash='dash', color=patient_color),
                showlegend=False  
            ))

fig.update_layout(
    title='Predicted BCR::ABL% at 18 months and actual values Batch 4',
    xaxis_title='Test Time (Months)',
    yaxis_title='BCR-ABL%',
    yaxis_type='log' 
)

fig.update_traces(connectgaps=True)

fig.add_hline(y=0.01)

fig.show()
