In [67]:
import pandas  as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score


In [68]:
targets = ["target_CHEMBL240",
          "target_CHEMBL260",
          "target_CHEMBL279"]

methods = ["RF", "xgboost", "MLP"]


sorting = [True, False]

sorting_vars = ["logp", "pchembl"]

druglike = [True, False]

In [69]:
# DataFrame to store results
results = []

for target in targets:
    for var in sorting_vars:
        for method in methods:
            
            print(target)
            print(method)
            print(var)
            
            df = pd.read_csv(f"../{target}-1_results/{var}_sorted_fcv_results_{method}_testing batches.csv")
            
            
            # Group by 'testing batch' and calculate RMSE and R^2 for each group
            for batch, group in df.groupby('testing batch'):
                rmse = np.sqrt(mean_squared_error(group['True Values'], group['Predictions']))
                r_squared = r2_score(group['True Values'], group['Predictions'])

                # Collect results in the DataFrame
                results.append({
                    'Target': target,
                    'Sorting_Var': var,
                    'Method': method,
                    'Testing Batch': batch,
                    'RMSE': rmse,
                    'R2': r_squared,
                    'evalt': 'sorted_fcv'
                })


# Convert the list of dictionaries to a DataFrame
final_results = pd.DataFrame(results)
# Save the DataFrame to a CSV file
final_results.to_csv('metrics_sortedforwardCV.csv', index=False)


target_CHEMBL240
RF
logp
target_CHEMBL240
xgboost
logp
target_CHEMBL240
MLP
logp
target_CHEMBL240
RF
pchembl
target_CHEMBL240
xgboost
pchembl
target_CHEMBL240
MLP
pchembl
target_CHEMBL260
RF
logp
target_CHEMBL260
xgboost
logp
target_CHEMBL260
MLP
logp
target_CHEMBL260
RF
pchembl
target_CHEMBL260
xgboost
pchembl
target_CHEMBL260
MLP
pchembl
target_CHEMBL279
RF
logp
target_CHEMBL279
xgboost
logp
target_CHEMBL279
MLP
logp
target_CHEMBL279
RF
pchembl
target_CHEMBL279
xgboost
pchembl
target_CHEMBL279
MLP
pchembl


In [70]:
final_results

Unnamed: 0,Target,Sorting_Var,Method,Testing Batch,RMSE,R2,evalt
0,target_CHEMBL240,logp,RF,1,0.539731,0.336317,sorted_fcv
1,target_CHEMBL240,logp,RF,2,0.567511,0.014182,sorted_fcv
2,target_CHEMBL240,logp,RF,3,1.089359,-0.077649,sorted_fcv
3,target_CHEMBL240,logp,RF,4,0.707705,0.518710,sorted_fcv
4,target_CHEMBL240,logp,RF,5,0.758903,0.613842,sorted_fcv
...,...,...,...,...,...,...,...
157,target_CHEMBL279,pchembl,MLP,5,0.932578,-153.216689,sorted_fcv
158,target_CHEMBL279,pchembl,MLP,6,0.959277,-92.407747,sorted_fcv
159,target_CHEMBL279,pchembl,MLP,7,0.941002,-109.565639,sorted_fcv
160,target_CHEMBL279,pchembl,MLP,8,1.743077,-166.221979,sorted_fcv


In [71]:
# DataFrame to store results
results = []

for target in targets:
    for method in methods:

        print(target)
        print(method)
        print(var)

        df = pd.read_csv(f"../{target}-1_results/unsorted_fcv_results_{method}_testing batches.csv")
        
            
        # Group by 'testing batch' and calculate RMSE and R^2 for each group
        for batch, group in df.groupby('testing batch'):
            rmse = np.sqrt(mean_squared_error(group['True Values'], group['Predictions']))
            r_squared = r2_score(group['True Values'], group['Predictions'])

            # Collect results in the DataFrame
            results.append({
                'Target': target,
                'Sorting_Var': "None",
                'Method': method,
                'Testing Batch': batch,
                'RMSE': rmse,
                'R2': r_squared,
                'evalt': 'unsorted_fcv'
            })


# Convert the list of dictionaries to a DataFrame
final_results = pd.DataFrame(results)
# Save the DataFrame to a CSV file

final_results.to_csv('metrics_unsortedforwardCV.csv', index=False)


target_CHEMBL240
RF
pchembl
target_CHEMBL240
xgboost
pchembl
target_CHEMBL240
MLP
pchembl
target_CHEMBL260
RF
pchembl
target_CHEMBL260
xgboost
pchembl
target_CHEMBL260
MLP
pchembl
target_CHEMBL279
RF
pchembl
target_CHEMBL279
xgboost
pchembl
target_CHEMBL279
MLP
pchembl


In [72]:
final_results

Unnamed: 0,Target,Sorting_Var,Method,Testing Batch,RMSE,R2,evalt
0,target_CHEMBL240,,RF,1,0.751020,0.166407,unsorted_fcv
1,target_CHEMBL240,,RF,2,0.673769,0.103620,unsorted_fcv
2,target_CHEMBL240,,RF,3,0.658768,0.308986,unsorted_fcv
3,target_CHEMBL240,,RF,4,0.719491,0.668793,unsorted_fcv
4,target_CHEMBL240,,RF,5,0.736661,0.536954,unsorted_fcv
...,...,...,...,...,...,...,...
76,target_CHEMBL279,,MLP,5,0.783568,0.507207,unsorted_fcv
77,target_CHEMBL279,,MLP,6,0.719734,0.491514,unsorted_fcv
78,target_CHEMBL279,,MLP,7,0.688229,0.618831,unsorted_fcv
79,target_CHEMBL279,,MLP,8,0.700936,0.626667,unsorted_fcv


In [73]:
# DataFrame to store results
results = []

for target in targets:
    for method in methods:

        print(target)
        print(method)
        print(var)

        df = pd.read_csv(f"../{target}-1_results/cv_results_{method}_testing batches.csv")
        # Group by 'testing batch' and calculate RMSE and R^2 for each group
        for batch, group in df.groupby('testing batch'):
            rmse = np.sqrt(mean_squared_error(group['True Values'], group['Predictions']))
            r_squared = r2_score(group['True Values'], group['Predictions'])

            # Collect results in the DataFrame
            results.append({
                'Target': target,
                'Sorting_Var': "None",
                'Method': method,
                'Testing Batch': batch,
                'RMSE': rmse,
                'R2': r_squared,
                'evalt': 'cv'
            })


# Convert the list of dictionaries to a DataFrame
final_results = pd.DataFrame(results)
# Save the DataFrame to a CSV file

final_results.to_csv('metrics_CV.csv', index=False)


target_CHEMBL240
RF
pchembl
target_CHEMBL240
xgboost
pchembl
target_CHEMBL240
MLP
pchembl
target_CHEMBL260
RF
pchembl
target_CHEMBL260
xgboost
pchembl
target_CHEMBL260
MLP
pchembl
target_CHEMBL279
RF
pchembl
target_CHEMBL279
xgboost
pchembl
target_CHEMBL279
MLP
pchembl


In [74]:
final_results

Unnamed: 0,Target,Sorting_Var,Method,Testing Batch,RMSE,R2,evalt
0,target_CHEMBL240,,RF,1,0.588950,0.609461,cv
1,target_CHEMBL240,,RF,2,0.492309,0.783136,cv
2,target_CHEMBL240,,RF,3,0.451102,0.753985,cv
3,target_CHEMBL240,,RF,4,0.494430,0.753029,cv
4,target_CHEMBL240,,RF,5,0.560429,0.789436,cv
...,...,...,...,...,...,...,...
85,target_CHEMBL279,,MLP,6,0.667522,0.597145,cv
86,target_CHEMBL279,,MLP,7,0.748181,0.497309,cv
87,target_CHEMBL279,,MLP,8,0.708776,0.527010,cv
88,target_CHEMBL279,,MLP,9,0.635980,0.635232,cv


In [75]:
# DataFrame to store results
results = []

for target in targets:
    for method in methods:

        print(target)
        print(method)
        print(var)

        df = pd.read_csv(f"../{target}-1_results/scaffold_cv_results_{method}_testing batches.csv")
        # Group by 'testing batch' and calculate RMSE and R^2 for each group
        for batch, group in df.groupby('testing batch'):
            rmse = np.sqrt(mean_squared_error(group['True Values'], group['Predictions']))
            r_squared = r2_score(group['True Values'], group['Predictions'])

            # Collect results in the DataFrame
            results.append({
                'Target': target,
                'Sorting_Var': "None",
                'Method': method,
                'Testing Batch': batch,
                'RMSE': rmse,
                'R2': r_squared,
                'evalt': 'cv_scaffold'
            })


# Convert the list of dictionaries to a DataFrame
final_results = pd.DataFrame(results)
# Save the DataFrame to a CSV file

final_results.to_csv('metrics_scaffold_CV.csv', index=False)


target_CHEMBL240
RF
pchembl
target_CHEMBL240
xgboost
pchembl
target_CHEMBL240
MLP
pchembl
target_CHEMBL260
RF
pchembl
target_CHEMBL260
xgboost
pchembl
target_CHEMBL260
MLP
pchembl
target_CHEMBL279
RF
pchembl
target_CHEMBL279
xgboost
pchembl
target_CHEMBL279
MLP
pchembl


In [76]:

metrics_sortedforwardCV = pd.read_csv("metrics_sortedforwardCV.csv")
metrics_unsortedforwardCV = pd.read_csv("metrics_unsortedforwardCV.csv")
metrics_CV = pd.read_csv("metrics_CV.csv")
metrics_scaffold_CV = pd.read_csv("metrics_scaffold_CV.csv")

# Concatenating the DataFrames
combined_metrics = pd.concat([metrics_sortedforwardCV, metrics_unsortedforwardCV, metrics_CV, metrics_scaffold_CV], axis=0)

# Optional: Reset index if you want a clean, sequential index in the new DataFrame
combined_metrics.reset_index(drop=True, inplace=True)

# Save the concatenated DataFrame to a CSV file
combined_metrics.to_csv('combined_metrics.csv', index=False)

# Display the DataFrame to verify the contents
combined_metrics


Unnamed: 0,Target,Sorting_Var,Method,Testing Batch,RMSE,R2,evalt
0,target_CHEMBL240,logp,RF,1,0.539731,0.336317,sorted_fcv
1,target_CHEMBL240,logp,RF,2,0.567511,0.014182,sorted_fcv
2,target_CHEMBL240,logp,RF,3,1.089359,-0.077649,sorted_fcv
3,target_CHEMBL240,logp,RF,4,0.707705,0.518710,sorted_fcv
4,target_CHEMBL240,logp,RF,5,0.758903,0.613842,sorted_fcv
...,...,...,...,...,...,...,...
418,target_CHEMBL279,,MLP,6,0.909121,0.461547,cv_scaffold
419,target_CHEMBL279,,MLP,7,0.890862,0.354422,cv_scaffold
420,target_CHEMBL279,,MLP,8,0.879394,0.318483,cv_scaffold
421,target_CHEMBL279,,MLP,9,0.818693,0.550499,cv_scaffold
