In [None]:
import os
import pandas as pd

In [None]:
# Location of the input CSV. The path used to be fixed to one user's home
# directory, which prevents the notebook from running anywhere else. It can now
# be overridden through the QSPR_INPUT_CSV environment variable; when that
# variable is unset the original location is used, so existing runs behave
# exactly as before.
input_csv = os.environ.get(
    "QSPR_INPUT_CSV",
    "/home/s2861704/ppk/test_kin/prep_storage/test.csv",
)

# Read the comma-separated table; later cells refer to it as `stdf`.
stdf = pd.read_csv(input_csv, sep=',')
display(stdf)

In [None]:
from qsprpred.data import QSPRDataset
import os

# Folder handed to QSPRDataset as its `store_dir`; create it up front so the
# cell can be re-run without failing when it already exists.
dataset_dir = "prep_storage/data"
os.makedirs(dataset_dir, exist_ok=True)

# One target property, `pchembl_value_Mean`, declared as a REGRESSION task.
regression_target = {"name": "pchembl_value_Mean", "task": "REGRESSION"}

# Wrap the frame loaded earlier (`stdf`) in a QSPRDataset.
# `random_state` is pinned to 42.
# NOTE(review): `overwrite=True` presumably replaces a dataset previously
# stored under the same name - confirm against the QSPRpred documentation.
stdataset = QSPRDataset(
    name="SingleTaskDataset",
    df=stdf,
    target_props=[regression_target],
    store_dir=dataset_dir,
    overwrite=True,
    random_state=42,
)

# Keep the dataset's data frame as the last expression so the cell renders it.
stdataset.getDF()

In [None]:
from qsprpred.data.descriptors.fingerprints import MorganFP
from qsprpred.data import RandomSplit

# Specify a random split: 20% of the data becomes the test set, the remaining
# 80% the training set.
# Alternative splitters noted by the author (names not verified here - check
# them against the installed QSPRpred version): ScaffoldSplit(ter),
# TemporalSplit / StratifiedSplitter, ManualSplit, BootstrapSplit, and
# GBMTDataSplit with its variants GBMTRandomSplit and ClusterSplit.
rand_split = RandomSplit(dataset=stdataset, test_fraction=0.2)

# Feature calculator: Morgan fingerprint with radius 3 and 2048 bits.
morgan_fp = MorganFP(nBits=2048, radius=3)

# Calculate the compound features and split the dataset into train and test.
stdataset.prepareDataset(feature_calculators=[morgan_fp], split=rand_split)

# Report how many samples are in `y` (train) and `y_ind` (test).
print(f"Number of samples in train set: {len(stdataset.y)}")
print(f"Number of samples in test set: {len(stdataset.y_ind)}")

# Save the prepared dataset.
stdataset.save()

In [None]:
from qsprpred.models import SklearnModel
from sklearn.neighbors import KNeighborsRegressor
from qsprpred.models import CrossValAssessor, TestSetAssessor

# Folder handed to the model as its `base_dir`; create it if it is missing.
model_dir = "prep_storage/models"
os.makedirs(model_dir, exist_ok=True)

# KNeighborsRegressor is a scikit-learn estimator, so it is wrapped in
# QSPRpred's SklearnModel class.
stmodel = SklearnModel(
    name="SingleTaskModel",
    alg=KNeighborsRegressor,
    base_dir=model_dir,
)

# Assess performance on the training set using cross validation (metric "r2").
cross_val = CrossValAssessor("r2")
cross_val(stmodel, stdataset)

# Assess performance on the test set with the same metric.
test_eval = TestSetAssessor("r2")
test_eval(stmodel, stdataset)

# Fit the model on the complete dataset so it can be used further.
# This will save the fitted model and metadata to disk.
stmodel.fitDataset(stdataset)

# Optionally save the model and metadata to disk explicitly as well; the
# return value is bound to `_` so the cell does not render it.
_ = stmodel.save()

In [None]:
from qsprpred.plotting.regression import CorrelationPlot

# Build a correlation plot for the single-task model fitted above.
plotted_models = [stmodel]
plot = CorrelationPlot(plotted_models)

# `make` is called with both `save` and `show` enabled; it returns the axes and
# a summary object, which the next cell displays.
axes, summary = plot.make(show=True, save=True)

In [None]:
# Display the `summary` object returned by `plot.make(...)` in the previous
# cell; as the cell's last expression it is rendered as the cell output.
summary