In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import time

# Init

In [None]:
import fp_utils

In [None]:
# Call fp_utils' settings initialiser before any loader/finder is used in this notebook.
# NOTE(review): what exactly it configures is not visible from here.
fp_utils.settings.init_fp_utils()

# Data I/O

In [None]:
# Directory holding the notebook's input files, relative to the working directory.
# Also passed to the drive-based finders further down.
data_path = Path('../data')

In [None]:
from fp_utils.data_io import DataFrameLoader

In [None]:
# Load the main frame from the pickle file under data_path.
df = DataFrameLoader.pickle(data_path / '1e5.pickle')
# Show the frame's shape as the cell output.
df.shape

In [None]:
# Column labels for the column-restricted load of the frame; read from the shared
# data directory (data_path) rather than a second hard-coded copy of the path.
cols2 = np.load(data_path / 'cols.npy')

In [None]:
# Load the same pickle again, this time passing columns=cols2 to the loader.
# The file is addressed through data_path, matching how the first load builds its path.
df2 = DataFrameLoader.pickle(data_path / '1e5.pickle', columns=cols2)

# Speed testing

In [None]:
from fp_utils.finders import BFDriveFinder, BFRamFinder, BFPDriveFinder, BFPRamFinder
from fp_utils.tests import FinderSpeedTester
from fp_utils.packing_mixins import PickleMixin

In [None]:
class BFD(BFDriveFinder, PickleMixin):
    """BFDriveFinder combined with PickleMixin; defines no members of its own.

    NOTE(review): presumably the mixin supplies the pickle-based packing the
    drive finder relies on — confirm against fp_utils.packing_mixins.
    """

class BFPD(BFPDriveFinder, PickleMixin):
    """BFPDriveFinder combined with PickleMixin; defines no members of its own.

    NOTE(review): presumably the mixin supplies the pickle-based packing the
    drive finder relies on — confirm against fp_utils.packing_mixins.
    """

In [None]:
# BFD finder built from df; data_path is its second argument —
# presumably the directory it stores its data in (confirm).
bf_drive_finder = BFD(df, data_path)

In [None]:
# BFRamFinder built from df alone (no path argument, unlike the drive finders).
bf_ram_finder = BFRamFinder(df)

In [None]:
# BFPD finder built from df; data_path is its second argument —
# presumably the directory it stores its data in (confirm).
bfp_drive_finder = BFPD(df, data_path)

In [None]:
# BFPRamFinder built from df alone (no path argument, unlike the drive finders).
bfp_ram_finder = BFPRamFinder(df)

In [None]:
# All four finders in one list. Order matters: the first correctness tester
# takes finders[0] separately from finders[1:].
finders = [
    bf_drive_finder,
    bfp_drive_finder,
    bf_ram_finder,
    bfp_ram_finder,
]

In [None]:
# Speed tester over the full list of finders.
speed_tester = FinderSpeedTester(finders)

In [None]:
# Switch fp_utils' progress bar off through the package settings before timing.
fp_utils.settings.set_progress_bar(False)
# Random speed test against df; the return value is this cell's displayed output.
# NOTE(review): the positional 2 is presumably the number of random queries — confirm.
speed_tester.test_random(
    df,
    2,
    verbose=True,
)

In [None]:
# Same random speed test, now with ans_count=10; the result is kept in `stat`
# so it can be plotted.
stat = speed_tester.test_random(df, 2, ans_count=10, verbose=True)

In [None]:
# Render the collected speed statistics with as_plot at an 8x5 figure size.
stat.as_plot(figsize=(8, 5))

In [None]:
# Render the same statistics with as_boxplot at an 8x5 figure size.
stat.as_boxplot(figsize=(8, 5))

# Correctness testing

In [None]:
from fp_utils.tests import FinderCorrectnessTester

In [None]:
# finders[0] (bf_drive_finder) is passed on its own as the first argument and the
# remaining three finders as the second — presumably reference vs. finders under
# test (confirm against FinderCorrectnessTester).
correctness_tester = FinderCorrectnessTester(finders[0], finders[1:])

In [None]:
# Random correctness check against df; the return value is displayed as cell output.
correctness_tester.test_random(df, 2)

In [None]:
# Run test_all on two randomly sampled rows of df. No random_state is passed,
# so the rows differ between runs unless a global seed is set elsewhere.
correctness_tester.test_all(df.sample(2), verbose=True)

In [None]:
# RAM finder built over a random half of df's rows (row count rounded down).
# No random_state is passed, so the subset differs between runs unless a global
# seed is set elsewhere.
# NOTE(review): presumably a deliberately incomplete ("wa" = wrong answer?) finder
# for exercising the correctness tester — confirm.
wa_finder = BFRamFinder(df.sample(len(df) // 2))

In [None]:
# Rebinds correctness_tester: bf_drive_finder is the first argument, and
# bfp_drive_finder plus the half-data wa_finder are passed as the second.
correctness_tester = FinderCorrectnessTester(bf_drive_finder, [bfp_drive_finder, wa_finder])

In [None]:
# Random correctness check with the current correctness_tester, verbose output on.
correctness_tester.test_random(df, 2, verbose=True)

# Draw

In [None]:
from fp_utils.draw import MoleculeDrawer

In [None]:
# Load the query fingerprint from the shared data directory (data_path) instead of
# repeating the directory as a hard-coded string.
fingerprint = pd.read_pickle(data_path / "f.pickle")
# Label it with the molecule string (looks like SMILES — confirm); the name is what
# gets handed to MoleculeDrawer.draw_one.
fingerprint.name = 'NCCc1ccc4c(c1)OCCO4'
# Select the entries by df's column labels, in df's column order, so the query
# lines up with the frame the finders were built from.
# NOTE(review): assumes fingerprint is a pandas Series indexed by those labels.
fingerprint = fingerprint[df.columns]

In [None]:
# Draw the molecule identified by the string stored in fingerprint.name.
MoleculeDrawer.draw_one(fingerprint.name)

In [None]:
# Materialise the results of find() into a list. ans_count=10 is passed through —
# presumably it caps the number of answers returned (confirm).
answers = list(bf_ram_finder.find(fingerprint, ans_count=10))