In [1]:
import os
import sys

PACKAGE_DIR = "/kaggle/src"


def add_to_sys_path(path):
    """Append ``path`` to ``sys.path`` unless it is already listed.

    Keeps this cell idempotent: re-running it does not pile up duplicate
    entries on the import search path.
    """
    if path not in sys.path:
        sys.path.append(path)


# Make the local source tree and its Penguin-ML-Library subdirectory importable.
add_to_sys_path(PACKAGE_DIR)
add_to_sys_path(os.path.join(PACKAGE_DIR, "Penguin-ML-Library"))

In [2]:
import whoosh_utils

Processing /kaggle/input/whoosh-wheel-2-7-4/Whoosh-2.7.4-py2.py3-none-any.whl
Whoosh is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.


[0m

In [3]:
import gc
import warnings

import numpy as np
import polars as pl
import yaml
from tqdm import tqdm

from utils import compute_ap, evaluate, load_list_bz2

In [4]:
# Load the index and build the searcher / query parser that every query cell
# below relies on (`searcher`, `qp`). Run this cell before any query cell.
TRAIN_INDEX_DIR = "/kaggle/input/train-index-2500/index_2500_200k"
train_idx = whoosh_utils.load_index(TRAIN_INDEX_DIR)
searcher = whoosh_utils.get_searcher(train_idx)
qp = whoosh_utils.get_query_parser()
# Optional sanity check (presumably the number of indexed documents):
# train_idx.doc_count()

In [5]:
# Baseline: one `cpc` term and one `ti` term with no explicit operator.
# The hit count, first id and first score are shown for comparison below.
query = "cpc:G16B40/10 ti:techniques"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


17.638431144575904

In [32]:
# AND the two-term group with one of its own terms. In the recorded run the
# score equals that of the plain `cpc:G16B40/10 ti:techniques` query, i.e. the
# repeated term adds nothing.
query = "(cpc:G16B40/10 ti:techniques) AND (ti:techniques)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

US-10825672-B2


17.638431144575904

In [37]:
# OR the two-term group with `ti:techniques` alone. In the recorded run the
# score rises above the plain two-term query (about 24.96 vs about 17.64), so
# the OR-ed clause contributes on top of the group.
query = "(cpc:G16B40/10 ti:techniques) OR (ti:techniques)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

US-10825672-B2


24.9611714015678

In [40]:
# OR the two-term group with each of its terms as a separate clause. The
# recorded score (about 35.28) is twice that of the plain two-term query.
query = "(cpc:G16B40/10 ti:techniques) OR (ti:techniques) OR (cpc:G16B40/10)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

US-10825672-B2


35.27686228915181

In [41]:
# OR the two-term group with an exact copy of itself. In the recorded run the
# score is the same as for a single copy, so the duplicate clause is not
# counted twice.
query = "(cpc:G16B40/10 ti:techniques) OR (cpc:G16B40/10 ti:techniques)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

US-10825672-B2


17.638431144575904

In [47]:
# A `cpc` term combined with a parenthesised `detd` term.
# NOTE(review): `suflate` may be a typo for `sulfate`. The recorded run still
# returned a hit, so the query text is left as written; confirm the intent.
query = "cpc:A23L33/135 (detd:suflate)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

US-2012171163-A1


30.69552963691612

## AND: adjacent terms compared with each term on its own

In [50]:
# Both terms side by side with no operator. The recorded run returns a single
# hit, far fewer than either term alone in the next two cells, so adjacent
# terms appear to be AND-ed by this parser.
query = "cpc:G16B40/10 ti:techniques"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


17.638431144575904

In [58]:
# The `cpc` term on its own, to see its hit count and score in isolation.
query = "cpc:G16B40/10"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

17
US-2022181132-A1


10.315690887584008

In [59]:
# The `ti` term on its own, to see its hit count and score in isolation.
# NOTE(review): this is the only cell that passes `reverse=True`. If that flips
# the result ordering as in Whoosh's `Searcher.search`, the first entry shown
# here is not the best-scoring hit; confirm this is intended.
query = "ti:techniques"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50, reverse=True)
print(len(results))
first_hit = results[0]  # first entry in the returned (reversed) ordering
print(first_hit["id"])
first_hit.score

358
US-2021068014-A1


7.322740256991895

## OR: combining two multi-term groups

In [80]:
# First group on its own: `cpc` term plus `ti:techniques`.
query = "cpc:G16B40/10 ti:techniques"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


17.638431144575904

In [81]:
# Second group on its own: same `cpc` term, but a different `ti` term.
query = "cpc:G16B40/10 ti:defect"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


18.9470425545608

In [82]:
# OR of the two groups above. In the recorded run the score (about 36.59) is
# the sum of the two individual group scores (about 18.95 + about 17.64).
query = "(cpc:G16B40/10 ti:defect) OR (cpc:G16B40/10 ti:techniques)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


36.5854736991367

## Boost: does repeating a clause raise the score?

In [88]:
# Repeat the group verbatim via OR. The recorded score matches a single copy
# (about 17.64), so an identical clause gives no boost.
query = "(cpc:G16B40/10 ti:techniques) OR (cpc:G16B40/10 ti:techniques)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


17.638431144575904

In [89]:
# Repeat the group via OR with its two terms swapped. The recorded score
# doubles (about 35.28), so the reordered clause is counted as a separate
# clause.
query = "(cpc:G16B40/10 ti:techniques) OR (ti:techniques cpc:G16B40/10)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


35.27686228915181

In [92]:
# Same swapped-order repetition, but joined with AND. The recorded score stays
# at the single-group value (about 17.64), unlike the OR variant.
query = "(cpc:G16B40/10 ti:techniques) AND (ti:techniques cpc:G16B40/10)"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


17.638431144575904

In [90]:
# Repeat the group via OR with the field values written as quoted phrases.
# The recorded score stays at the single-group value (about 17.64).
# NOTE(review): there is no space between the quoted `cpc` value and `ti:` in
# the second clause; confirm this is intentional.
query = '(cpc:G16B40/10 ti:techniques) OR (cpc:"G16B40/10"ti:"techniques")'
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


17.638431144575904

In [91]:
# Repeat the group via OR with the field values wrapped in parentheses.
# The recorded score stays at the single-group value (about 17.64).
query = "(cpc:G16B40/10 ti:techniques) OR (cpc:(G16B40/10) ti:(techniques))"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

1
US-10825672-B2


17.638431144575904

In [6]:
# Exact `cpc` term only; reference point for the wildcard variants that follow.
query = "cpc:G16B40/10"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

17
US-2022181132-A1


10.315690887584008

In [11]:
# Exact `cpc` term followed by `?` / `*` wildcard variants of the same code.
# In the recorded run the hit count equals the exact-term query (17) while the
# score is three times higher (about 30.95 vs about 10.32).
# NOTE(review): only the first term carries the `cpc:` prefix; the others go to
# whatever default field `qp` uses. Confirm that is the intended field.
query = "cpc:G16B40/10 G16B40/1? G16B40/1*  G16B40/1*"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

17
US-2022181132-A1


30.947072662752024

In [1]:
# More wildcard placements for the same code (`?` in the middle and at the end).
# Depends on `qp` and `searcher` from the index-loading cell: the recorded
# NameError comes from running this cell on a fresh kernel before that setup.
# NOTE(review): only the first term carries the `cpc:` prefix; confirm the
# remaining terms hit the intended field.
query = "cpc:G16B40/10 G16B40/1? G16B40/1* G16B40/?0 G16B40?10"
to_search = qp.parse(query)
results = searcher.search(to_search, limit=50)
print(len(results))
first_hit = results[0]  # first entry in the returned ordering
print(first_hit["id"])
first_hit.score

NameError: name 'qp' is not defined