# Querying Google Patents

Develop a sensible query to fetch relevant Google Patents records (without incurring too much cost)

In [None]:
from discovery_child_development.utils import google_utils, keywords as kw
from discovery_child_development import PROJECT_DIR

# Hand-maintained keyword list that seeds the patents query.
KEYWORD_FILE = PROJECT_DIR / "discovery_child_development/config/patents/keywords.txt"
# Destination path handed to the S3 upload at the end of the notebook.
# NOTE(review): presumably an S3 key prefix rather than a local directory - confirm in google_utils.
PATENT_PATH = "data/patents/"

In [None]:
# Read the seed keywords, then widen the list with variants in which "child"
# is swapped for "infant" and for "baby".
# NOTE(review): assumes kw.replace_word returns a new collection that can be
# concatenated with `+` - confirm in utils/keywords.
seed_keywords = kw.get_keywords(KEYWORD_FILE)
infant_variants = kw.replace_word(seed_keywords, "child", "infant")
baby_variants = kw.replace_word(seed_keywords, "child", "baby")

# Same concatenation order as before: seed terms, infant variants, baby variants.
keywords = kw.deduplicate_keywords(seed_keywords + infant_variants + baby_variants)

In [None]:
# Store the expanded keyword list next to the seed keyword file so the exact
# terms used for the query are kept alongside it.
keywords_dir = KEYWORD_FILE.parent
query_keywords_path = keywords_dir / "keywords_query.txt"
kw.save_keywords(keywords, query_keywords_path)

In [None]:
# Client used for the dry run and for the real query below.
# NOTE(review): presumably a BigQuery client (the later `.query(...).to_dataframe()`
# call matches that API); how credentials are resolved is not visible here - confirm in google_utils.
client = google_utils.create_client()

In [None]:
# Build the patents query from the expanded keyword list and print it so it
# can be inspected before anything is executed.
query = google_utils.create_patents_query(keywords)
print(query)

In [None]:
# Check the query before running it for real (the notebook's stated aim is to
# avoid incurring too much cost).
# NOTE(review): presumably reports the estimated data processed without
# executing the query - confirm in google_utils.dry_run.
google_utils.dry_run(client, query)

In [None]:
# Execute the query for real and pull the result into a dataframe.
query_job = client.query(query)
query_df = query_job.to_dataframe()

# Left as the cell's final expression so the notebook displays the row count.
len(query_df)

In [None]:
# Upload the query results together with the query text and the two keyword
# files that produced it.
# NOTE(review): what upload_query_to_s3 does with `metadata` (e.g. copying the
# files alongside the results) is not visible here - confirm in google_utils.
metadata_files = [KEYWORD_FILE, query_keywords_path]
google_utils.upload_query_to_s3(
    query_name="GooglePatents",
    path=PATENT_PATH,
    query_df=query_df,
    query=query,
    metadata=metadata_files,
)