In [None]:
import importlib
from datetime import datetime
from pathlib import Path

import pandas as pd

from cuery import utils
from cuery.cli import set_env_vars
from cuery.seo import keywords, seo, serps, traffic

# Shared "Solutions" drive, with "~" resolved to the current user's home directory.
GDRIVE = Path.expanduser(Path("~/Google Drive/Shared drives/Solutions"))
# SEO research folder on that drive (not referenced by the cells visible in this notebook).
DATA_DIR = GDRIVE.joinpath("Research/tractor_v2/SEO/")

# Call cuery's CLI helper once, ahead of the cells that use the cuery SEO functions.
# NOTE(review): presumably this loads the API credentials those calls need — confirm in cuery.cli.
set_env_vars()

# Keyword planner only

In [None]:
# Re-import the modules under development so local edits are picked up without a kernel restart.
importlib.reload(utils)
importlib.reload(keywords)

# Build the keyword config in a single step, so `kwd_cfg` only ever names the
# GoogleKwdConfig object. Field semantics are defined by GoogleKwdConfig itself.
kwd_cfg = keywords.GoogleKwdConfig(
    keywords=["análisis de datos"],
    url=None,
    whole_site=False,
    ideas=True,
    max_ideas=20,
    language="es",
    country="es",
    metrics_start="2021-01",
    metrics_end="2025-06",
)

# Fetch the keyword data for that config and show it as the cell output.
kwds = keywords.keywords(kwd_cfg)
kwds

In [None]:
# Show the keyword frame again; `kwds` is the value assigned from keywords.keywords(...) in the preceding cell.
kwds

# Serps only

In [None]:
importlib.reload(serps)

serp_cfg = {
    "keywords": ["data analysis", "data science", "machine learning", "artificial intelligence"],
    "batch_size": 25,
    "resultsPerPage": 10,
    "maxPagesPerQuery": 1,
    # "country": "us",
    # "searchLanguage": "en",
    # "languageCode": "en",
    "top_n": 10,
    "brands": ["graphext"],
    "competitors": ["powerbi", "tableau", "qlik", "looker", "google data studio"],
    "topic_max_samples": 100,
}

serp_cfg = serps.SerpConfig(**serp_cfg)
srs = await serps.serps(serp_cfg, keywords=None)
srs


In [None]:
# Left join: keep every keyword row and attach the SERP row whose `term` equals the `keyword`.
result = kwds.merge(
    srs,
    left_on="keyword",
    right_on="term",
    how="left",
)
result

In [None]:
# Preview the first rows of the merged frame. A bare last-line expression uses the
# notebook's rich display; the previous print(str(...)) forced plain text, and the
# str() call was redundant because print already stringifies its argument.
result.head()

# Traffic only

In [None]:
# Re-import the traffic module so local edits are picked up without a kernel restart.
importlib.reload(traffic)

# Build the traffic config in one step; `apify_token` is passed explicitly as None.
trf_cfg = traffic.TrafficConfig(batch_size=25, apify_token=None)
trf_cfg

In [None]:
# Preview only the two SERP columns that the following traffic cells pass on as inputs.
srs[["term", "domains"]]

In [None]:
trf = await traffic.keyword_traffic(srs.term, srs.domains, trf_cfg)
trf

In [None]:
# One row per (term, domain) pair: explode the `domains` column and renumber the
# rows from zero so the result has a fresh index.
trfinp = srs[["term", "domains"]].explode("domains", ignore_index=True)
trfinp

In [None]:
trf = await traffic.fetch_domain_traffic(tuple(trfinp.domains), trf_cfg)
trf

In [None]:
# Re-display whatever value `trf` was most recently bound to.
trf

# All in one

In [None]:
# Re-import every cuery SEO module used by the combined run so local edits are picked up
# without restarting the kernel. `seo` is reloaded after the other three modules.
# The final reload call is the cell's last expression, so its return value is shown as output.
importlib.reload(keywords)
importlib.reload(serps)
importlib.reload(traffic)
importlib.reload(seo)

In [None]:
# Nested settings for the combined run: one sub-dict per stage, keyed by the
# SeoConfig argument it is passed to.
# NOTE(review): the keyword settings use language/country "es" while the SERP settings
# use country "us" with "en" languages — confirm this mix is intended.
cfg = {
    "kwd_cfg": {
        "keywords": ["análisis de datos"],
        "url": None,
        "whole_site": False,
        "ideas": True,
        "max_ideas": 20,
        "language": "es",
        "country": "es",
        "metrics_start": "2021-01",
        "metrics_end": "2025-06",
    },
    "serp_cfg": {
        "batch_size": 10,
        "resultsPerPage": 10,
        "maxPagesPerQuery": 1,
        "country": "us",
        "searchLanguage": "en",
        "languageCode": "en",
        "top_n": 10,
        "brands": ["graphext"],
        "competitors": ["dataiku", "tableau", "powerbi", "power bi"],
    },
    "traffic_cfg": {"batch_size": 25},
}

# Hand each section to SeoConfig by name and show the resulting config object.
seo_cfg = seo.SeoConfig(
    kwd_cfg=cfg["kwd_cfg"],
    serp_cfg=cfg["serp_cfg"],
    traffic_cfg=cfg["traffic_cfg"],
)
seo_cfg

In [None]:
# Run seo.seo_data with the SeoConfig built in the previous cell.
# Note: this rebinds `result`, which the merge cell earlier in the notebook also assigns.
result = await seo.seo_data(seo_cfg)
result

In [None]:
# Persist the current `result` frame as a timestamped parquet file in the user's
# Downloads folder.
# - The stamp now includes the date, so runs on different days can no longer
#   produce the same file name.
# - The folder is resolved from the current user's home directory instead of a
#   hardcoded /Users/<name> path, so the cell works on any account.
# `datetime` and `Path` come from the notebook's import cell.
now = datetime.now().strftime("%Y%m%d_%H_%M_%S")
fnm = Path.home() / "Downloads" / f"keywords_gx_{now}.parquet"
result.to_parquet(fnm, index=False)

In [None]:
# Reload a saved result from parquet, overwriting `result`.
# NOTE(review): "..." is a placeholder, not a real file — replace it with the path of a
# saved parquet file before running, otherwise this cell fails on Restart & Run All.
result = pd.read_parquet("...")
result