In [None]:
import importlib
import os
from datetime import datetime
from pathlib import Path

import pandas as pd

from cuery import utils
from cuery.seo import keywords, seo, serps, traffic

# Root of the shared "Solutions" drive inside the current user's home directory.
GDRIVE = Path.home() / "Google Drive" / "Shared drives" / "Solutions"
# Folder for this project's SEO research data.
DATA_DIR = GDRIVE / "Research" / "tractor_v2" / "SEO"

# NOTE(review): presumably prepares credentials/environment variables for the
# cuery clients, with the Apify secrets switched off — confirm in cuery.utils.
utils.set_env(apify_secrets=False)

In [None]:
# Load a previously exported SEO dataset from the shared Google Drive.
# The path is built from the current user's home directory rather than a
# hardcoded /Users/<name> prefix, so it resolves to the same file for the
# original author while not baking a user name into the notebook.
fnm = (
    Path.home()
    / "Library/CloudStorage/GoogleDrive-thomas@graphext.com/Shared drives/Solutions"
    / "Stellantis"
    / "coches_chinos_seo_data_with_aio_aspects_and_models_08_05.parquet"
)
df = pd.read_parquet(fnm)
df

# Check tools refactor

In [None]:
topics = {
    "cerraduras electronicas": [
        "cerraduras wifi",
        "cerraduras bluetooth",
        "cerraduras biométricas",
        "cerraduras con cámara",
        "cerraduras con alarma",
    ],
    "cerraduras inteligentes": [
        "cerraduras con control remoto",
        "cerraduras con sensor de movimiento",
        "cerraduras con integración de hogar inteligente",
        "cerraduras con notificaciones en tiempo real",
    ],
}

clf_df = await serps.topic_and_intent(
    df=df.iloc[:100],
    max_samples=100,
    topic_model="google/gemini-2.5-flash",
    assignment_model="openai/gpt-4.1-mini",
    text_column="keyword",
    topics_instructions="",
    min_ldist=2,
)
clf_df

In [None]:
importlib.reload(serps)

ent_df = await serps.extract_aio_entities(df=df.iloc[:100], id_column="keyword")
ent_df

# Keyword planner only

In [None]:
# Re-import the local modules so source edits are picked up in this kernel.
importlib.reload(utils)
importlib.reload(keywords)

# Build the keyword config directly from keyword arguments, so `kwd_cfg` is
# only ever bound to the config object.
kwd_cfg = keywords.GoogleKwdConfig(
    keywords=["análisis de datos"],
    url=None,
    whole_site=False,
    ideas=True,
    max_ideas=20,
    language="es",
    country="es",
    metrics_start="2021-01",
    metrics_end="2025-06",
)

# Run keywords.keywords with that config and display the result.
kwds = keywords.keywords(kwd_cfg)
kwds

In [None]:
# Display the `country` attribute of the config object built in the previous cell.
kwd_cfg.country

# Serps only

In [None]:
importlib.reload(serps)

serp_cfg = {
    "keywords": ["data analysis", "data science", "machine learning", "artificial intelligence"],
    "batch_size": 25,
    "resultsPerPage": 10,
    "maxPagesPerQuery": 1,
    # "country": "us",
    # "searchLanguage": "en",
    # "languageCode": "en",
    "top_n": 10,
    "brands": ["graphext"],
    "competitors": ["powerbi", "tableau", "qlik", "looker", "google data studio"],
    "topic_max_samples": 100,
}

serp_cfg = serps.SerpConfig(**serp_cfg)
srs = await serps.serps(serp_cfg, keywords=None)
srs

# Traffic only

In [None]:
# Re-import traffic so source edits made since the kernel started are used.
importlib.reload(traffic)

# Build the traffic config directly from keyword arguments; no Apify token is
# passed explicitly.
trf_cfg = traffic.TrafficConfig(batch_size=25, apify_token=None)
trf_cfg

In [None]:
# Show the two SERP result columns that the traffic cells below pass on.
srs[["term", "domains"]]

In [None]:
trf = await traffic.keyword_traffic(srs.term, srs.domains, trf_cfg)
trf

In [None]:
# Expand the "domains" column into one row per entry and renumber the rows
# from zero.
trfinp = (
    srs[["term", "domains"]]
    .explode("domains")
    .reset_index(drop=True)
)
trfinp

In [None]:
trf = await traffic.fetch_domain_traffic(tuple(trfinp.domains), trf_cfg)
trf

In [None]:
# Re-display the most recently assigned `trf`.
trf

# All in one

In [None]:
# Re-import the SEO modules so that source edits made since the kernel started
# are picked up without a restart.
# NOTE(review): seo is reloaded last, presumably because it imports the other
# three — confirm before reordering.
importlib.reload(keywords)
importlib.reload(serps)
importlib.reload(traffic)
importlib.reload(seo)

In [None]:
# Keyword section of the combined configuration.
kwd_params = dict(
    keywords=["coches eléctricos", "coches híbridos", "coches eléctricos españa"],
    url=None,
    whole_site=False,
    ideas=True,
    max_ideas=10_000,
    language="es",
    country="es",
    metrics_start="2021-01",
    metrics_end="2025-07",
)

# SERP section, including the brand and competitor name lists.
serp_params = dict(
    batch_size=100,
    resultsPerPage=20,
    maxPagesPerQuery=1,
    countryCode="es",
    searchLanguage="es",
    languageCode="es",
    top_n=10,
    brands=[
        "peugeot",
        "citroën",
        "fiat",
        "alfa romeo",
        "opel",
        "stellantis",
        "vauxhall",
        "lancia",
        "jeep",
        "chrysler",
        "leapmotor",
    ],
    competitors=[
        "tesla",
        "byd",
        "mg",
        "volkswagen",
        "renault",
        "hyundai",
        "bmw",
        "volvo",
        "geely",
        "zeekr",
        "nissan",
    ],
)

# Traffic section.
traffic_params = dict(batch_size=25)

# Assemble the three sections under the keys that seo.SeoConfig receives.
cfg = {
    "kwd_cfg": kwd_params,
    "serp_cfg": serp_params,
    "traffic_cfg": traffic_params,
}

seo_cfg = seo.SeoConfig(**cfg)
seo_cfg

In [None]:
# Await seo.seo_data with the combined configuration and display what it returns.
result = await seo.seo_data(seo_cfg)
result

In [None]:
# Save the combined result to the shared Drive as a timestamped parquet file.
# The stamp includes the date as well as the time of day, so exports made on
# different days cannot collide and can be told apart by name.
now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

# The path is built from the current user's home directory rather than a
# hardcoded /Users/<name> prefix.
fnm = (
    Path.home()
    / "Library/CloudStorage/GoogleDrive-thomas@graphext.com/Shared drives/Solutions"
    / "Stellantis"
    / f"electric_vehicles_{now}.parquet"
)
# The frame's index is not written to the file.
result.to_parquet(fnm, index=False)

In [None]:
# Load a saved result back from parquet and display it.
# NOTE(review): "..." looks like a placeholder path — replace it with the
# actual parquet file before running this cell.
result = pd.read_parquet("...")
result