
Commit

fix followers
honzajavorek committed Feb 5, 2024
1 parent 2671f30 commit 92e4126
Showing 2 changed files with 21 additions and 16 deletions.
4 changes: 4 additions & 0 deletions juniorguru/data/followers.jsonl
@@ -51,4 +51,8 @@
{"_month": "2024-02", "count": 674, "name": "linkedin"}
{"_month": "2024-02", "count": 166, "name": "mastodon"}
{"_month": "2024-02", "count": 458, "name": "newsletter"}
{"_month": "2024-02", "count": 675, "name": "linkedin"}
{"_month": "2024-02", "count": 3212, "name": "linkedin_personal"}
{"_month": "2024-02", "count": 166, "name": "mastodon"}
{"_month": "2024-02", "count": 459, "name": "newsletter"}
{"_month": "2024-02", "count": 326, "name": "youtube"}
33 changes: 17 additions & 16 deletions juniorguru/sync/followers.py
@@ -1,7 +1,7 @@
 import re
 from datetime import date
 from pathlib import Path
-from urllib.parse import urlencode
+from urllib.parse import urlencode, urlparse
 
 import click
 import requests
@@ -26,7 +26,9 @@
 
 LINKEDIN_URL = "https://www.linkedin.com/company/juniorguru"
 
-LINKEDIN_PERSONAL_URL = "https://www.linkedin.com/posts/honzajavorek_courting-haskell-honza-javorek-activity-6625070791035756544-J3Hr"
+LINKEDIN_PERSONAL_SEARCH_URL = "https://duckduckgo.com/?hps=1&q=honza+javorek&ia=web"
+
+LINKEDIN_PERSONAL_URL = "https://cz.linkedin.com/in/honzajavorek"
 
 
 @cli.sync_command()
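
The two new constants work together in scrape_linkedin_personal() further down: the scraper opens the DuckDuckGo search page and then clicks the result whose href contains the LinkedIn profile URL with the country prefix stripped off the hostname. A quick illustration of the URL parts involved, not part of the commit:

from urllib.parse import urlparse

url_parts = urlparse("https://cz.linkedin.com/in/honzajavorek")
_, domain = url_parts.netloc.split(".", 1)

print(domain)                       # linkedin.com
print(f"{domain}{url_parts.path}")  # linkedin.com/in/honzajavorek

The selector built from these parts in the function below is a[href*='linkedin.com/in/honzajavorek'].
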
@@ -67,7 +69,7 @@ def main(history_path: Path, ecomail_api_key: str, ecomail_list_id: int):
     scrapers = {
         "youtube": scrape_youtube,
         "linkedin": scrape_linkedin,
-        # "linkedin_personal": scrape_linkedin_personal,  # they removed the follower count
+        "linkedin_personal": scrape_linkedin_personal,
         "mastodon": scrape_mastodon,
     }
     for name, scrape in scrapers.items():
@@ -155,24 +157,23 @@ def scrape_linkedin_personal():
     with sync_playwright() as playwright:
         browser = playwright.firefox.launch()
         page = browser.new_page()
-        page.goto(LINKEDIN_PERSONAL_URL, wait_until="networkidle")
+        page.goto(LINKEDIN_PERSONAL_SEARCH_URL, wait_until="networkidle")
+        url_parts = urlparse(LINKEDIN_PERSONAL_URL)
+        _, domain = url_parts.netloc.split(".", 1)
+        page.click(f"a[href*='{domain}{url_parts.path}']")
         if "/authwall" in page.url:
             logger.error(f"Loaded {page.url}")
             return None
         response_text = str(page.content())
         browser.close()
-    html_tree = html.fromstring(response_text)
-    followers_element = html_tree.cssselect(
-        '[class*="public-post-author-card__followers"]'
-    )[0]
-    match = re.search(
-        r"([\d,]+)\s*(followers|sledujících)", followers_element.text_content()
-    )
-    try:
-        return int(match.group(1).replace(",", ""))
-    except (AttributeError, ValueError):
-        logger.error(f"Scraping failed!\n\n{response_text}")
-        return None
+
+    if match := re.search(
+        r'"name":\s*"Follows"\s*,\s*"userInteractionCount":\s*(\d+)', response_text
+    ):
+        return int(match.group(1))
+
+    logger.error(f"Scraping failed!\n\n{response_text}")
+    return None
 
 
 def scrape_mastodon():
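
The rewritten scraper no longer picks a followers element out of the parsed HTML; it searches the raw page source for an interaction counter. A minimal sketch of that extraction step, reusing the regex from the diff on a fabricated sample string (the exact markup LinkedIn embeds is an assumption here, not something the commit documents):

import re

# Regex copied from the diff above.
FOLLOWS_RE = re.compile(r'"name":\s*"Follows"\s*,\s*"userInteractionCount":\s*(\d+)')

# Hypothetical fragment of page content, made up for illustration only.
sample = '{"@type": "InteractionCounter", "name": "Follows", "userInteractionCount": 3212}'

match = FOLLOWS_RE.search(sample)
print(int(match.group(1)) if match else None)  # 3212

If no such counter is found, the function logs the whole page content and returns None, as the diff shows.
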

