Skip to content

Commit

Permalink
Merge pull request #52 from life4/add-ids
Browse files Browse the repository at this point in the history
Add post IDs
  • Loading branch information
orsinium committed Aug 2, 2023
2 parents 0134286 + a91f5c0 commit 14939c6
Show file tree
Hide file tree
Showing 32 changed files with 140 additions and 18 deletions.
18 changes: 0 additions & 18 deletions assign_ids.py

This file was deleted.

1 change: 1 addition & 0 deletions posts/add-note.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 704
published: 2022-11-01
traces:
- [type: BaseException, method: add_note]
Expand Down
1 change: 1 addition & 0 deletions posts/assert-type.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 709
published: 2022-11-29
traces:
- [module: typing, function: assert_type]
Expand Down
1 change: 1 addition & 0 deletions posts/asyncio-create-task.md
@@ -1,5 +1,6 @@
---
published: 2023-04-18
id: 724
author: orsinium
traces:
- [module: asyncio, function: create_task]
Expand Down
1 change: 1 addition & 0 deletions posts/asyncio-gather.md
@@ -1,5 +1,6 @@
---
published: 2023-04-11
id: 723
author: orsinium
traces:
- [module: asyncio, function: gather]
Expand Down
1 change: 1 addition & 0 deletions posts/asyncio-run.md
@@ -1,5 +1,6 @@
---
published: 2023-03-28
id: 721
author: orsinium
traces:
- [module: asyncio, function: run]
Expand Down
1 change: 1 addition & 0 deletions posts/asyncio-sleep.md
@@ -1,5 +1,6 @@
---
published: 2023-04-04
id: 722
author: orsinium
traces:
- [module: asyncio, function: sleep]
Expand Down
1 change: 1 addition & 0 deletions posts/build-system.md
@@ -1,5 +1,6 @@
---
published: 2023-02-28
id: 717
author: orsinium
topics:
- packaging
Expand Down
1 change: 1 addition & 0 deletions posts/chain-is.md
@@ -1,5 +1,6 @@
---
published: 2022-09-27
id: 699
author: orsinium
traces:
- [keyword: is]
Expand Down
1 change: 1 addition & 0 deletions posts/contextlib-chdir.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 708
published: 2022-11-22
traces:
- [module: contextlib, function: chdir]
Expand Down
1 change: 1 addition & 0 deletions posts/dataclass-transform.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 710
published: 2022-12-06
pep: 681
python: "3.11"
Expand Down
1 change: 1 addition & 0 deletions posts/datetime-fold.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 727
published: 2023-07-25
pep: 495
python: "3.6"
Expand Down
1 change: 1 addition & 0 deletions posts/except-star.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 706
published: 2022-11-15
traces:
- [{exception: ExceptionGroup}]
Expand Down
1 change: 1 addition & 0 deletions posts/exception-group.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 705
published: 2022-11-08
traces:
- [{exception: ExceptionGroup}]
Expand Down
1 change: 1 addition & 0 deletions posts/generator-scope.md
@@ -1,5 +1,6 @@
---
author: ypankovych
id: 700
published: 2022-10-11
---

Expand Down
1 change: 1 addition & 0 deletions posts/isinstance.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 714
published: 2023-01-03
traces:
- [{function: isinstance}]
Expand Down
1 change: 1 addition & 0 deletions posts/literal-string.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 713
published: 2022-12-27
pep: 675
python: "3.11"
Expand Down
1 change: 1 addition & 0 deletions posts/math-fsum.md
@@ -1,5 +1,6 @@
---
published: 2023-03-14
id: 719
author: orsinium
traces:
- [module: math, function: fsum]
Expand Down
1 change: 1 addition & 0 deletions posts/os-curdir.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 701
published: 2022-10-18
traces:
- [module: os, function: curdir]
Expand Down
1 change: 1 addition & 0 deletions posts/pyproject-toml.md
@@ -1,5 +1,6 @@
---
published: 2023-02-21
id: 716
author: orsinium
topics:
- packaging
Expand Down
1 change: 1 addition & 0 deletions posts/python-3-11.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 702
published: 2022-10-24
topics:
- news
Expand Down
1 change: 1 addition & 0 deletions posts/reveal-type.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 712
published: 2022-12-20
topics:
- typing
Expand Down
1 change: 1 addition & 0 deletions posts/string-template.md
@@ -1,5 +1,6 @@
---
published: 2022-09-20
id: 698
author: orsinium
traces:
- [module: string, type: Template]
Expand Down
1 change: 1 addition & 0 deletions posts/tomllib.md
@@ -1,5 +1,6 @@
---
published: 2023-03-07
id: 718
author: orsinium
traces:
- [module: tomllib]
Expand Down
1 change: 1 addition & 0 deletions posts/traceback-col.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 703
published: 2022-10-25
pep: 657
python: "3.11"
Expand Down
1 change: 1 addition & 0 deletions posts/typing-self.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 711
published: 2022-12-03
traces:
- [module: typing, type: Self]
Expand Down
1 change: 1 addition & 0 deletions posts/unittest-mock-seal.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 728
published: 2023-08-01
traces:
- [module: unittest.mock, function: seal]
Expand Down
1 change: 1 addition & 0 deletions posts/warnings.md
@@ -1,5 +1,6 @@
---
published: 2022-09-13
id: 697
author: orsinium
traces:
- [module: warnings]
Expand Down
1 change: 1 addition & 0 deletions posts/wheel.md
@@ -1,5 +1,6 @@
---
published: 2023-02-14
id: 715
author: orsinium
topics:
- packaging
Expand Down
1 change: 1 addition & 0 deletions posts/zoneinfo.md
@@ -1,5 +1,6 @@
---
author: orsinium
id: 726
published: 2023-07-18
pep: 615
python: "3.6"
Expand Down
2 changes: 2 additions & 0 deletions sdk/commands/__init__.py
@@ -1,3 +1,4 @@
from ._add_ids import AddIDsCommand
from ._check_all import CheckAllCommand
from ._command import Command
from ._html import HTMLCommand
Expand All @@ -10,6 +11,7 @@
__all__ = ['Command', 'COMMANDS']

COMMANDS = (
AddIDsCommand,
HTMLCommand,
RunCodeCommand,
ScheduleCommand,
Expand Down
109 changes: 109 additions & 0 deletions sdk/commands/_add_ids.py
@@ -0,0 +1,109 @@

from __future__ import annotations

import asyncio
import os
import re
from functools import cached_property
from pathlib import Path

from telethon import TelegramClient

from ..post import Post
from ._command import Command


# Telegram channel whose messages are iterated to look up post IDs.
CHANNEL = 'pythonetc'
# "http:" or "https:" followed by a run of letters, digits, "-", "/", "." and "(".
# NOTE(review): the match stops at the first character outside that class
# (e.g. "_", "?", "#"), so some URLs are captured only partially. Presumably
# that is fine because matches are only compared with each other -- confirm.
REX_URL = re.compile(r'https?\:[a-zA-Z0-9\-\/\.\(]+')
# Python 3 version numbers: "3." followed by one or two digits (e.g. "3.11").
REX_PYTHON = re.compile(r'3\.[0-9]{1,2}')
# Dotted names: 2+ lowercase letters, a dot, then 2+ letters or digits
# (e.g. "asyncio.gather").
REX_QNAME = re.compile(r'[a-z]{2,}\.[a-zA-Z0-9]{2,}')
# Type of the keyword tuples returned by `AddIDsCommand._get_keywords`.
# Despite the name, the tuple holds every kind of keyword (URLs, Python
# versions, dotted names), not only URLs.
URLs = tuple[str, ...]


class AddIDsCommand(Command):
    """Add Telegram post IDs to all posts that don't have one.

    Posts are matched to channel messages by the keywords
    (URLs, Python versions, dotted names) found in their text.
    """
    name = 'add-ids'

    def run(self) -> int:
        """Check the environment and run the command.

        Returns:
            0 on success, 1 if the Telegram API credentials are not set.
        """
        # Both variables are read when the client is created, so verify both
        # up front instead of crashing later with a bare KeyError.
        if any(var not in os.environ for var in ('API_ID', 'API_HASH')):
            self.warn(
                'API_ID and API_HASH env vars required, '
                'you can get them at https://my.telegram.org/apps',
            )
            return 1
        asyncio.run(self._run())
        return 0

    async def _run(self) -> None:
        """Match posts without an ID to channel messages and write the IDs."""
        self.print('reading posts...')
        paths = self._get_paths()
        if not paths:
            # Nothing could be updated, so don't connect to Telegram at all.
            self.print('no posts without ID found')
            return
        self.print('fetching IDs...')
        async with self._client:
            ids = await self._get_ids()
        self.print('setting IDs...')
        for keywords, post_id in ids.items():
            path = paths.get(keywords)
            if path is None:
                # The message doesn't correspond to any post lacking an ID.
                continue
            content = path.read_text(encoding='utf-8')
            lines = content.splitlines()
            # Index 2 is right after the opening `---` and the first
            # front matter field of the post.
            lines.insert(2, f'id: {post_id}')
            # Normalize the end of the file to exactly one trailing newline.
            new_content = '\n'.join(lines).rstrip() + '\n'
            assert new_content != content
            path.write_text(new_content, encoding='utf-8')
            self.print(f'added ID for {path.name}')

    def _get_paths(self) -> dict[URLs, Path]:
        """Map keywords to the path of each post that has no ID yet.

        Only `.md` files directly inside the `posts` directory are considered.
        Posts that already have an ID or yield no keywords are skipped.

        Raises:
            RuntimeError: if two posts produce the same set of keywords
                and so cannot be told apart.
        """
        paths: dict[URLs, Path] = {}
        for path in sorted(Path('posts').iterdir()):
            if path.suffix != '.md':
                continue
            post = Post.from_path(path)
            if post.id is not None:
                continue
            keywords = self._get_keywords(post.md_content)
            if not keywords:
                continue
            if keywords in paths:
                name1 = path.name
                name2 = paths[keywords].name
                msg = f'duplicate set of keywords: {name1} and {name2}'
                raise RuntimeError(msg)
            paths[keywords] = path
        return paths

    async def _get_ids(self) -> dict[URLs, int]:
        """Map keywords to the ID of the channel message containing them.

        Messages without text or without keywords are skipped. If several
        messages share the same keywords, the first one yielded by
        `iter_messages` wins.
        """
        ids: dict[URLs, int] = {}
        async for message in self._client.iter_messages(CHANNEL):
            if message.text is None:
                continue
            keywords = self._get_keywords(message.text)
            if not keywords:
                continue
            if keywords in ids:
                continue
            ids[keywords] = message.id
        return ids

    def _get_keywords(self, text: str) -> URLs:
        """
        Get some key components from the text (URLs, qualnames, Python versions)
        that can be used to uniquely identify a text.
        It allows us to match the same text ignoring changes in formatting
        or corrected typos.

        The result keeps all URLs first, then all Python versions, then all
        qualnames, each group in order of appearance in the text.
        """
        result = REX_URL.findall(text)
        result.extend(REX_PYTHON.findall(text))
        result.extend(REX_QNAME.findall(text))
        return tuple(result)

    @cached_property
    def _client(self) -> TelegramClient:
        """Telegram client, created on first access and then reused.

        Reads `API_ID` and `API_HASH` from the environment; the session is
        stored under the name `bot`.
        """
        return TelegramClient(
            'bot',
            api_id=os.environ['API_ID'],
            api_hash=os.environ['API_HASH'],
        )

0 comments on commit 14939c6

Please sign in to comment.