# Export all data

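Export every record from the Algolia index, then delete the documents that have not been updated recently (judged by their `last_modified` timestamp).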

In [101]:
# Local file that receives the exported index records and is read back when loading them.
target_file_name = "doc-20221107-2.json"

In [102]:
# Browse the `aws-doc-jp` index with the Algolia CLI and redirect the output into the
# export file (the shell redirect overwrites any existing file with that name).
!algolia objects browse aws-doc-jp > $target_file_name

In [103]:
# Count the lines of the export file; it is read below with jsonlines, i.e. one record per line.
!wc -l $target_file_name

   39639 doc-20221107-2.json


In [104]:
import jsonlines
from datetime import datetime

def _slim_record(record):
    """Reduce one exported record to the fields used to pick deletion targets.

    The `last_modified` string is parsed with the fixed format
    ``%Y-%m-%dT%H:%M:%S``; a value in any other shape raises ValueError.
    """
    modified_at = datetime.strptime(record["last_modified"], "%Y-%m-%dT%H:%M:%S")
    return {
        "objectID": record["objectID"],
        "last_modified": modified_at,
        "title": record["title"],
    }


# Stream the export file record by record and collect the slimmed-down dicts.
all_doc = []
with jsonlines.open(target_file_name) as reader:
    all_doc.extend(_slim_record(record) for record in reader)
        

In [105]:
# Number of records loaded; compare with the `wc -l` line count of the export file.
len(all_doc)

39639

In [106]:
# Peek at the first ten loaded records.
all_doc[:10]

[{'objectID': 'https://docs.aws.amazon.com/ja_jp/lake-formation/latest/dg/implicit-permissions.html',
  'last_modified': datetime.datetime(2022, 11, 4, 11, 46, 36),
  'title': '黙示的な Lake Formation 許可 - AWS Lake Formation'},
 {'objectID': 'https://docs.aws.amazon.com/ja_jp/mediaconvert/latest/ug/video-padding.html',
  'last_modified': datetime.datetime(2022, 11, 4, 11, 46, 41),
  'title': '黒いビデオフレームによるビデオのパディング - MediaConvert'},
 {'objectID': 'https://docs.aws.amazon.com/ja_jp/neptune/latest/userguide/manage-console-fast-reset.html',
  'last_modified': datetime.datetime(2022, 11, 4, 11, 49, 42),
  'title': '高速リセット API を使用して Amazon Neptune DB クラスターを空にする - Amazon Neptune'},
 {'objectID': 'https://docs.aws.amazon.com/ja_jp/lumberyard/latest/userguide/gems-system-gem-fast-noise.html',
  'last_modified': datetime.datetime(2022, 11, 4, 12, 0, 31),
  'title': '高速ノイズ Gem - Lumberyard ユーザーガイド'},
 {'objectID': 'https://docs.aws.amazon.com/ja_jp/AmazonRDS/latest/AuroraUserGuide/AuroraMySQL.Managin

In [107]:
import pandas as pd

# One row per loaded record; the columns follow the dict keys: objectID, last_modified, title.
all_doc_df = pd.DataFrame(all_doc)

In [108]:
# Show the records ordered from oldest to newest modification time.
# sort_values returns a new frame, so all_doc_df itself keeps its original order.
all_doc_df.sort_values(by="last_modified")

Unnamed: 0,objectID,last_modified,title
28735,https://docs.aws.amazon.com/ja_jp/migrationhub...,2022-09-02 16:48:46,AWS へのサインアップ - Migration Hub 戦略推奨事項
23730,https://docs.aws.amazon.com/ja_jp/migrationhub...,2022-09-02 16:48:46,IAM ユーザーの作成 - Migration Hub 戦略推奨事項
11322,https://docs.aws.amazon.com/ja_jp/migrationhub...,2022-09-02 16:48:46,ステップ 2: 戦略レコメンデーションコレクターを展開する - Migration Hub ...
2650,https://docs.aws.amazon.com/ja_jp/migrationhub...,2022-09-02 16:48:47,戦略推奨事項での戦略推奨事項の表示 - Migration Hub 戦略推奨事項
2652,https://docs.aws.amazon.com/ja_jp/migrationhub...,2022-09-02 16:48:47,戦略レコメンデーション設定 - Migration Hub 戦略推奨事項
...,...,...,...
16101,https://docs.aws.amazon.com/ja_jp/AmazonS3/lat...,2022-11-05 02:08:05,の Amazon S3 リクエスト ID の取得AWS Support - Amazon S...
1558,https://docs.aws.amazon.com/ja_jp/AmazonS3/lat...,2022-11-05 02:08:05,症状別 Amazon S3 のトラブルシューティング - Amazon Simple Sto...
17395,https://docs.aws.amazon.com/ja_jp/AmazonS3/lat...,2022-11-05 02:08:06,Troubleshooting - Amazon Simple Storage Service
28479,https://docs.aws.amazon.com/ja_jp/AmazonS3/lat...,2022-11-05 02:08:28,AWS 用語集 - Amazon Simple Storage Service


# Find documents to delete

In [117]:
# Records last modified strictly before this instant are treated as stale.
# The comparison is exclusive: a record stamped exactly 2022-11-01 00:00:00 belongs to
# November and must not be selected for deletion.
STALE_CUTOFF = pd.Timestamp("2022-11-01 00:00:00")

before_202211 = all_doc_df[all_doc_df["last_modified"] < STALE_CUTOFF]
before_202211

Unnamed: 0,objectID,last_modified,title
4,https://docs.aws.amazon.com/ja_jp/AmazonRDS/la...,2022-10-28 02:50:49,高速 DDL を使用して Amazon Aurora でテーブルを変更する - Amazon...
21,https://docs.aws.amazon.com/ja_jp/prescriptive...,2022-10-05 07:23:53,高可用性 - AWS の規範的ガイダンス
22,https://docs.aws.amazon.com/ja_jp/whitepapers/...,2022-10-19 17:05:12,高可用性 - AWS でのマイクロサービスの実装
32,https://docs.aws.amazon.com/ja_jp/directconnec...,2022-10-31 23:11:11,高い回復性 - AWS Direct Connect
43,https://docs.aws.amazon.com/ja_jp/prescriptive...,2022-10-31 23:12:47,顧客が望むものと、規制当局が必要とするもの - AWS の規範的ガイダンス
...,...,...,...
39618,https://docs.aws.amazon.com/ja_jp/datapipeline...,2022-10-19 17:05:49,(オプション) Task Runner に対する Amazon RDS へのアクセス権限の付...
39622,https://docs.aws.amazon.com/ja_jp/AWSEC2/lates...,2022-10-28 03:05:20,(オプション) Linux 用 EC2Rescue の署名を検証する - Amazon El...
39627,https://docs.aws.amazon.com/ja_jp/systems-mana...,2022-10-30 23:58:53,(オプション) AWS CLI 用の Session Manager プラグインをインストー...
39629,https://docs.aws.amazon.com/ja_jp/prometheus/l...,2022-10-31 23:11:12,(オプション) Amazon SNS に JSON を出力するようにアラートマネージャーを設...


In [118]:
# Convert the selected rows to plain Python lists, one inner list per row in column
# order (objectID, last_modified, title).
delete_target_list = before_202211.to_numpy().tolist()

# Delete documents

- https://www.algolia.com/doc/api-client/getting-started/install/python/?client=python
- https://www.algolia.com/doc/api-reference/api-methods/delete-objects/?client=python


In [119]:
import os

from algoliasearch.search_client import SearchClient

# Credentials are read from the environment so that no secret is stored in the notebook.
# A missing variable raises KeyError here, before any request is sent.
client = SearchClient.create(
    os.environ["ALGOLIA_APP_ID"],
    os.environ["ALGOLIA_API_KEY"],
)

# Defaults to the index that the export step browsed, so that deletions are issued
# against the same index the object IDs were read from.
index = client.init_index(os.environ.get("ALGOLIA_INDEX_NAME", "aws-doc-jp"))

# Optional connectivity check:
# index.search('将軍')

In [123]:
# Each entry of delete_target_list is a row [objectID, last_modified, title];
# position 0 is the Algolia object ID.
delete_object_ids = [row[0] for row in delete_target_list]

# Deletion is irreversible. Algolia processes write operations asynchronously, so block
# until the delete tasks have been applied; otherwise a follow-up read of the index may
# still return the records that were just deleted.
res = index.delete_objects(delete_object_ids)
res.wait()