# EPSG コードと JPX コードのデータをマージ

それぞれは `epsg.ipynb`, `jpx.ipynb` で用意した。


In [1]:
import json
import random

## データの読み込み


In [2]:
# Read explicitly as UTF-8: without `encoding=`, open() falls back to the
# platform's locale encoding, which is not UTF-8 everywhere (e.g. Windows).
with open("./output/epsg.json", encoding="utf-8") as f:
    epsg = json.load(f)
# Number of codes loaded from the EPSG file.
len(epsg)

6934

In [3]:
# Read explicitly as UTF-8: the JPX names are Japanese text, so relying on the
# platform's default locale encoding can fail or produce mojibake.
with open("./output/jpx.json", encoding="utf-8") as f:
    jpx = json.load(f)
# Number of codes loaded from the JPX file.
len(jpx)

4360

## 統合


In [4]:
# Build one entry per code found in either source. Codes are visited in sorted
# (string) order so the resulting dict has a deterministic key order; a source
# that does not contain the code contributes None.
all_codes = {
    code: {
        "epsg": None if code not in epsg else epsg[code]["name"],
        "jpx": None if code not in jpx else jpx[code]["name"],
    }
    for code in sorted(epsg.keys() | jpx.keys())
}

# Total number of merged codes, plus one sample entry.
len(all_codes), all_codes["4326"]

(8672, {'epsg': 'WGS 84', 'jpx': 'インテージホールディングス'})

「EPSG・JPX どちらにも存在」「EPSG のみ」「JPX のみ」の 3 パターンでまとめる:


In [5]:
# Bucket every merged code by which source(s) provide a name for it:
#   "both" - named in EPSG and JPX, "epsg" - EPSG only, "jpx" - JPX only.
all_codes_grouped = {"both": [], "epsg": [], "jpx": []}

for code, data in all_codes.items():
    has_epsg = bool(data["epsg"])
    has_jpx = bool(data["jpx"])

    if has_epsg and has_jpx:
        group = "both"
    elif has_epsg:
        group = "epsg"
    elif has_jpx:
        group = "jpx"
    else:
        # Neither side has a usable (non-empty) name. Without this branch the
        # entry would be appended to whatever `group` the previous iteration
        # left behind (or raise NameError on the very first iteration).
        print(f"skipped {code!r}: no name in either source")
        continue

    # Flatten to {"code": ..., "epsg": ..., "jpx": ...}.
    all_codes_grouped[group].append({"code": code, **data})

# Report the size of each bucket.
for k, v in all_codes_grouped.items():
    print(k, len(v))

both 2622
epsg 4312
jpx 1738


In [6]:
# Show one sample entry present in both sources. A dedicated seeded generator
# keeps the displayed sample identical across "Restart & Run All".
random.Random(0).choice(all_codes_grouped["both"])

{'code': '6616',
 'epsg': 'NAD83(2011) / Wyoming West (ftUS)',
 'jpx': 'トレックス・セミコンダクター'}

In [7]:
# Show one sample EPSG-only entry. A dedicated seeded generator keeps the
# displayed sample identical across "Restart & Run All".
random.Random(0).choice(all_codes_grouped["epsg"])

{'code': '3260', 'epsg': 'WGS 84 / SCAR IMW ST01-04', 'jpx': None}

In [8]:
# Show one sample JPX-only entry. A dedicated seeded generator keeps the
# displayed sample identical across "Restart & Run All".
random.Random(0).choice(all_codes_grouped["jpx"])

{'code': '6898', 'epsg': None, 'jpx': 'トミタ電機'}

## 4 桁の数字のみ

ゲーム性を考慮し、候補を絞る（例えば 5 桁が出てきたら、これは証券コードではないとすぐわかってしまうため）。

- 5 桁の EPSG コード
- 5 桁の証券コード
- 4 桁の証券コードで、数字のみではないもの

が除外される。


In [9]:
from collections import Counter

# Distribution of code lengths (in characters) for the single-source groups,
# printed in the order EPSG-only, then JPX-only.
for group_name in ("epsg", "jpx"):
    code_lengths = Counter(len(entry["code"]) for entry in all_codes_grouped[group_name])
    print(code_lengths)

Counter({4: 2497, 5: 1815})
Counter({4: 1736, 5: 2})


In [10]:
# Peek at the first five JPX-only entries whose code is not purely numeric.
list(filter(lambda entry: not entry["code"].isdigit(), all_codes_grouped["jpx"]))[:5]

[{'code': '130A', 'epsg': None, 'jpx': 'Veritas In Silico'},
 {'code': '131A', 'epsg': None, 'jpx': 'CCNグループ'},
 {'code': '132A', 'epsg': None, 'jpx': 'アイエヌホールディングス'},
 {'code': '133A', 'epsg': None, 'jpx': 'グローバルX 超短期米国債 ETF'},
 {'code': '134A', 'epsg': None, 'jpx': 'アプライズ'}]

In [11]:
# Keep, for every group, only the codes made of exactly four ASCII digits.
# `str.isdigit()` on its own also returns True for non-ASCII digit characters
# (full-width digits, superscripts, ...), so `isascii()` is required as well.
# Groups are taken from `all_codes_grouped` itself instead of a second
# hard-coded list, so the two structures cannot drift apart.
all_codes_grouped_4_digits = {
    group: [
        entry
        for entry in entries
        if len(entry["code"]) == 4
        and entry["code"].isascii()
        and entry["code"].isdigit()
    ]
    for group, entries in all_codes_grouped.items()
}

# Report the size of each filtered group.
for k, v in all_codes_grouped_4_digits.items():
    print(k, len(v))

both 2622
epsg 2497
jpx 1683


## 出力


In [12]:
# Write explicitly as UTF-8: with ensure_ascii=False the Japanese names are
# emitted as-is, so the platform's default locale encoding could either raise
# UnicodeEncodeError or produce a non-UTF-8 JSON file.
with open("./output/all.json", "w", encoding="utf-8") as f:
    json.dump(all_codes_grouped, f, ensure_ascii=False, indent=4)

In [13]:
# Write explicitly as UTF-8: with ensure_ascii=False the Japanese names are
# emitted as-is, so the platform's default locale encoding could either raise
# UnicodeEncodeError or produce a non-UTF-8 JSON file.
with open("./output/all_4digits.json", "w", encoding="utf-8") as f:
    json.dump(all_codes_grouped_4_digits, f, ensure_ascii=False, indent=4)

In [14]:
# List the JSON files under ./output with human-readable sizes.
!ls -lh ./output/*.json

-rw-r--r--  1 sorami  staff   1.1M May 22 09:43 ./output/all.json
-rw-r--r--  1 sorami  staff   871K May 22 09:43 ./output/all_4digits.json
-rw-r--r--  1 sorami  staff   2.1M May 21 21:01 ./output/epsg.json
-rw-r--r--  1 sorami  staff   644K May 21 13:43 ./output/jpx.json


In [15]:
# Show the beginning of the full grouped output file as a sanity check.
!head ./output/all.json

{
    "both": [
        {
            "code": "2001",
            "epsg": "Antigua 1943 / British West Indies Grid",
            "jpx": "ニップン"
        },
        {
            "code": "2002",
            "epsg": "Dominica 1945 / British West Indies Grid",


In [16]:
# Show the beginning of the four-digit-only output file as a sanity check.
!head ./output/all_4digits.json

{
    "both": [
        {
            "code": "2001",
            "epsg": "Antigua 1943 / British West Indies Grid",
            "jpx": "ニップン"
        },
        {
            "code": "2002",
            "epsg": "Dominica 1945 / British West Indies Grid",
