# nlp100 chapter7

Try:http://www.cl.ecei.tohoku.ac.jp/nlp100

## 60. KVSの構築

In [1]:
import json
import redis
import codecs

r = redis.Redis(host='localhost', port=6379, db=0)
with open('data/artist.json') as reader:
    for line in reader:
        data = json.loads(line.rstrip())
        if "name" in data and "area" in data:
            r.set(data['name'], data['area'])

## 61. KVSの検索

In [2]:
print(r.get('Thomas Selditz').decode())
print(r.get('George Gao').decode())

Germany
China


## 62. KVS内の反復処理

In [3]:
count = 0
for k in r.keys():
    if r.get(k) == 'Japan'.encode():
        count += 1
print('The number of Japanese:', count)

The number of Japanese: 22128


## 63. オブジェクトを値に格納したKVS

In [4]:
r2 = redis.Redis(host='localhost', port=6379, db=0)
with open('data/artist.json') as reader:
    for line in reader:
        data = json.loads(line.rstrip())
        if 'tags' in data:
            tags = json.dumps(data['tags'])
            r2.set(data['name'].encode(), tags.encode())

In [5]:
r2.get('Oasis'.encode()).decode()

'[{"count": 1, "value": "rock"}, {"count": 3, "value": "britpop"}, {"count": 4, "value": "british"}, {"count": 1, "value": "uk"}, {"count": 1, "value": "britannique"}, {"count": 1, "value": "rock and indie"}, {"count": 1, "value": "england"}, {"count": 1, "value": "manchester"}]'

## 64. MongoDBの構築

In [6]:
import pymongo

with open('data/artist.json') as reader:
    client = pymongo.MongoClient()
    db = client.musics
    print('db:', db.name)
    collection = db.artist
    
    for line in reader:
        data = json.loads(line)
        collection.insert_one(data)

db: musics


In [7]:
collection.create_index([('name', pymongo.ASCENDING)])  
collection.create_index([('aliases.name', pymongo.ASCENDING)])  
collection.create_index([('tags.value', pymongo.ASCENDING)])
collection.create_index([('rating.value', pymongo.ASCENDING)])

'rating.value_1'

## 65. MongoDBの検索

In [8]:
for name in collection.find({'name': 'Queen'}):
    print(name)

{'aliases': [{'sort_name': 'Queen', 'name': 'Queen'}], 'gid': '420ca290-76c5-41af-999e-564d7c71f1a7', 'ended': True, 'id': 701492, 'gender': 'Female', 'type': 'Character', 'sort_name': 'Queen', '_id': ObjectId('58d1d74425c5cf3a7164608a'), 'tags': [{'count': 1, 'value': 'kamen rider w'}, {'count': 1, 'value': 'related-akb48'}], 'area': 'Japan', 'name': 'Queen'}
{'aliases': [{'sort_name': '女王', 'name': '女王'}], 'rating': {'count': 24, 'value': 92}, 'ended': True, 'id': 192, 'type': 'Group', 'tags': [{'count': 2, 'value': 'hard rock'}, {'count': 1, 'value': '70s'}, {'count': 1, 'value': 'queen family'}, {'count': 1, 'value': '90s'}, {'count': 1, 'value': '80s'}, {'count': 1, 'value': 'glam rock'}, {'count': 4, 'value': 'british'}, {'count': 1, 'value': 'english'}, {'count': 2, 'value': 'uk'}, {'count': 1, 'value': 'pop/rock'}, {'count': 1, 'value': 'pop-rock'}, {'count': 1, 'value': 'britannique'}, {'count': 1, 'value': 'classic pop and rock'}, {'count': 1, 'value': 'queen'}, {'count': 1, 

## 66. 検索件数の取得

In [9]:
count = 0
for v in collection.find({'area': 'Japan'}):
    count += 1
print(count)

91284


## 67. 複数のドキュメントの取得

In [10]:
aliases_name = "Queen"

for aliases in collection.find({"aliases.name": aliases_name}):
    print(aliases)

{'aliases': [{'sort_name': 'Queen', 'name': 'Queen'}], 'gid': '420ca290-76c5-41af-999e-564d7c71f1a7', 'ended': True, 'id': 701492, 'gender': 'Female', 'type': 'Character', 'sort_name': 'Queen', '_id': ObjectId('58d1d74425c5cf3a7164608a'), 'tags': [{'count': 1, 'value': 'kamen rider w'}, {'count': 1, 'value': 'related-akb48'}], 'area': 'Japan', 'name': 'Queen'}
{'aliases': [{'sort_name': 'Queen', 'name': 'Queen'}], 'gid': '420ca290-76c5-41af-999e-564d7c71f1a7', 'ended': True, 'id': 701492, 'gender': 'Female', 'type': 'Character', 'sort_name': 'Queen', '_id': ObjectId('58d3d0e325c5cf3a71726f86'), 'tags': [{'count': 1, 'value': 'kamen rider w'}, {'count': 1, 'value': 'related-akb48'}], 'area': 'Japan', 'name': 'Queen'}
{'aliases': [{'sort_name': 'Queen', 'name': 'Queen'}], 'gid': '420ca290-76c5-41af-999e-564d7c71f1a7', 'ended': True, 'id': 701492, 'gender': 'Female', 'type': 'Character', 'sort_name': 'Queen', '_id': ObjectId('58d3d26325c5cf3a71807e80'), 'tags': [{'count': 1, 'value': 'kam

## 68. ソート

In [11]:
dancer = []
for dance in collection.find({"tags.value": "dance"}):
    if 'rating' in dance:
        dancer.append([dance['name'],dance['rating']['count']])

dancecount = sorted(dancer, key=lambda x:x[1], reverse=True)

print('Dance artist rating count top 10:')

n = 1
for dance in dancecount:
    if n > 10:
        break
    print('No.'+str(n)+':', dance[0], dance[1])
    n += 1

Dance artist rating count top 10:
No.1: Madonna 26
No.2: Madonna 26
No.3: Madonna 26
No.4: Madonna 26
No.5: Björk 23
No.6: The Prodigy 23
No.7: Björk 23
No.8: The Prodigy 23
No.9: Björk 23
No.10: The Prodigy 23


## 69. Webアプリケーションの作成
cgi-bin/に切り出し