# nlp100 chapter7

Try: http://www.cl.ecei.tohoku.ac.jp/nlp100

## 60. KVSの構築

In [1]:
import json
import redis
import codecs

# Build a key-value store mapping artist name -> area in Redis database 0.
r = redis.Redis(host='localhost', port=6379, db=0)

# Pass the encoding explicitly: without it open() falls back to the platform's
# locale encoding, which breaks on non-ASCII artist names when that is not UTF-8.
# (assumes data/artist.json is UTF-8 JSON Lines -- TODO confirm)
with open('data/artist.json', encoding='utf-8') as reader:
    for line in reader:
        data = json.loads(line.rstrip())
        # Only records carrying both fields can be stored as a name -> area pair.
        if "name" in data and "area" in data:
            r.set(data['name'], data['area'])

## 61. KVSの検索

In [2]:
# Look up the stored area for two artists; Redis returns bytes, so decode
# before printing.
for artist in ('Thomas Selditz', 'George Gao'):
    area = r.get(artist)
    print(area.decode())

Germany
China


## 62. KVS内の反復処理

In [3]:
# Count the keys whose stored value is exactly the encoded string 'Japan'.
# Values come back from Redis as bytes, so compare against bytes.
target = 'Japan'.encode()
count = sum(1 for key in r.keys() if r.get(key) == target)
print('The number of Japanese:', count)

The number of Japanese: 22128


## 63. オブジェクトを値に格納したKVS

In [4]:
# Build a second key-value store mapping artist name -> JSON-encoded tag list.
# It lives in its own logical database (db=1): the name -> area store built
# with `r` uses db=0, and writing tags under the same artist-name keys there
# would overwrite those area values.
r2 = redis.Redis(host='localhost', port=6379, db=1)

# Explicit encoding so reading does not depend on the platform locale.
# (assumes data/artist.json is UTF-8 JSON Lines -- TODO confirm)
with open('data/artist.json', encoding='utf-8') as reader:
    for line in reader:
        data = json.loads(line.rstrip())
        if 'tags' in data:
            # Redis values are flat byte strings, so serialize the list of
            # tag objects to JSON text before storing it.
            tags = json.dumps(data['tags'])
            r2.set(data['name'].encode(), tags.encode())

In [5]:
# Fetch the JSON text stored under the key 'Oasis' and display it as a str.
oasis_key = 'Oasis'.encode()
oasis_tags_raw = r2.get(oasis_key)
oasis_tags_raw.decode()

'[{"value": "rock", "count": 1}, {"value": "britpop", "count": 3}, {"value": "british", "count": 4}, {"value": "uk", "count": 1}, {"value": "britannique", "count": 1}, {"value": "rock and indie", "count": 1}, {"value": "england", "count": 1}, {"value": "manchester", "count": 1}]'

## 64. MongoDBの構築

In [10]:
import pymongo

# Load every record of data/artist.json into the `artist` collection of the
# `musics` MongoDB database.
# NOTE(review): the records carry no `_id` of their own as far as the printed
# documents show, so re-running this cell presumably appends duplicates --
# confirm, and clear the collection first if a clean rebuild is wanted.
BATCH_SIZE = 1000  # documents sent per insert_many round trip

client = pymongo.MongoClient()
db = client.musics
print('db:', db.name)
collection = db.artist

# Explicit encoding so reading does not depend on the platform locale.
# (assumes data/artist.json is UTF-8 JSON Lines -- TODO confirm)
with open('data/artist.json', encoding='utf-8') as reader:
    batch = []
    for line in reader:
        batch.append(json.loads(line))
        # Insert in batches instead of one server round trip per document.
        if len(batch) >= BATCH_SIZE:
            collection.insert_many(batch)
            batch = []
    # Flush the remainder; insert_many rejects an empty list, hence the guard.
    if batch:
        collection.insert_many(batch)

db: musics


In [11]:
# Create ascending single-field indexes on the fields queried in this notebook.
for field in ('name', 'aliases.name', 'tags.value'):
    collection.create_index([(field, pymongo.ASCENDING)])

# Kept as the cell's final expression so the created index name is displayed.
collection.create_index([('rating.value', pymongo.ASCENDING)])

'rating.value_1'

## 65. MongoDBの検索

In [12]:
# Print every document whose `name` field is exactly 'Queen'.
queen_docs = collection.find({'name': 'Queen'})
for doc in queen_docs:
    print(doc)

{'tags': [{'value': 'kamen rider w', 'count': 1}, {'value': 'related-akb48', 'count': 1}], 'gender': 'Female', 'aliases': [{'name': 'Queen', 'sort_name': 'Queen'}], 'type': 'Character', 'sort_name': 'Queen', 'gid': '420ca290-76c5-41af-999e-564d7c71f1a7', '_id': ObjectId('58d1d74425c5cf3a7164608a'), 'ended': True, 'name': 'Queen', 'id': 701492, 'area': 'Japan'}
{'tags': [{'value': 'hard rock', 'count': 2}, {'value': '70s', 'count': 1}, {'value': 'queen family', 'count': 1}, {'value': '90s', 'count': 1}, {'value': '80s', 'count': 1}, {'value': 'glam rock', 'count': 1}, {'value': 'british', 'count': 4}, {'value': 'english', 'count': 1}, {'value': 'uk', 'count': 2}, {'value': 'pop/rock', 'count': 1}, {'value': 'pop-rock', 'count': 1}, {'value': 'britannique', 'count': 1}, {'value': 'classic pop and rock', 'count': 1}, {'value': 'queen', 'count': 1}, {'value': 'united kingdom', 'count': 1}, {'value': 'langham 1 studio bbc', 'count': 1}, {'value': 'kind of magic', 'count': 1}, {'value': 'ban

## 66. 検索件数の取得

In [13]:
# Count the documents whose `area` is 'Japan'. The count is done on the
# server, so the matching documents are not transferred to the client just to
# be tallied one by one.
count = collection.count_documents({'area': 'Japan'})
print(count)

22821


## 67. 複数のドキュメントの取得

In [14]:
# Print every document that has an alias entry whose name equals the query.
aliases_name = "Queen"

matches = collection.find({"aliases.name": aliases_name})
for doc in matches:
    print(doc)

{'tags': [{'value': 'kamen rider w', 'count': 1}, {'value': 'related-akb48', 'count': 1}], 'gender': 'Female', 'aliases': [{'name': 'Queen', 'sort_name': 'Queen'}], 'type': 'Character', 'sort_name': 'Queen', 'gid': '420ca290-76c5-41af-999e-564d7c71f1a7', '_id': ObjectId('58d1d74425c5cf3a7164608a'), 'ended': True, 'name': 'Queen', 'id': 701492, 'area': 'Japan'}


## 68. ソート

In [17]:
# Collect [name, rating count] for every artist tagged "dance" that has a
# rating; artists without a `rating` field are skipped.
dancer = [
    [doc['name'], doc['rating']['count']]
    for doc in collection.find({"tags.value": "dance"})
    if 'rating' in doc
]

# Highest rating count first; sorted() is stable, so ties keep cursor order.
dancecount = sorted(dancer, key=lambda pair: pair[1], reverse=True)

print('Dance artist rating count top 10:')

# Slicing caps the listing at ten rows (fewer if fewer artists matched).
for rank, (artist, votes) in enumerate(dancecount[:10], start=1):
    print('No.' + str(rank) + ':', artist, votes)

Dance artist rating count top 10:
No.1: Madonna 26
No.2: Björk 23
No.3: The Prodigy 23
No.4: Rihanna 15
No.5: Britney Spears 13
No.6: Maroon 5 11
No.7: Adam Lambert 7
No.8: Fatboy Slim 7
No.9: Basement Jaxx 6
No.10: Cornershop 5
