# Google Natural Language API Client
## Setup
[https://cloud.google.com/natural-language/docs/reference/libraries#client-libraries-install-python](https://cloud.google.com/natural-language/docs/reference/libraries#client-libraries-install-python)

Run:
```
export GOOGLE_APPLICATION_CREDENTIALS="[PATH]"
```

## Sample Code
Run the code below to confirm that everything is set up correctly. The output should look similar to:
```
Text: Hello, world!
Sentiment: 0.300000011921, 0.300000011921
```

In [None]:
# Imports the Google Cloud client library
from google.cloud import language
from google.cloud.language import enums
from google.cloud.language import types

# Build the Language API client
client = language.LanguageServiceClient()

# Wrap the sample text in a plain-text Document
text = u'Hello, world!'
document = types.Document(type=enums.Document.Type.PLAIN_TEXT, content=text)

# Request the document-level sentiment for that text
sentiment = client.analyze_sentiment(document=document).document_sentiment

# Report the analyzed text, then its sentiment score and magnitude
print('Text: {}'.format(text))
print('Sentiment: {}, {}'.format(sentiment.score, sentiment.magnitude))

# REST API Call

Export your API key as an environment variable:
```
export GOOGLE_KEY=MY_SECRET_KEY
```

In [1]:
import gzip

META_CELLPHONE = 'Datasets/meta_Cell_Phones_and_Accessories.json.gz'


def get_documents(file_name):
    """Load every record from a gzip-compressed, line-delimited file.

    Each line of the file is evaluated as a Python expression and the
    resulting objects are returned in file order.

    Args:
        file_name: Path of the gzip-compressed file to read.

    Returns:
        A list holding one evaluated object per line of the file.
    """
    # The context manager closes the archive even if a line fails to evaluate.
    with gzip.open(file_name, 'r') as g:
        # SECURITY: eval() executes whatever code the file contains. Only run
        # this on trusted dataset files; if the records are plain literals,
        # ast.literal_eval would be a safer replacement.
        return [eval(line) for line in g]

In [2]:
# Load every record of the cell-phone metadata file into a list.
all_documents = get_documents(META_CELLPHONE)

In [3]:
import os
# Read the API key from the GOOGLE_KEY environment variable (KeyError if unset).
api_key = os.environ['GOOGLE_KEY']

In [4]:
import requests
import json

# analyzeEntities endpoint URL; the API key is embedded in the query string.
LANGUAGE_ENDPOINT = 'https://language.googleapis.com/v1/documents:analyzeEntities?key={}'.format(api_key)

def merge_title_description(document):
    """Concatenate a document's title and description into one string.

    A missing 'title' or 'description' key contributes an empty string.
    The two parts are joined directly, with no separator between them.
    """
    parts = [document[field] if field in document else ''
             for field in ('title', 'description')]
    return parts[0] + parts[1]

def perform_api_request(document):
    """POST a document's merged title and description for entity analysis.

    Args:
        document: Dict passed to merge_title_description to build the text
            that is sent as the request content.

    Returns:
        The response body as text.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx or 5xx
            status code.
    """
    payload = {
        "document": {
            "type": "PLAIN_TEXT",
            "language": "EN",
            "content": merge_title_description(document)
        },
        "encodingType": "UTF8"
    }

    r = requests.post(LANGUAGE_ENDPOINT, data=json.dumps(payload))
    # Fail loudly on an error status so that an error body is never returned
    # to the caller as if it were an analysis result.
    r.raise_for_status()
    return r.text

In [5]:
def save_api_request(document_name, r):
    """Write an API result to 'Processed/<document_name>.json' as JSON.

    Args:
        document_name: Base name of the output file.
        r: JSON-serializable value to store.

    Raises:
        IOError/OSError: If the file cannot be opened for writing, for
            example because the 'Processed' directory does not exist.
    """
    file_name = 'Processed/{}.json'.format(document_name)
    with open(file_name, 'w') as outfile:
        # json.dump writes straight to outfile and returns None, so there is
        # no result worth binding to a name.
        json.dump(r, outfile)

In [6]:
def serialize_processed_document(asin):
    """Load the cached API result for a product, if one has been saved.

    The loaded JSON value is itself passed to json.loads, so the file is
    expected to hold a JSON-encoded string that contains a JSON document.

    Args:
        asin: Identifier used to build the 'Processed/<asin>.json' path.

    Returns:
        The parsed result, or None when no file exists for asin.
    """
    file_name = 'Processed/{}.json'.format(asin)
    if not os.path.exists(file_name):
        return None
    # Use a context manager so the file handle is closed promptly instead of
    # being left open until garbage collection.
    with open(file_name, 'r') as infile:
        data = json.load(infile)
    return json.loads(data)

In [None]:
import os

RUN_LIMIT = 5000


def process_all_documents(all_documents):
    """Request and cache API results for documents that have none yet.

    A document counts as processed when 'Processed/<asin>.json' exists; such
    documents are skipped without reading the file. The run stops once
    RUN_LIMIT new requests have been made.

    Args:
        all_documents: Iterable of dicts, each with an 'asin' key.
    """
    count = 0
    for document in all_documents:
        asin = document['asin']
        # Only the existence of the cache file matters here, so the cached
        # content is deliberately not loaded.
        if os.path.exists('Processed/{}.json'.format(asin)):
            continue
        save_api_request(asin, perform_api_request(document))
        count += 1
        if count == RUN_LIMIT:
            break

# Request results for documents without a cached file, up to RUN_LIMIT requests.
process_all_documents(all_documents)

In [7]:
def read_files_into_memory(all_documents):
    """Collect cached API results for the leading run of processed documents.

    Documents are visited in order and loading stops at the first one that
    has no 'Processed/<asin>.json' file.

    Args:
        all_documents: Iterable of dicts, each with an 'asin' key.

    Returns:
        A dict mapping asin to the value returned by
        serialize_processed_document. A dict is always returned (possibly
        empty), including when every document has a cached file.
    """
    serialized_documents = {}
    for document in all_documents:
        asin = document['asin']
        if not os.path.exists('Processed/{}.json'.format(asin)):
            # Stop at the first document without a cached result.
            break
        serialized_documents[asin] = serialize_processed_document(asin)
    # Return after the loop so the result is a dict on every path.
    return serialized_documents

In [10]:
# Load the cached API results, keyed by asin.
serialized = read_files_into_memory(all_documents)

In [12]:
def read_first_ten(serialized):
    """Print the first ten items obtained by iterating over serialized.

    For a dict this prints up to ten of its keys. Fewer than ten lines are
    printed when the iterable is shorter.

    Args:
        serialized: Any iterable; each item is printed on its own line.
    """
    from itertools import islice

    for document in islice(serialized, 10):
        # A single-argument print(...) call behaves the same on Python 2 and
        # Python 3, unlike the print statement.
        print(document)
# Print a sample of the loaded results.
read_first_ten(serialized)

B002M25U0E
B003ZYHOK2
B002ES6F3M
B0038YGUM2
B0035VECNM
B0012UIDRO
B001656W46
B002P4YXBQ
B003XP09A0
B001RD858I


For now, we only work with the highest-salience CONSUMER_GOOD entity of each product. Later, we will make this more complex and consider all CONSUMER_GOOD entities returned for each product. We will also remove any duplicates that come up.

The temporary code here removes capitalization and spaces. We will eventually need a classifier that returns the same string for equivalent names, which will be used as the key to our map.

        entities = {}
        name = ""
        # function to parse file 
         for entity in entities.entities:
             # The entities are sorted by highest salience -> lowest salience
             if (entity.type == language.types.Entity.CONSUMER_GOOD):
                 # for now, just remove capitilization and spaces. but classifier is ideal
                 name = ''.join(entity.name.split()).lower()
                 break

In [None]:
def getConsumerGood(file):
    """Placeholder: currently returns an empty string for any input."""
    placeholder_name = ""
    return placeholder_name

In [None]:
# I'm assuming that the map<asin, file>files exists and the function getConsumerGood
# exists that takes in a file and returns the highest salience consumer good name 
all_categories = {}


def map_categories(all_documents):
    """Group documents by category, sub-category and consumer-good name.

    Fills the module-level all_categories mapping with the shape
    {category: {sub_category: {consumer_good_name: [document, ...]}}}.

    Relies on two names that must exist at module level when this runs:
    `files`, a mapping from asin to the processed file for that product, and
    `getConsumerGood`, which turns such a file into a consumer-good name.

    Args:
        all_documents: Iterable of dicts with an 'asin' key and, optionally,
            a 'categories' key holding a list of category paths. Each path
            is a sequence whose first two entries are used as the category
            and the sub-category.
    """
    for document in all_documents:
        # Documents are dicts, so the asin is read by key.
        name = getConsumerGood(files[document['asin']])
        for category in document.get('categories', ()):
            # A path needs both a category and a sub-category to be filed.
            if len(category) < 2:
                continue
            # Index the path itself (category[0], category[1]) so the keys are
            # the same ones recommendItems looks up.
            sub_categories = all_categories.setdefault(category[0], {})
            goods = sub_categories.setdefault(category[1], {})
            goods.setdefault(name, []).append(document)

## Query-based Algorithm
Based on the user's query, this recommends three items from each sub-category of the query product's overall category, excluding the sub-category the product itself is classified under. The query will also come in as a JSON file.

In [None]:
# map_categories(all_documents)
def recommendItems(query):
    """Recommend up to three items from each sibling sub-category.

    Looks up the query's overall category in all_categories and, for every
    sub-category other than the query's own, collects the first three
    documents stored under the query's consumer-good name.

    Args:
        query: Dict with a 'categories' key whose first path provides the
            category (index 0) and the sub-category (index 1). It is also
            passed to getConsumerGood to obtain the name to match.

    Returns:
        A list of matching documents. It is empty when the category is
        unknown or no other sub-category has items under that name.
    """
    recommend = []
    name = getConsumerGood(query)
    category = query['categories'][0][0]
    sub_category = query['categories'][0][1]
    for key, goods in all_categories.get(category, {}).items():
        if key == sub_category:
            continue
        # .get avoids a KeyError when this sub-category has nothing under the
        # name; slicing takes the first three items and tolerates short lists.
        recommend.extend(goods.get(name, [])[:3])
    return recommend

In [None]:
# recommendItems(file)