## NLP Feature Extraction using IBM Watson API 

In [1]:
import json
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Categories Features

from ibm_watson.natural_language_understanding_v1 import Features, CategoriesOptions

import os

# Read the credentials from the environment so the API key never has to be
# pasted into (and accidentally shared with) the notebook itself. The variable
# names follow the IBM SDK's <SERVICE_NAME>_APIKEY / <SERVICE_NAME>_URL
# convention. When a variable is unset or empty, the original placeholder text
# is used, so editing the placeholder by hand keeps working as before.
NLU_API_KEY = os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_APIKEY') or 'INSERT YOUR KEY HERE'
NLU_SERVICE_URL = os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_URL') or 'INSERT YOUR URL HERE'

authenticator = IAMAuthenticator(NLU_API_KEY)

# Single client object reused by every analysis cell in this notebook.
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12',
    authenticator=authenticator
)

natural_language_understanding.set_service_url(NLU_SERVICE_URL)

# Ask for at most three categories describing the page at www.ibm.com.
category_features = Features(categories=CategoriesOptions(limit=3))
response = natural_language_understanding.analyze(
    features=category_features,
    url='www.ibm.com',
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 499,
    "features": 1
  },
  "retrieved_url": "https://www.ibm.com/uk-en",
  "language": "en",
  "categories": [
    {
      "score": 0.847509,
      "label": "/technology and computing/operating systems"
    },
    {
      "score": 0.842159,
      "label": "/technology and computing/hardware/computer"
    },
    {
      "score": 0.786008,
      "label": "/technology and computing/hardware/computer peripherals"
    }
  ]
}


In [7]:
# Concepts feature: request at most three concepts for the page at www.ibm.com.

from ibm_watson.natural_language_understanding_v1 import Features, ConceptsOptions

concept_features = Features(concepts=ConceptsOptions(limit=3))
response = natural_language_understanding.analyze(
    features=concept_features,
    url='www.ibm.com',
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 499,
    "features": 1
  },
  "retrieved_url": "https://www.ibm.com/uk-en",
  "language": "en",
  "concepts": [
    {
      "text": "World",
      "relevance": 0.906329,
      "dbpedia_resource": "http://dbpedia.org/resource/World"
    },
    {
      "text": "IBM",
      "relevance": 0.886784,
      "dbpedia_resource": "http://dbpedia.org/resource/IBM"
    }
  ]
}


In [8]:
# Emotion feature: score emotions for the target words "apples" and "oranges"
# in an inline HTML snippet (no URL is fetched for this request).
from ibm_watson.natural_language_understanding_v1 import Features, EmotionOptions

fruit_html = "<html><head><title>Fruits</title></head><body><h1>Apples and Oranges</h1><p>I love apples! I don't like oranges.</p></body></html>"
emotion_options = EmotionOptions(targets=['apples', 'oranges'])

response = natural_language_understanding.analyze(
    features=Features(emotion=emotion_options),
    html=fruit_html,
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 37,
    "features": 1
  },
  "language": "en",
  "emotion": {
    "targets": [
      {
        "text": "apples",
        "emotion": {
          "sadness": 0.028574,
          "joy": 0.859042,
          "fear": 0.02752,
          "disgust": 0.017519,
          "anger": 0.012855
        }
      },
      {
        "text": "oranges",
        "emotion": {
          "sadness": 0.514253,
          "joy": 0.078317,
          "fear": 0.074223,
          "disgust": 0.058103,
          "anger": 0.126859
        }
      }
    ],
    "document": {
      "emotion": {
        "sadness": 0.32665,
        "joy": 0.563273,
        "fear": 0.033387,
        "disgust": 0.022637,
        "anger": 0.041796
      }
    }
  }
}


In [2]:
# Entities feature: request a single entity, with sentiment, for www.mitwpu.edu.in.
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions

entity_options = EntitiesOptions(limit=1, sentiment=True)
response = natural_language_understanding.analyze(
    features=Features(entities=entity_options),
    url='www.mitwpu.edu.in',
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 2,
    "text_characters": 18244,
    "features": 1
  },
  "retrieved_url": "https://mitwpu.edu.in/",
  "language": "en",
  "entities": [
    {
      "type": "Organization",
      "text": "MIT",
      "sentiment": {
        "score": 0.967961,
        "label": "positive"
      },
      "relevance": 0.958404,
      "count": 27,
      "confidence": 1
    }
  ]
}


In [10]:
# Keywords feature: request at most two keywords, each with sentiment and emotion.
from ibm_watson.natural_language_understanding_v1 import Features, KeywordsOptions

keyword_options = KeywordsOptions(limit=2, sentiment=True, emotion=True)
response = natural_language_understanding.analyze(
    features=Features(keywords=keyword_options),
    url='www.ibm.com',
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))


{
  "usage": {
    "text_units": 1,
    "text_characters": 499,
    "features": 1
  },
  "retrieved_url": "https://www.ibm.com/uk-en",
  "language": "en",
  "keywords": [
    {
      "text": "IBM CEO\u2019s Letter",
      "sentiment": {
        "score": 0,
        "label": "neutral"
      },
      "relevance": 0.848638,
      "emotion": {
        "sadness": 0.176185,
        "joy": 0.027905,
        "fear": 0.054308,
        "disgust": 0.111108,
        "anger": 0.098238
      },
      "count": 1
    },
    {
      "text": "sensitive data",
      "sentiment": {
        "score": 0,
        "label": "neutral"
      },
      "relevance": 0.807989,
      "emotion": {
        "sadness": 0.196886,
        "joy": 0.215446,
        "fear": 0.073413,
        "disgust": 0.051766,
        "anger": 0.0296
      },
      "count": 1
    }
  ]
}


In [11]:
# Metadata feature: request document metadata for the page at www.ibm.com.
from ibm_watson.natural_language_understanding_v1 import Features, MetadataOptions

metadata_features = Features(metadata=MetadataOptions())
response = natural_language_understanding.analyze(
    features=metadata_features,
    url='www.ibm.com',
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 499,
    "features": 1
  },
  "retrieved_url": "https://www.ibm.com/uk-en",
  "metadata": {
    "title": "IBM - United Kingdom",
    "publication_date": "",
    "image": "https://1.www.s81c.com/common/images/ibm-leadspace-1200x627.jpg",
    "feeds": [],
    "authors": []
  },
  "language": "en"
}


In [12]:
# Relations feature: analyze a single plain-text sentence passed inline.
from ibm_watson.natural_language_understanding_v1 import Features, RelationsOptions

award_sentence = 'Leonardo DiCaprio won Best Actor in a Leading Role for his performance.'
response = natural_language_understanding.analyze(
    features=Features(relations=RelationsOptions()),
    text=award_sentence,
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 71,
    "features": 1
  },
  "relations": [
    {
      "type": "awardedTo",
      "sentence": "Leonardo DiCaprio won Best Actor in a Leading Role for his performance.",
      "score": 0.680715,
      "arguments": [
        {
          "text": "Best Actor",
          "location": [
            22,
            32
          ],
          "entities": [
            {
              "type": "EntertainmentAward",
              "text": "Best Actor"
            }
          ]
        },
        {
          "text": "Leonardo DiCaprio",
          "location": [
            0,
            17
          ],
          "entities": [
            {
              "type": "Person",
              "text": "Leonardo DiCaprio"
            }
          ]
        }
      ]
    }
  ],
  "language": "en"
}


In [13]:
# Semantic roles feature: analyze a single plain-text sentence passed inline.
from ibm_watson.natural_language_understanding_v1 import Features, SemanticRolesOptions

workforce_sentence = 'IBM has one of the largest workforces in the world'
response = natural_language_understanding.analyze(
    features=Features(semantic_roles=SemanticRolesOptions()),
    text=workforce_sentence,
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 50,
    "features": 1
  },
  "semantic_roles": [
    {
      "subject": {
        "text": "IBM"
      },
      "sentence": "IBM has one of the largest workforces in the world",
      "object": {
        "text": "one of the largest workforces in the world"
      },
      "action": {
        "verb": {
          "text": "have",
          "tense": "present"
        },
        "text": "has",
        "normalized": "have"
      }
    }
  ],
  "language": "en"
}


In [17]:
# Sentiment feature: request sentiment for www.ibm.com/in-en with "AI" as a target.
from ibm_watson.natural_language_understanding_v1 import Features, SentimentOptions

sentiment_options = SentimentOptions(targets=['AI'])
response = natural_language_understanding.analyze(
    features=Features(sentiment=sentiment_options),
    url='www.ibm.com/in-en',
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 655,
    "features": 1
  },
  "sentiment": {
    "targets": [
      {
        "text": "AI",
        "score": 0.613903,
        "label": "positive"
      }
    ],
    "document": {
      "score": 0.97574,
      "label": "positive"
    }
  },
  "retrieved_url": "https://www.ibm.com/in-en",
  "language": "en"
}


In [18]:
# Syntax feature: request sentences plus per-token lemma and part of speech
# for a short plain-text sentence passed inline.
from ibm_watson.natural_language_understanding_v1 import Features, SyntaxOptions, SyntaxOptionsTokens

token_options = SyntaxOptionsTokens(lemma=True, part_of_speech=True)
syntax_options = SyntaxOptions(sentences=True, tokens=token_options)

response = natural_language_understanding.analyze(
    features=Features(syntax=syntax_options),
    text='With great power comes great responsibility',
).get_result()

# Pretty-print the decoded result.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 43,
    "features": 0
  },
  "syntax": {
    "tokens": [
      {
        "text": "With",
        "part_of_speech": "ADP",
        "location": [
          0,
          4
        ],
        "lemma": "with"
      },
      {
        "text": "great",
        "part_of_speech": "ADJ",
        "location": [
          5,
          10
        ],
        "lemma": "great"
      },
      {
        "text": "power",
        "part_of_speech": "NOUN",
        "location": [
          11,
          16
        ],
        "lemma": "power"
      },
      {
        "text": "comes",
        "part_of_speech": "VERB",
        "location": [
          17,
          22
        ],
        "lemma": "come"
      },
      {
        "text": "great",
        "part_of_speech": "ADJ",
        "location": [
          23,
          28
        ],
        "lemma": "great"
      },
      {
        "text": "responsibility",
        "part_of_speech": "NOUN",
        "loc