## NLP Website Feature Extraction using IBM Watson API 

In [2]:
import json
import os

from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

# Categories feature

from ibm_watson.natural_language_understanding_v1 import Features, CategoriesOptions

# Credentials: take the API key and service URL from the environment when they
# are set, so real secrets never have to be pasted into (and saved with) the
# notebook. The variable names follow the IBM SDK's <SERVICE_NAME>_APIKEY /
# <SERVICE_NAME>_URL convention. When the variables are unset, the original
# placeholders are used and must be edited by hand before running.
authenticator = IAMAuthenticator(
    os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_APIKEY', 'INSERT YOUR KEY HERE'))
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version='2019-07-12',
    authenticator=authenticator
)

natural_language_understanding.set_service_url(
    os.environ.get('NATURAL_LANGUAGE_UNDERSTANDING_URL', 'INSERT YOUR URL HERE'))

# Categories request: analyze the page at the given URL and return at most
# five categories (limit=5).
response = natural_language_understanding.analyze(
    url='www.mitwpu.edu.in',
    features=Features(categories=CategoriesOptions(limit=5))).get_result()

# Pretty-print the result dictionary as indented JSON.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 2,
    "text_characters": 15336,
    "features": 1
  },
  "retrieved_url": "https://mitwpu.edu.in/",
  "language": "en",
  "categories": [
    {
      "score": 0.971248,
      "label": "/science/engineering"
    },
    {
      "score": 0.878459,
      "label": "/education/alumni and reunions"
    },
    {
      "score": 0.862872,
      "label": "/science/computer science"
    },
    {
      "score": 0.808773,
      "label": "/education/graduate school/college"
    },
    {
      "score": 0.77299,
      "label": "/education/studying business"
    }
  ]
}


In [3]:
# Concepts feature

from ibm_watson.natural_language_understanding_v1 import Features, ConceptsOptions
 
# Concepts request: analyze the same page, capped at three concepts (limit=3).
concepts_features = Features(concepts=ConceptsOptions(limit=3))
response = natural_language_understanding.analyze(
    features=concepts_features,
    url='www.mitwpu.edu.in',
).get_result()

# Pretty-print the result dictionary as indented JSON.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 2,
    "text_characters": 15336,
    "features": 1
  },
  "retrieved_url": "https://mitwpu.edu.in/",
  "language": "en",
  "concepts": [
    {
      "text": "Academic degree",
      "relevance": 0.963111,
      "dbpedia_resource": "http://dbpedia.org/resource/Academic_degree"
    },
    {
      "text": "Engineering",
      "relevance": 0.655268,
      "dbpedia_resource": "http://dbpedia.org/resource/Engineering"
    },
    {
      "text": "Electrical engineering",
      "relevance": 0.58971,
      "dbpedia_resource": "http://dbpedia.org/resource/Electrical_engineering"
    }
  ]
}


In [4]:
# Emotion feature
from ibm_watson.natural_language_understanding_v1 import Features, EmotionOptions

# Emotion request: analyze the inline passage with two target phrases.
# Note: 'Students' does not occur in the passage, and the recorded output
# below carries a target entry for 'Science' only.
emotion_features = Features(emotion=EmotionOptions(targets=['Science', 'Students']))
response = natural_language_understanding.analyze(
    features=emotion_features,
    text="Science has contributed to all the sectors increased employment opportunities, saved millions of lives and has played a major role in a lot of industries. Science is very important for the growth and development of India. It even plays a key role in our daily lives. Every country should invest as much as possible in research and development for scientific technologies.",
).get_result()

# Pretty-print the result dictionary as indented JSON.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 371,
    "features": 1
  },
  "language": "en",
  "emotion": {
    "targets": [
      {
        "text": "Science",
        "emotion": {
          "sadness": 0.127974,
          "joy": 0.595299,
          "fear": 0.032478,
          "disgust": 0.00761,
          "anger": 0.007259
        }
      }
    ],
    "document": {
      "emotion": {
        "sadness": 0.174299,
        "joy": 0.7121,
        "fear": 0.049577,
        "disgust": 0.003456,
        "anger": 0.015958
      }
    }
  }
}


In [5]:
# Entities feature
from ibm_watson.natural_language_understanding_v1 import Features, EntitiesOptions

# Entities request: analyze the CNN home page, returning at most one entity
# (limit=1) with sentiment enabled for it.
entity_options = EntitiesOptions(limit=1, sentiment=True)
response = natural_language_understanding.analyze(
    features=Features(entities=entity_options),
    url='www.cnn.com',
).get_result()

# Pretty-print the result dictionary as indented JSON.
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 3223,
    "features": 1
  },
  "retrieved_url": "https://www.cnn.com/",
  "language": "en",
  "entities": [
    {
      "type": "Company",
      "text": "CNN",
      "sentiment": {
        "score": 0.858806,
        "label": "positive"
      },
      "relevance": 0.969919,
      "disambiguation": {
        "subtype": [
          "Broadcast",
          "AwardWinner",
          "RadioNetwork",
          "TVNetwork"
        ],
        "name": "CNN",
        "dbpedia_resource": "http://dbpedia.org/resource/CNN"
      },
      "count": 18,
      "confidence": 1
    }
  ]
}
