# IBM NLU 
In this notebook we will cover services provided by IBM NLU.
> An NLU service instance has already been created on IBM Cloud.

In [5]:
import json
! pip install --upgrade "ibm-watson>=4.2.1" # To Install package . Add "!" sign
from ibm_watson import NaturalLanguageUnderstandingV1
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson.natural_language_understanding_v1 import Features, CategoriesOptions,EmotionOptions, SentimentOptions, EntitiesOptions, ConceptsOptions, KeywordsOptions, MetadataOptions, RelationsOptions, SemanticRolesOptions

Collecting ibm-watson>=4.2.1
[?25l  Downloading https://files.pythonhosted.org/packages/da/f4/7e256026ee22c75a630c6de53eb45b6fef4840ac6728b80a92dd2e523a1a/ibm-watson-4.2.1.tar.gz (348kB)
[K     |████████████████████████████████| 358kB 8.1MB/s eta 0:00:01
Collecting websocket-client==0.48.0 (from ibm-watson>=4.2.1)
[?25l  Downloading https://files.pythonhosted.org/packages/8a/a1/72ef9aa26cfe1a75cee09fc1957e4723add9de098c15719416a1ee89386b/websocket_client-0.48.0-py2.py3-none-any.whl (198kB)
[K     |████████████████████████████████| 204kB 27.1MB/s eta 0:00:01
[?25hCollecting ibm_cloud_sdk_core==1.5.1 (from ibm-watson>=4.2.1)
  Downloading https://files.pythonhosted.org/packages/b7/f6/10d5271c807d73d236e6ae07b68035fed78b28b5ab836704d34097af3986/ibm-cloud-sdk-core-1.5.1.tar.gz
Collecting PyJWT>=1.7.1 (from ibm_cloud_sdk_core==1.5.1->ibm-watson>=4.2.1)
  Downloading https://files.pythonhosted.org/packages/87/8b/6a9f14b5f781697e51259d81657e6048fd31a113229cf346880bb7545565/PyJWT-1.7.1-py

# Add your credentials here
> On the Manage page of your NLU service in IBM Cloud, click Show Credentials.

In [6]:
# Add your credentials.
# Preferred: export IBM_NLU_API_KEY and IBM_NLU_URL before starting the kernel so
# that real secrets are never saved in (or shared with) the notebook.
# The fallback values below are placeholders only and won't work.
import os

API_KEY = os.environ.get("IBM_NLU_API_KEY", "ogufaS1iFOLKiQKkhTQlUnvJ_fdsf fd")
URL = os.environ.get(
    "IBM_NLU_URL",
    "https://api.eu-gb.natural-language-understanding.watson.cloud.ibm.com/instances/f4bd7fe5-3dsfdsfdsfds",
)

### Configuring our NLU instance

In [7]:
# Build the NLU client: authenticate with the IAM API key, pin the API version
# date, then point the client at the service instance URL.
NLU_API_VERSION = '2019-07-12'

authenticator = IAMAuthenticator(apikey=API_KEY)
natural_language_understanding = NaturalLanguageUnderstandingV1(
    version=NLU_API_VERSION,
    authenticator=authenticator,
)
natural_language_understanding.set_service_url(URL)

### Entities extraction from a web page
> Identifies people, cities, organizations, and other entities in the content. 
Refer: https://cloud.ibm.com/docs/services/natural-language-understanding?topic=natural-language-understanding-entity-type-systems

In [8]:
# Entity extraction on the CNN home page: sentiment enabled, limit=10.
entity_options = EntitiesOptions(sentiment=True, limit=10)
analysis = natural_language_understanding.analyze(
    url='www.cnn.com',
    features=Features(entities=entity_options),
)
response = analysis.get_result()
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 3084,
    "features": 1
  },
  "retrieved_url": "https://www.cnn.com/",
  "language": "en",
  "entities": [
    {
      "type": "Company",
      "text": "CNN",
      "sentiment": {
        "score": 0.87046,
        "label": "positive"
      },
      "relevance": 0.973112,
      "disambiguation": {
        "subtype": [
          "Broadcast",
          "AwardWinner",
          "RadioNetwork",
          "TVNetwork"
        ],
        "name": "CNN",
        "dbpedia_resource": "http://dbpedia.org/resource/CNN"
      },
      "count": 19,
      "confidence": 1
    },
    {
      "type": "Company",
      "text": "Digital Studios",
      "sentiment": {
        "score": 0,
        "label": "neutral"
      },
      "relevance": 0.384099,
      "count": 2,
      "confidence": 0.745343
    },
    {
      "type": "Location",
      "text": "US",
      "sentiment": {
        "score": 0,
        "label": "neutral"
      },
      "relevance": 

### Emotion extraction
> Detects anger, disgust, fear, joy, or sadness that is conveyed in the content or by the context around target phrases specified in the targets parameter.

Sentence = **Apples and Oranges. I love apples! I don't like oranges.**

In [9]:
# Emotion analysis of a small HTML document, targeted at the phrases
# 'apples' and 'oranges'.
fruit_html = "<html><head><title>Fruits</title></head><body><h1>Apples and Oranges</h1><p>I love apples! I don't like oranges.</p></body></html>"
emotion_options = EmotionOptions(targets=['apples', 'oranges'])
analysis = natural_language_understanding.analyze(
    html=fruit_html,
    features=Features(emotion=emotion_options),
)
response = analysis.get_result()
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 37,
    "features": 1
  },
  "language": "en",
  "emotion": {
    "targets": [
      {
        "text": "apples",
        "emotion": {
          "sadness": 0.028574,
          "joy": 0.859042,
          "fear": 0.02752,
          "disgust": 0.017519,
          "anger": 0.012855
        }
      },
      {
        "text": "oranges",
        "emotion": {
          "sadness": 0.514253,
          "joy": 0.078317,
          "fear": 0.074223,
          "disgust": 0.058103,
          "anger": 0.126859
        }
      }
    ],
    "document": {
      "emotion": {
        "sadness": 0.32665,
        "joy": 0.563273,
        "fear": 0.033387,
        "disgust": 0.022637,
        "anger": 0.041796
      }
    }
  }
}


### Concepts
> Returns high-level concepts in the content. 

In [10]:
# Concept extraction (limit=3) from a short passage about machine learning.
ml_passage = 'Machine learning is closely related to computational statistics, which focuses on making predictions using computers. The study of mathematical optimization delivers methods, theory and application domains to the field of machine learning.'
concept_options = ConceptsOptions(limit=3)
analysis = natural_language_understanding.analyze(
    text=ml_passage,
    features=Features(concepts=concept_options),
)
response = analysis.get_result()

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 239,
    "features": 1
  },
  "language": "en",
  "concepts": [
    {
      "text": "Mathematics",
      "relevance": 0.984733,
      "dbpedia_resource": "http://dbpedia.org/resource/Mathematics"
    },
    {
      "text": "Applied mathematics",
      "relevance": 0.874728,
      "dbpedia_resource": "http://dbpedia.org/resource/Applied_mathematics"
    },
    {
      "text": "Artificial intelligence",
      "relevance": 0.874726,
      "dbpedia_resource": "http://dbpedia.org/resource/Artificial_intelligence"
    }
  ]
}


## Categories
> Returns a five-level taxonomy of the content. The top three categories are returned.

In [11]:
# Category classification of the IBM home page (limit=3).
category_options = CategoriesOptions(limit=3)
analysis = natural_language_understanding.analyze(
    url='www.ibm.com',
    features=Features(categories=category_options),
)
response = analysis.get_result()

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 493,
    "features": 1
  },
  "retrieved_url": "https://www.ibm.com/uk-en",
  "language": "en",
  "categories": [
    {
      "score": 0.971989,
      "label": "/technology and computing/programming languages/java"
    },
    {
      "score": 0.946998,
      "label": "/technology and computing/operating systems"
    },
    {
      "score": 0.874673,
      "label": "/technology and computing/hardware/computer"
    }
  ]
}


## Keywords
> Returns important keywords in the content.

In [12]:
# Keyword extraction from the IBM home page with sentiment and emotion
# enabled (limit=2).
keyword_options = KeywordsOptions(sentiment=True, emotion=True, limit=2)
analysis = natural_language_understanding.analyze(
    url='www.ibm.com',
    features=Features(keywords=keyword_options),
)
response = analysis.get_result()

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 493,
    "features": 1
  },
  "retrieved_url": "https://www.ibm.com/uk-en",
  "language": "en",
  "keywords": [
    {
      "text": "IBM Cloud",
      "sentiment": {
        "score": 0,
        "label": "neutral"
      },
      "relevance": 0.82754,
      "emotion": {
        "sadness": 0.02059,
        "joy": 0.042895,
        "fear": 0.014017,
        "disgust": 0.004434,
        "anger": 0.005316
      },
      "count": 1
    },
    {
      "text": "latest IBM stories",
      "sentiment": {
        "score": 0,
        "label": "neutral"
      },
      "relevance": 0.734463,
      "emotion": {
        "sadness": 0.050359,
        "joy": 0.556432,
        "fear": 0.078952,
        "disgust": 0.014179,
        "anger": 0.045997
      },
      "count": 1
    }
  ]
}


## Metadata
> Returns information from the document, including author name, title, RSS/ATOM feeds, prominent page image, and publication date. Supports URL and HTML input types only.

In [13]:
# Document metadata for the IBM home page, using default MetadataOptions.
metadata_options = MetadataOptions()
analysis = natural_language_understanding.analyze(
    url='www.ibm.com',
    features=Features(metadata=metadata_options),
)
response = analysis.get_result()

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 493,
    "features": 1
  },
  "retrieved_url": "https://www.ibm.com/uk-en",
  "metadata": {
    "title": "IBM - United Kingdom",
    "publication_date": "",
    "image": "https://1.www.s81c.com/common/images/ibm-leadspace-1200x627.jpg",
    "feeds": [],
    "authors": []
  },
  "language": "en"
}


## Relations
> Recognizes when two entities are related and identifies the type of relation.

In [14]:
# Relation extraction on a single sentence, using default RelationsOptions.
award_sentence = 'Leonardo DiCaprio won Best Actor in a Leading Role for his performance.'
relation_options = RelationsOptions()
analysis = natural_language_understanding.analyze(
    text=award_sentence,
    features=Features(relations=relation_options),
)
response = analysis.get_result()
print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 71,
    "features": 1
  },
  "relations": [
    {
      "type": "awardedTo",
      "sentence": "Leonardo DiCaprio won Best Actor in a Leading Role for his performance.",
      "score": 0.680715,
      "arguments": [
        {
          "text": "Best Actor",
          "location": [
            22,
            32
          ],
          "entities": [
            {
              "type": "EntertainmentAward",
              "text": "Best Actor"
            }
          ]
        },
        {
          "text": "Leonardo DiCaprio",
          "location": [
            0,
            17
          ],
          "entities": [
            {
              "type": "Person",
              "text": "Leonardo DiCaprio"
            }
          ]
        }
      ]
    }
  ],
  "language": "en"
}


## Semantic Roles
> Parses sentences into subject, action, and object form.

In [15]:
# Semantic-role parsing of a single sentence, using default SemanticRolesOptions.
travel_sentence = 'John from IBM is travelling to Tokyo'
role_options = SemanticRolesOptions()
analysis = natural_language_understanding.analyze(
    text=travel_sentence,
    features=Features(semantic_roles=role_options),
)
response = analysis.get_result()

print(json.dumps(response, indent=2))

{
  "usage": {
    "text_units": 1,
    "text_characters": 36,
    "features": 1
  },
  "semantic_roles": [
    {
      "subject": {
        "text": "John from IBM"
      },
      "sentence": "John from IBM is travelling to Tokyo",
      "object": {
        "text": "travelling to Tokyo"
      },
      "action": {
        "verb": {
          "text": "be",
          "tense": "present"
        },
        "text": "is",
        "normalized": "be"
      }
    },
    {
      "subject": {
        "text": "John from IBM"
      },
      "sentence": "John from IBM is travelling to Tokyo",
      "object": {
        "text": "to Tokyo"
      },
      "action": {
        "verb": {
          "text": "travel",
          "tense": "present"
        },
        "text": "is travelling",
        "normalized": "be travel"
      }
    }
  ],
  "language": "en"
}


# Chat Analytics using IBM NLU

In [16]:

import os
import types
import pandas as pd
from botocore.client import Config
import ibm_boto3

def __iter__(self): return 0

# @hidden_cell
# The following code accesses a file in your IBM Cloud Object Storage.
# The API key is read from the IBM_COS_API_KEY_ID environment variable so the
# secret is never stored in, or shared with, the notebook itself.
# NOTE(review): the key that was previously hardcoded in this cell should be
# treated as exposed and rotated.
cos_api_key_id = os.environ.get("IBM_COS_API_KEY_ID")
if not cos_api_key_id:
    raise RuntimeError(
        "Set the IBM_COS_API_KEY_ID environment variable to your "
        "IBM Cloud Object Storage API key before running this cell."
    )

client_0c40be34a95c47ca92970b3a5b2daa55 = ibm_boto3.client(service_name='s3',
    ibm_api_key_id=cos_api_key_id,
    ibm_auth_endpoint="https://iam.eu-gb.bluemix.net/oidc/token",
    config=Config(signature_version='oauth'),
    endpoint_url='https://s3.eu-geo.objectstorage.service.networklayer.com')

# Fetch the CSV object; 'Body' is the streaming payload of the response.
body = client_0c40be34a95c47ca92970b3a5b2daa55.get_object(Bucket='projectdemo1-donotdelete-pr-unt5jk5ac2fgxd',Key='complains.csv')['Body']
# add missing __iter__ method, so pandas accepts body as file-like object
if not hasattr(body, "__iter__"): body.__iter__ = types.MethodType( __iter__, body )

df_data_1 = pd.read_csv(body)
print(df_data_1.shape)
df_data_1.head()

(20000, 2)


Unnamed: 0,PRODUCT,COMPLAINT_TEXT
0,"Credit reporting, credit repair services, or o...",and Transunion are reporting incorrectly tha...
1,"Credit reporting, credit repair services, or o...",and Transunion are reporting incorrectly that...
2,"Credit reporting, credit repair services, or o...",", , and Experian need to remove the collection..."
3,"Credit reporting, credit repair services, or o...","3 company with inconsistencies, violations and..."
4,"Credit reporting, credit repair services, or o...",I have a personal loan from Patriot finance. T...


In [19]:
# Work on the first 10 rows only and show their complaint text.
subset = df_data_1.iloc[:10]
subset.loc[:, "COMPLAINT_TEXT"]

0      and Transunion are reporting incorrectly tha...
1     and Transunion are reporting incorrectly that...
2    , , and Experian need to remove the collection...
3    3 company with inconsistencies, violations and...
4    I have a personal loan from Patriot finance. T...
5    Starting in // I started receiving calls from ...
6    Card issuer received confirmation of payment i...
7    YOUR COMPLAINT I am disputing the debt owed to...
8    I met the requirements for the ability to bene...
9    , , and Transunion are not taking my claims se...
Name: COMPLAINT_TEXT, dtype: object

In [18]:
# Keyword extraction (sentiment + emotion, limit=2) for each complaint in the
# subset; each JSON response is printed followed by a separator line.
for index, row in subset.iterrows():
    keyword_features = Features(
        keywords=KeywordsOptions(sentiment=True, emotion=True, limit=2)
    )
    response = natural_language_understanding.analyze(
        text=row["COMPLAINT_TEXT"],
        features=keyword_features,
    ).get_result()
    print(json.dumps(response, indent=2))
    print("########")

{
  "usage": {
    "text_units": 1,
    "text_characters": 422,
    "features": 1
  },
  "language": "en",
  "keywords": [
    {
      "text": "partial account numbers",
      "sentiment": {
        "score": -0.78011,
        "label": "negative"
      },
      "relevance": 0.856042,
      "emotion": {
        "sadness": 0.205481,
        "joy": 0.029508,
        "fear": 0.063324,
        "disgust": 0.019176,
        "anger": 0.032383
      },
      "count": 1
    },
    {
      "text": "past due",
      "sentiment": {
        "score": -0.26892,
        "mixed": "1",
        "label": "negative"
      },
      "relevance": 0.848379,
      "emotion": {
        "sadness": 0.239088,
        "joy": 0.025649,
        "fear": 0.053461,
        "disgust": 0.000599,
        "anger": 0.005452
      },
      "count": 2
    }
  ]
}
########
{
  "usage": {
    "text_units": 1,
    "text_characters": 407,
    "features": 1
  },
  "language": "en",
  "keywords": [
    {
      "text": "partial account 

In [47]:
# Same keyword analysis per complaint, printed with a wider separator line.
for index, row in subset.iterrows():
    complaint_text = row["COMPLAINT_TEXT"]
    keyword_options = KeywordsOptions(sentiment=True, emotion=True, limit=2)
    analysis = natural_language_understanding.analyze(
        text=complaint_text,
        features=Features(keywords=keyword_options),
    )
    response = analysis.get_result()
    print(json.dumps(response, indent=2))
    print("####################")

{
  "usage": {
    "text_units": 1,
    "text_characters": 422,
    "features": 1
  },
  "language": "en",
  "keywords": [
    {
      "text": "partial account numbers",
      "sentiment": {
        "score": -0.78011,
        "label": "negative"
      },
      "relevance": 0.856042,
      "emotion": {
        "sadness": 0.205481,
        "joy": 0.029508,
        "fear": 0.063324,
        "disgust": 0.019176,
        "anger": 0.032383
      },
      "count": 1
    },
    {
      "text": "past due",
      "sentiment": {
        "score": -0.26892,
        "mixed": "1",
        "label": "negative"
      },
      "relevance": 0.848379,
      "emotion": {
        "sadness": 0.239088,
        "joy": 0.025649,
        "fear": 0.053461,
        "disgust": 0.000599,
        "anger": 0.005452
      },
      "count": 2
    }
  ]
}
####################
{
  "usage": {
    "text_units": 1,
    "text_characters": 407,
    "features": 1
  },
  "language": "en",
  "keywords": [
    {
      "text": "part

In [49]:
# Collect every extracted keyword across the subset into `l`, printing each
# complaint followed by its numbered keywords.
l = []
for index, row in subset.iterrows():
    complaint_text = row["COMPLAINT_TEXT"]
    keyword_features = Features(
        keywords=KeywordsOptions(sentiment=True, emotion=True, limit=2)
    )
    response = natural_language_understanding.analyze(
        text=complaint_text,
        features=keyword_features,
    ).get_result()
    print("Text Sentence = ", complaint_text, "\n")

    # Number the keywords starting from 1 for display.
    for i, x in enumerate(response["keywords"], start=1):
        keyword = x["text"]
        print(i, ". Keyword = ", keyword)
        l.append(keyword)
    print("####################\n")
print(l)

Text Sentence =    and Transunion are reporting incorrectly that I am 120 days past due on loans with the   - partial account numbers  ;  ;  ; .. These accounts reflect a {$0.00} balance and a {$0.00} past due. I have contacted the two bureaus and requested these coding errors be corrected. This incorrect reporting is harming my credit score. It is  impossible to be past due on accounts that have a {$0.00} balance and {$0.00} past due. 

1 . Keyword =  partial account numbers
2 . Keyword =  past due
####################

Text Sentence =   and Transunion are reporting incorrectly that I am 120 days past due on a loan with the  - partial account number. This account reflects a {$0.00} balance and a {$0.00} past due. I have contacted the two bureaus and requested these coding errors be corrected. This incorrect reporting is harming my credit score. It is impossible to be past due on accounts that have a {$0.00} balance and {$0.00} past due. 

1 . Keyword =  partial account number
2 . Keyw