# Let's see how to work with text data

#### General Imports

In [1]:
import pandas as pd
import numpy as np

### text related imports

#### load NLP model

In [7]:

import spacy
import en_core_web_md
# Load the en_core_web_md pipeline; `nlp` is called on text in later cells.
nlp = en_core_web_md.load()

## [Jovian](https://www.jovian.ml/) is a collaboration platform for the data science and machine learning community.

You can maintain your code and models, talk to other people, gain knowledge, network, and grow!

In [1]:
import jovian

In [2]:
# Save and upload this notebook to Jovian (see the log output below for the resulting URL).
jovian.commit()

<IPython.core.display.Javascript object>

[jovian] Attempting to save notebook..
[jovian] Uploading notebook..
[jovian] Capturing environment..
[jovian] Committed successfully! https://jovian.ml/samrudh/day-1-deep-learning-applications-4525c


'https://jovian.ml/samrudh/day-1-deep-learning-applications-4525c'

# Text application: deep learning

#### Can we find countries from the text?

In [14]:
# Sample headline to analyse. Alternative to try: "India bans China’s TikTok"
sentence = (
    'India is a "good example" as solar auctions have seen popularity '
    'amidst the height of the COVID-19 pandemic, '
    'UN chief Antonio Guterres said on Thursday'
)

#### Apply NLP model

In [15]:
# Run the loaded pipeline on the sample sentence; `doc.ents` is read in the cells below.
doc = nlp(sentence)

#### Entities present in the sentence

In [24]:
# List every named entity found in the sentence together with its label.
for ent in doc.ents:
    print(f"Entity: {ent.text} with label: {ent.label_}")
    

Entity: India with label: GPE
Entity: UN with label: ORG
Entity: Antonio Guterres with label: PERSON
Entity: Thursday with label: DATE


In [22]:
# Keep only the entities labelled GPE, then report each one.
gpe_entities = [ent for ent in doc.ents if ent.label_ == 'GPE']
for ent in gpe_entities:
    print(f"Found country {ent}")

Found country India


#### Write a small function to get the countries mentioned in a given sentence

In [25]:
def get_country_from_sentence(sentence):
    '''
    Return the text of every entity labelled GPE in ``sentence``.

    input : sentence : text line to analyse
    output: list of entity strings whose label is 'GPE', in the order
            ``doc.ents`` yields them; duplicates are kept.

    Note: the GPE label is not restricted to countries; the results
    further down in this notebook include a city ('Seoul').
    '''
    # Uses the module-level ``nlp`` pipeline loaded earlier in the notebook.
    return [ent.text for ent in nlp(sentence).ents if ent.label_ == 'GPE']

### Let's now get some of the latest news from Google and find the countries mentioned

In [26]:
from pygooglenews import GoogleNews

In [27]:
# Create a GoogleNews object with its default arguments.
gn = GoogleNews()

In [28]:
# Request the headlines for the "world" topic (presumably a network call — results vary per run).
# The result is indexed with 'entries' in the cells below.
world_news = gn.topic_headlines("world")

In [None]:
# Inspect the raw object returned by topic_headlines (the output may be long).
world_news

In [None]:
# Look at just the 'entries' collection, which the loops below iterate over.
world_news['entries']

In [None]:
# Print only the headline text of each entry.
for entry in world_news['entries']:
    print(entry['title'])

In [32]:
# Collect every GPE mention across all headlines. Duplicates are kept on
# purpose so that mentions can be counted later.
all_countries_in_news = []
for news in world_news['entries']:
    headline = news['title']
    # extend() appends in place; `lst = lst + other` would copy the whole
    # accumulated list on every iteration.
    all_countries_in_news.extend(get_country_from_sentence(headline))

In [None]:
# Every collected mention, duplicates included.
all_countries_in_news

## Unique locations mentioned in the news today

In [None]:
# Convert to a set to drop repeated mentions.
set(all_countries_in_news)

## Remove punctuation to avoid duplicates

In [None]:
import string

# Build the translation table once; it is the same for every element, so
# there is no need to recreate it inside the comprehension.
punctuation_table = str.maketrans('', '', string.punctuation)

# Strip ASCII punctuation so that variants differing only by punctuation
# collapse into a single spelling.
clean_countries = [country.translate(punctuation_table) for country in all_countries_in_news]
set(clean_countries)

## Most common locations in the news today

In [36]:
from collections import Counter

# Tally how often each cleaned location name occurs, then show the top five.
location_counts = Counter(clean_countries)
location_counts.most_common(5)

[('US', 7), ('China', 5), ('Russia', 4), ('Seoul', 2), ('New Zealand', 2)]

## YouTube comment analytics

https://medium.com/tech-that-works/create-a-song-playlist-from-youtube-comments-using-nlp-a810a6198bac