# __**BLOCK 3**__


# 4. Analyzing a piece of news text.
### Dash python framework. Part 1

### For more information regarding the Dash interface, see the link below:
https://dash.plot.ly/

## The code written in the DASH interface:
- gets a piece of news from the API.
- classifies the news using the model ([1_Classifier.ipynb](https://github.com/paquinho89/TFM/blob/master/1_Classifier.ipynb))
- counts the number of words.
- translates the text.
- classifies the words morphologically.
- plots the result in a bar graph.

## Instructions to run the code
- When the code is run, a link will appear below the last cell (it may take a while to show up). Click that link to open the interface. The server runs on your own computer.
- The data set is used to train the model. Therefore, the "dataset_news.csv" file must be saved in a folder called `data` before running the code.

#### To run the code, the packages below should be installed.

In [None]:
pip install plotly && pip install dash

In [None]:
! pip install dash==0.28.5  # The core dash backend
! pip install dash-html-components==0.13.2  # HTML components
! pip install dash-core-components==0.36.0  # Supercharged components
! pip install dash-table==3.1.2  # Interactive DataTable component (new!)

# DASH interface

In [None]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from sklearn.externals import joblib
import plotly.graph_objs as go
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import matplotlib.pyplot as plt
from sklearn.externals import joblib
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from textblob import TextBlob
from collections import Counter
from newsapi import NewsApiClient
from datetime import date, timedelta

In [None]:
# Labelled headlines used to train the classifier inside the callbacks.
file = pd.read_csv('./data/dataset_news.csv', sep=',')

# NOTE(review): the API key is hard-coded in the source; consider loading it
# from configuration instead of committing it.
newsapi = NewsApiClient(api_key='9fe0d6dd387c40bc8cb5fdec346f0bda')
news_sources = newsapi.get_sources()

# Date window (as ISO strings) computed once, when this cell is executed.
today = str(date.today())
yesterday = str(date.today() - timedelta(days=1))

app = dash.Dash()


def _spacer(component_id):
    """Return an empty, padded Div used as vertical spacing in the layout."""
    return html.Div(id=component_id, style={'padding': 10})


# One dropdown entry per English-language source reported by the News API.
_media_source_options = [
    {'label': source['id'], 'value': source['id']}
    for source in news_sources['sources']
    if source['language'] == 'en'
]

# (label, language code) pairs offered as translation targets.
_TARGET_LANGUAGES = (
    ('Galician', 'gl'),
    ('Euskara', 'eu'),
    ('Catalan', 'ca'),
    ('Spanish', 'es'),
    ('French', 'fr'),
    ('English', 'en'),
    ('Chinese', 'zh'),
    ('Russian', 'ru'),
    ('Portuguese', 'pt'),
    ('Arabic', 'ar'),
    ('Japanese', 'ja'),
    ('German', 'de'),
    ('Hindi', 'hi'),
)
_language_options = [
    {'label': language_name, 'value': language_code}
    for language_name, language_code in _TARGET_LANGUAGES
]

# The page is a single flat column: title, source picker, fetched text,
# classification result, translation picker/output, submit button, bar graph.
app.layout = html.Div(children=[
    html.H1(
        children='News Classifier for a piece of news',
        style={'textAlign': 'center'},
    ),
    html.Div(
        children=[
            html.Label('Analyzing a piece of news from the chosen media organization'),
        ],
        style={'textAlign': 'center'},
    ),
    _spacer('space1'),
    dcc.Markdown('**Choose a media organization:**'),
    dcc.Dropdown(
        id='drop-down-media-source',
        style={'width': '50%'},
        options=_media_source_options,
    ),
    html.Label("NOTE: If text does not appear, please, choose another organization media"),
    html.Div(id='output-container-news-text', children=''),
    _spacer('space2'),
    dcc.Markdown('**Result:**'),
    html.Div(
        id='output-container-button',
        children='IMPORTANT: This text will take a while to appear (45 seconds)',
    ),
    _spacer('space3'),
    html.Div(children=[
        dcc.Markdown('**Translate to**'),
        dcc.Dropdown(
            id='drop-down languages-to',
            style={'width': '50%'},
            value='gl',
            options=_language_options,
        ),
    ]),
    html.Div(id='output-container-tranlation', children=''),
    _spacer('space4'),
    html.Button('Submit', id='button'),
    _spacer('space5'),
    dcc.Markdown('**Bar graph which shows the number of words of each morphological class:**'),
    dcc.Graph(id='bar_graph', style={'overflowY': 'scroll', 'height': 500}),
])
#######---------------------------------------------------------------------------------
@app.callback(
    dash.dependencies.Output(component_id='output-container-news-text', component_property='children'),
    [dash.dependencies.Input('button', 'n_clicks')],
    [dash.dependencies.State('drop-down-media-source', 'value')])
def get_news(n_clicks, source_media):
    """Fetch the text of a recent article from the selected media source.

    Triggered by the Submit button; the chosen source is read as callback
    state.

    Args:
        n_clicks: Number of times the button has been clicked (``None``
            until the first click).
        source_media: Source id selected in the media dropdown, or ``None``
            when nothing is selected.

    Returns:
        The ``content`` field of the chosen article, or an empty string when
        there is nothing to fetch or the API returned no usable article.
    """
    # The callback is also invoked before any click / selection has been
    # made; querying the API without a source is pointless, so bail out.
    if not n_clicks or not source_media:
        return ''

    # NOTE(review): the API key is hard-coded; consider moving it to config.
    newsapi = NewsApiClient(api_key='9fe0d6dd387c40bc8cb5fdec346f0bda')

    # Compute the window at request time so a long-running server does not
    # keep querying the day on which it was started.
    current_day = date.today()
    all_articles = newsapi.get_everything(sources=source_media,
                                          from_param=str(current_day - timedelta(1)),
                                          to=str(current_day),
                                          language='en',
                                          sort_by='relevancy',
                                          page=1)

    articles = all_articles.get('articles') or []
    if not articles:
        return ''

    # The second article (index 1) is preferred, as before; fall back to the
    # first one instead of raising IndexError when only one is returned.
    article = articles[1] if len(articles) > 1 else articles[0]

    # 'content' may be missing or None; downstream callbacks expect a string.
    return article.get('content') or ''

#-------------------------------------------------------------------
# Holds the fitted (vectorizer, model) pair so it is trained only once.
_CLASSIFIER_CACHE = {}


def _get_trained_classifier():
    """Return the fitted ``(vectorizer, model)`` pair, training it on first use."""
    if 'fitted' not in _CLASSIFIER_CACHE:
        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform(file['TITLE'])
        y = LabelEncoder().fit_transform(file['CATEGORY'])
        # Only the training part of the split is used, as in the original code.
        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)
        model = MultinomialNB().fit(X_train, y_train)
        # Single assignment so a concurrent reader never sees a partial entry.
        _CLASSIFIER_CACHE['fitted'] = (vectorizer, model)
    return _CLASSIFIER_CACHE['fitted']


@app.callback(
    dash.dependencies.Output(component_id='output-container-button', component_property='children'),
    [dash.dependencies.Input('output-container-news-text', 'children')])
def news_classifier(news_text):
    """Classify the fetched news text and count its words.

    Args:
        news_text: Text currently shown in the news container.

    Returns:
        A sentence stating the predicted category and the word count, or a
        short notice when there is no text to classify.
    """
    # Nothing fetched yet (or the source had no content): avoid TextBlob(None)
    # and a meaningless prediction on empty input.
    if not news_text:
        return 'No news text available to classify.'

    vectorizer, nb = _get_trained_classifier()
    prediction = nb.predict(vectorizer.transform([news_text]))

    # Map the numeric class back to a readable name. LabelEncoder sorts its
    # classes, so this relies on the alphabetical order of these names
    # matching the order of the dataset's category codes.
    # NOTE(review): presumably the codes are b/e/m/t - verify against the CSV.
    le = preprocessing.LabelEncoder()
    le.fit(["business", "science and technology", "entertainment", "health"])
    prediction_result = list(le.inverse_transform(prediction))

    # Number of words
    number_words = len(TextBlob(news_text).words)
    return '''The piece of news is about {}, and the numbers of word
            in the text is {}.'''. format(prediction_result[0], number_words)
#----------------------------------------------------------------------------
@app.callback(
    dash.dependencies.Output(component_id='bar_graph', component_property='figure'),
    [dash.dependencies.Input('output-container-news-text', 'children')])
def bar_graph(news_text):
    """Build a bar chart with the number of words per part-of-speech tag.

    Args:
        news_text: Text currently shown in the news container.

    Returns:
        A figure dict with one ``go.Bar`` trace (tags ordered by descending
        count) and a layout with tilted x tick labels. The trace is empty
        when there is no text.
    """
    # Each parsed sentence is a list of tokens; the tag is read from
    # position 1 of each token, as in the original loop.
    parsed_sentences = TextBlob(news_text).parse().split() if news_text else []
    count_class_word = Counter(
        token[1]
        for sentence in parsed_sentences
        for token in sentence
        if len(token) > 1
    )

    # Punctuation marks are tagged as themselves; drop those single-character
    # pseudo-tags. The backslash is escaped to avoid an invalid escape sequence.
    symbols = '''@"{}()[].,:;+-*/&|'<>=~#$%€\\ºª_?¿!¡'''
    for symbol in symbols:
        count_class_word.pop(symbol, None)

    # (tag, count) pairs, most frequent first.
    list_class_word_sorted = count_class_word.most_common()

    tag_words={'CC':'coordi-conjunction', 'CD':'cardinal-digit', 'DT':'determiner',
              'EX':'existential', 'FW':'foreign-word', 'IN':'prepo/subor-conjunction',
              'JJ':'adjective', 'JJR':'adjective-comparative', 'JJS':'adjective-superlative',
              'LS':'list-marker','MD':'modal', 'NN':'noun-singular', 'NNS':'noun-plural',
              'NNP':'proper-noun-sing', 'NNPS':'proper-noun-plur','PDT':'predeterminer',
              'POS':'possessive-ending','PRP':'personal-pronoun','PRP$':'possessive-pronoun',
              'RB':'adver','RBR':'adverb-comp','RBS':'adverb-superl', 'RP':'particle',
              'TO':'to','UH':'interjection','VB':'verb-base-form','VBD':'verb-past-tense',
              'VBG':'verb-gerund/present-parti','VBN':'verb-past-parti',
              'VBP':'verb-sing.-present-non-3d','VBZ':'verb-3rd-person-sing.-present',
              'WDT':'wh-determiner','WP':'wh-pronoun','WP$':'possessive wh-pronoun','WRB':'wh-abverb'}

    # Labels and heights must come from the SAME ordered list; previously the
    # labels used the unsorted list, so bars were paired with the wrong tag.
    # Tags without a readable name are shown as the raw tag rather than None.
    return {'data': [go.Bar(
            x=[tag_words.get(tag, tag) for tag, _ in list_class_word_sorted],
            y=[count for _, count in list_class_word_sorted])],
            'layout': go.Layout(
            xaxis=dict(tickangle=-20))}
#-------------------------------------------------------------------------
@app.callback(
    dash.dependencies.Output(component_id='output-container-tranlation', component_property='children'),
    [dash.dependencies.Input('output-container-news-text', 'children')],
    [dash.dependencies.State('drop-down languages-to', 'value')])
def translation(text_news, to_languages):
    """Translate the fetched news text from English to the selected language.

    Args:
        text_news: Text currently shown in the news container.
        to_languages: Language code selected in the "Translate to" dropdown.

    Returns:
        The translated text. The original text is returned when the target
        is English or the translation comes back unchanged; an empty string
        is returned when there is no text or no target language.
    """
    # Imported locally so this callback is self-contained; textblob is
    # already a dependency of this file.
    from textblob.exceptions import NotTranslated

    if not text_news or not to_languages:
        return ''

    # The source language is fixed to English, so an English target is a
    # no-op; TextBlob would raise NotTranslated for it.
    if to_languages == 'en':
        return '{}'.format(text_news)

    try:
        text_translated = TextBlob(text_news).translate(from_lang='en', to=to_languages)
    except NotTranslated:
        # Raised when the translated text is identical to the input.
        return '{}'.format(text_news)
    return '{}'.format(text_translated)


# Register the external stylesheet used to style the page.
_external_stylesheet = {'external_url': 'https://codepen.io/chriddyp/pen/bWLwgP.css'}
app.css.append_css(_external_stylesheet)

# Start the Dash server only when this file is executed as the main program.
if __name__ == '__main__':
    app.run_server(debug=False)