<a href="https://colab.research.google.com/github/thiagofuruchima/disaster_message_classification/blob/main/app%5CML_Disaster_WebApp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install flask-ngrok



In [2]:
from flask_ngrok import run_with_ngrok

In [3]:
import json
import plotly
import pandas as pd

from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

from flask import Flask
from flask import render_template, request, jsonify
from plotly.graph_objs import Bar
import joblib
from sqlalchemy import create_engine

In [4]:
import nltk
# Download the NLTK resources needed at runtime. 'punkt' backs word_tokenize
# and 'wordnet' backs WordNetLemmatizer (both used by tokenize()).
# NOTE(review): 'averaged_perceptron_tagger' is not used by any code visible
# in this notebook — confirm whether the pickled model needs it.
nltk.download(['punkt', 'wordnet', 'averaged_perceptron_tagger'])

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [5]:
# Create the Flask application object; the @app.route decorators on index()
# and go() register their handlers on it.
app = Flask(__name__)
run_with_ngrok(app)   #starts ngrok when the app is run

In [6]:
# load data
# The URL's fourth slash makes the SQLite path absolute:
# /content/DisasterResponse.db.
engine = create_engine('sqlite:////content/DisasterResponse.db')
# Read the whole CLEAN_MESSAGES table into the module-level frame that the
# route handlers index()/go() and the exploration cells read.
df = pd.read_sql_table('CLEAN_MESSAGES', engine)

In [7]:
def tokenize(text):
    """Split ``text`` into word tokens and normalize each one.

    Every token produced by ``word_tokenize`` is lemmatized with a
    ``WordNetLemmatizer`` and then lower-cased and stripped of surrounding
    whitespace, in that order.

    NOTE(review): presumably the pickled classifier relies on this exact
    normalization — confirm before changing the order of operations.

    Args:
        text: The string to tokenize.

    Returns:
        list: The normalized tokens, in their original order.
    """
    words = word_tokenize(text)
    wnl = WordNetLemmatizer()
    return [wnl.lemmatize(word).lower().strip() for word in words]

# index webpage displays cool visuals and receives user input text for model
@app.route('/')
@app.route('/index')
def index():
    """Render the landing page with two Plotly bar charts.

    Reads the module-level ``df`` and builds:

    * per-category totals (column sums from column position 4 onward),
      sorted from largest to smallest;
    * the percentage of rows whose ``related`` value is greater than 0,
      alongside the remaining percentage.

    Returns:
        The rendered ``master.html`` template, which receives the graph
        element ids (``ids``) and the JSON-encoded figures (``graphJSON``).
    """
    # Sum each category column and order categories from most to least frequent.
    categories = df.iloc[:, 4:].sum().sort_values(ascending=False)
    category_names = list(categories.index.str.title().str.replace('_', ' '))
    category_counts = list(categories)

    # Scale to a percentage *before* rounding: rounding first and multiplying
    # afterwards can reintroduce float noise (e.g. 0.57 * 100 -> 56.99999999999999).
    related_percentage = ((df['related'] > 0).mean() * 100).round()
    related_counts = [related_percentage, 100 - related_percentage]
    related_names = ['Related', 'Non Related']

    # create visuals
    graphs = [
        {
            'data': [
                Bar(
                    x=category_names,
                    y=category_counts
                )
            ],

            'layout': {
                'title': 'Top Categories',
                'yaxis': {
                    'title': "Count"
                },
                'xaxis': {
                    'title': "Category"
                }
            }
        },

        {
            'data': [
                Bar(
                    x=related_names,
                    y=related_counts,
                    # Per-bar colours live on the trace's marker; a 'color'
                    # key in the layout is not a Plotly attribute and has no effect.
                    marker={'color': ['red', 'blue']}
                )
            ],

            'layout': {
                'title': 'Percentage of Messages Disaster Related',
                'yaxis': {
                    'title': "Percentage"
                },
                'xaxis': {
                    'title': "Related x Non Related"
                }
            }
        },
    ]

    # encode plotly graphs in JSON
    ids = ["graph-{}".format(i) for i, _ in enumerate(graphs)]
    graphJSON = json.dumps(graphs, cls=plotly.utils.PlotlyJSONEncoder)

    # render web page with plotly graphs
    return render_template('master.html', ids=ids, graphJSON=graphJSON)


# web page that handles user query and displays model results
@app.route('/go')
def go():
    """Classify the submitted query and render the results page.

    The ``query`` URL parameter (empty string when absent) is passed to the
    module-level ``model``; the first row of its prediction is paired with
    the column names of ``df`` from position 4 onward.

    Returns:
        The rendered ``go.html`` template, which receives ``query`` and the
        name-to-prediction mapping as ``classification_result``.
    """
    # Text typed by the user; defaults to '' if the parameter is missing.
    user_query = request.args.get('query', '')

    # The model is given a one-element batch, so keep only its first row.
    predicted_row = model.predict([user_query])[0]
    label_names = df.columns[4:]
    results_by_label = {
        label: value for label, value in zip(label_names, predicted_row)
    }

    # go.html displays the query together with the per-category predictions.
    return render_template(
        'go.html', query=user_query, classification_result=results_by_label
    )


def main(host='0.0.0.0', port=3001, debug=False):
    """Start the Flask development server for ``app``.

    Args:
        host: Interface to bind to. The default listens on all interfaces.
        port: TCP port to listen on.
        debug: Whether to enable Flask debug mode. Off by default: with the
            server bound to every interface, the interactive debugger would
            let any client that can reach it execute code in this process.
            Pass ``debug=True`` explicitly for local development only.

    NOTE(review): this notebook passes ``app`` to ``run_with_ngrok`` — confirm
    that the resulting ``app.run`` still accepts these keyword arguments
    before relying on this function.
    """
    app.run(host=host, port=port, debug=debug)

In [8]:
# load model
# go() reads this module-level `model` at request time, so this cell must run
# before the server is started.
# SECURITY: joblib.load unpickles the file, which can execute arbitrary code;
# only load classifier.pkl from a trusted source.
# The recorded run warned that the estimators were pickled under version
# 0.24.0 but loaded under 0.22.2.post1, which may give invalid results.
model = joblib.load("/content/classifier.pkl")


Trying to unpickle estimator CountVectorizer from version 0.24.0 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


Trying to unpickle estimator TfidfTransformer from version 0.24.0 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


Trying to unpickle estimator DecisionTreeClassifier from version 0.24.0 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


Trying to unpickle estimator RandomForestClassifier from version 0.24.0 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


Trying to unpickle estimator MultiOutputClassifier from version 0.24.0 when using version 0.22.2.post1. This might lead to breaking code or invalid results. Use at your own risk.


Trying to unpickle estimator Pipeline from version 0.24.0 when using version 0.22.2.post1. This might l

In [9]:
# Exploration: number of non-null `message` values per genre, and the genre
# labels in the same order as those counts.
genre_counts = df.groupby('genre').count()['message']
genre_names = list(genre_counts.index)

In [10]:
# Preview the first rows to check the column layout; the slices `4:` used
# elsewhere in this notebook start at the `related` column.
df.head()

Unnamed: 0,id,message,original,genre,related,request,offer,aid_related,medical_help,medical_products,search_and_rescue,security,military,child_alone,water,food,shelter,clothing,money,missing_people,refugees,death,other_aid,infrastructure_related,transport,buildings,electricity,tools,hospitals,shops,aid_centers,other_infrastructure,weather_related,floods,storm,fire,earthquake,cold,other_weather,direct_report
0,2,Weather update - a cold front from Cuba that c...,Un front froid se retrouve sur Cuba ce matin. ...,direct,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,7,Is the Hurricane over or is it not over,Cyclone nan fini osinon li pa fini,direct,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0
2,8,Looking for someone but no name,"Patnm, di Maryani relem pou li banm nouvel li ...",direct,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,9,UN reports Leogane 80-90 destroyed. Only Hospi...,UN reports Leogane 80-90 destroyed. Only Hospi...,direct,1,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0
4,12,"says: west side of Haiti, rest of the country ...",facade ouest d Haiti et le reste du pays aujou...,direct,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [11]:
# Exploration: the same category totals that index() charts — column sums from
# position 4 onward, sorted from largest to smallest.
categories = df.iloc[:,4:].sum().sort_values(ascending=False)
# Turn column names such as 'aid_related' into display labels ('Aid Related').
category_names = list(categories.index.str.title().str.replace('_', ' '))
category_counts = list(categories)

# Display the labels in descending order of their totals.
category_names

['Related',
 'Aid Related',
 'Weather Related',
 'Direct Report',
 'Request',
 'Other Aid',
 'Food',
 'Earthquake',
 'Storm',
 'Shelter',
 'Floods',
 'Medical Help',
 'Infrastructure Related',
 'Water',
 'Other Weather',
 'Buildings',
 'Medical Products',
 'Transport',
 'Death',
 'Other Infrastructure',
 'Refugees',
 'Military',
 'Search And Rescue',
 'Money',
 'Electricity',
 'Cold',
 'Security',
 'Clothing',
 'Aid Centers',
 'Missing People',
 'Hospitals',
 'Fire',
 'Tools',
 'Shops',
 'Offer',
 'Child Alone']

In [12]:
# Fraction of rows whose `related` value is greater than 0, rounded to two
# decimals.
(df['related']>0).mean().round(2)

0.77

In [13]:
# Start the server when the notebook is executed as __main__.
if __name__ == '__main__':
    # main() is left disabled; app.run() is called with no arguments instead.
    # NOTE(review): presumably because run_with_ngrok(app) changes how
    # app.run may be called — confirm before re-enabling main().
    app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)


 * Running on http://acf64d096e77.ngrok.io
 * Traffic stats available on http://127.0.0.1:4040


127.0.0.1 - - [17/Jan/2021 19:54:04] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [17/Jan/2021 19:54:05] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
