In [1]:
import warnings

import joblib
import pandas as pd
import plotly
import plotly.graph_objs as go
import plotly.io as pio
from plotly.subplots import make_subplots
from sqlalchemy import create_engine

warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
def tokenize(text):
    """
    Turn a raw message into a list of lower-cased, lemmatized tokens.

    Steps: replace every non-alphanumeric character with a space, split the
    text into tokens, drop English stop words, then lower-case, strip and
    lemmatize each remaining token.

    NOTE(review): ``word_tokenize``, ``stopwords`` and ``WordNetLemmatizer``
    are not imported in the visible cells of this notebook (presumably they
    come from nltk) -- confirm they are in scope before calling this function.

    :arg text: (string) text to process
    :return clean_tokens: (list) of lemmatized tokens
    """
    # `re` is not imported in the notebook's import cell, so bring it in here.
    import re

    # Normalize text: anything that is not a letter or digit becomes a space
    text = re.sub(r'[^a-zA-Z0-9]', " ", text)
    # Tokenize text
    tokens = word_tokenize(text)

    # Remove stop words. The stop-word list is built once, as a set, instead of
    # being re-fetched for every token.
    # NOTE(review): the filter runs before lower-casing, so capitalised stop
    # words (e.g. "The") are kept. Left unchanged on purpose so the function
    # keeps producing exactly the same tokens as before.
    stop_words = set(stopwords.words('english'))
    tokens = [w for w in tokens if w not in stop_words]

    lemmatizer = WordNetLemmatizer()
    clean_tokens = [lemmatizer.lemmatize(tok.lower().strip()) for tok in tokens]

    return clean_tokens

In [3]:
# Read the 'DisasterResponse' table from the local SQLite file into a DataFrame.
engine = create_engine('sqlite:///data/DisasterResponse.db')
df = pd.read_sql_table(table_name='DisasterResponse', con=engine)

# Restore the pickled model object from disk.
# NOTE(review): joblib.load unpickles arbitrary objects -- only load files you trust.
model = joblib.load(filename="models/model.pkl")

In [4]:
import plotly.express as px

In [29]:
# Percentage of messages flagged with each category, sorted ascending.
# The denominator is the number of rows in `df`: the previous `len(categories)`
# referenced a name that is never defined in this notebook and raised a
# NameError on a fresh kernel.
# NOTE(review): assumes `categories` had one row per message, and that every
# column left after the drop is a 0/1 category indicator -- confirm (an `id`
# column, if present, would have to be dropped as well).
cat_perc = df.drop(['message', 'original', 'genre'], axis=1).sum() / len(df) * 100
cat_perc = cat_perc.sort_values()
cat_perc

child_alone                0.000000
offer                      0.450124
shops                      0.457753
tools                      0.606523
fire                       1.075720
hospitals                  1.079535
missing_people             1.136754
aid_centers                1.178714
clothing                   1.544917
security                   1.796681
cold                       2.021743
electricity                2.029372
money                      2.304024
search_and_rescue          2.761778
military                   3.280565
refugees                   3.337784
other_infrastructure       4.390616
death                      4.554644
transport                  4.581347
medical_products           5.008583
buildings                  5.084875
other_weather              5.248903
water                      6.378028
infrastructure_related     6.503910
medical_help               7.949647
floods                     8.220484
shelter                    8.827007
storm                      9

In [25]:
# Share of messages (in %) per genre, relative to all non-null messages.
messages_per_genre = df.groupby('genre')['message'].count()
genre_counts = messages_per_genre / df['message'].count() * 100
genre_names = genre_counts.index.tolist()
cat_names = list(cat_perc.index)

# create visuals
# Each entry pairs a list of traces ('data') with the axis/title settings
# ('layout') that belong to it.
genre_graph = dict(
    data=[
        go.Bar(x=genre_names, y=genre_counts, name='Distribution by Message Genres'),
    ],
    layout=dict(
        title='Distribution of Message Genres',
        yaxis=dict(title_text="Percentage [%]", showgrid=True),
        xaxis=dict(title_text="Genre"),
    ),
)

# Horizontal bars: percentages on the x axis, category names on the y axis.
category_graph = dict(
    data=[
        go.Bar(x=cat_perc, y=cat_names, name='Distribution by Categories', orientation='h'),
    ],
    layout=dict(
        title='Distribution by Categories',
        yaxis=dict(title_text='Category'),
        xaxis=dict(title_text='Percentage [%]', showgrid=True),
    ),
)

graphs = [genre_graph, category_graph]

In [28]:
# Unpack the traces and layout settings of the two graphs built earlier.
p1_data, p1_layout = graphs[0]['data'], graphs[0]['layout']
p2_data, p2_layout = graphs[1]['data'], graphs[1]['layout']

# Five-row grid: the genre chart takes row 1, the category chart spans rows 2-5.
# Cells covered by the rowspan must be None; `{}` would create three extra
# empty subplots drawn on top of the category chart.
fig = make_subplots(
    rows=5,
    cols=1,
    specs=[
        [{}],
        [{"rowspan": 4}],
        [None],
        [None],
        [None],
    ],
    subplot_titles=(p1_layout['title'], p2_layout['title']),
)

# Place each trace in its subplot.
fig.add_trace(p1_data[0], row=1, col=1)
fig.add_trace(p2_data[0], row=2, col=1)

# Apply the per-graph axis settings to the matching subplot axes.
fig.update_xaxes(p1_layout['xaxis'], row=1, col=1)
fig.update_xaxes(p2_layout['xaxis'], row=2, col=1)

fig.update_yaxes(p1_layout['yaxis'], row=1, col=1)
fig.update_yaxes(p2_layout['yaxis'], row=2, col=1)

fig.update_layout(height=1600, width=900, showlegend=False, template='seaborn')
fig.show()