# Notebook cell In [65]:
import os
import sys
from collections import Counter
from typing import Optional

import pandas as pd
import plotly
import plotly.graph_objects as go
import spacy
from plotly.subplots import make_subplots

# ---------------------------------------------------
# Module-level setup: load the small English spaCy
# pipeline once, and build the path of the "storage"
# subfolder relative to the current working directory.
# ---------------------------------------------------

nlp = spacy.load('en_core_web_sm')
dir_path = os.getcwd()
subfolder = '{}{}'.format(dir_path, '/storage/')


def get_dialogue_info(character_csv: str) -> tuple:
    '''
    Given a file name from the "storage" subfolder, return the character's
    name, the words of their responses (overall and split by part of
    speech), and the tokens of their script notes (approximately their
    sentiments). Every element after the name is a list of strings.

                Parameters:
                    character_csv (str):
                    The name of the file found in the "storage" subfolder.
                    The file is read with pandas and must contain the
                    columns "RESPONSE TEXT" and "SCRIPT NOTES".

                Returns:
                    character_name, words, nouns, verbs, adjs, sentiment (tuple):
                    The character's name (str), every non-stop-word,
                    non-punctuation token of their responses, those same
                    tokens restricted to nouns, verbs and adjectives, and
                    the tokens of their script notes.

                Raises:
                    KeyError:
                    If either required column is missing from the file.
                    Errors raised by pandas while reading the file are
                    not caught here.

    '''
    # ---------------------------------------------------
    # Recover character's name from filename, e.g.
    # "nick_valentine.csv" -> "Nick Valentine".
    # ---------------------------------------------------
    name_parts = (part.capitalize() for part in character_csv.split('_'))
    character_name = ' '.join(name_parts).replace('.csv', '')

    # Keep the DataFrame under its own name rather than rebinding the
    # filename parameter.
    dialogue = pd.read_csv(subfolder + character_csv)

    # ---------------------------------------------------
    # Concatenate every response into a single string for
    # spaCy to handle. Missing responses are dropped first;
    # otherwise str(NaN) would add the literal word "nan"
    # to the character's vocabulary.
    # ---------------------------------------------------
    responses = dialogue['RESPONSE TEXT'].dropna()
    doc = nlp(' '.join(str(line).lower() for line in responses))

    # ---------------------------------------------------
    # Single pass over the doc: every content token goes
    # into "words", and additionally into the list that
    # matches its part-of-speech tag (if any), for
    # ease-of-use with a plotly dropdown graph.
    # ---------------------------------------------------
    words, nouns, verbs, adjs = [], [], [], []
    by_pos = {'NOUN': nouns, 'VERB': verbs, 'ADJ': adjs}
    for token in doc:
        if token.is_stop or token.is_punct:
            continue
        words.append(token.text)
        bucket = by_pos.get(token.pos_)
        if bucket is not None:
            bucket.append(token.text)

    # ---------------------------------------------------
    # Script notes: keep only the text after the last "/",
    # treat missing notes as "neutral", then let spaCy
    # tokenize the joined notes just like the responses.
    # ---------------------------------------------------
    script_notes = [str(entry).split('/')[-1].strip().lower()
                    for entry in dialogue['SCRIPT NOTES'].fillna('neutral')]
    sentiment_doc = nlp(' '.join(script_notes))
    sentiment = [token.text for token in sentiment_doc]

    return character_name, words, nouns, verbs, adjs, sentiment

def _top_counts(tokens, limit=10):
    '''
    Return two parallel lists: the `limit` most frequent entries of
    `tokens` and how often each occurs. Both lists are empty when
    `tokens` is empty.
    '''
    ranked = Counter(tokens).most_common(limit)
    return [label for label, _ in ranked], [count for _, count in ranked]


def make_graph(character_filename: str) -> Optional[plotly.graph_objects.Figure]:
    '''
    Build and show a 2x1 bar-chart figure for one character: the top row
    holds their ten most common words (switchable between all words,
    nouns, verbs and adjectives through a dropdown), the bottom row their
    ten most common sentiment tokens.

                Parameters:
                    character_filename (str):
                    The name of the character's file, passed unchanged to
                    get_dialogue_info.

                Returns:
                    fig (plotly.graph_objects.Figure or None):
                    The figure that was shown, or None when the dialogue
                    data could not be loaded (the reason is printed to
                    stderr).

    '''
    # ---------------------------------------------------
    # Only the data-loading step is guarded: an unreadable
    # file (OSError), a missing column (KeyError) or
    # unparsable contents (ValueError) are reported with
    # the offending filename instead of a bare message.
    # ---------------------------------------------------
    try:
        character_dialogue = get_dialogue_info(character_filename)
    except (OSError, KeyError, ValueError) as e:
        print('Could not load dialogue from "{}": {!r}'.format(character_filename, e),
              file=sys.stderr)
        return None

    character_name, words, nouns, verbs, adjs, sentiment = character_dialogue

    fig = make_subplots(
        rows=2,
        cols=1,
        subplot_titles=[
            '{character}\'s Most Common Words'.format(character=character_name),
            'Most Common Sentiments',
        ],
    )

    # ---------------------------------------------------
    # One entry per word trace, in the order the traces
    # are added. The dropdown buttons below are generated
    # from this same list so a label can never point at
    # the wrong trace. The "All" trace keeps plotly's
    # default bar color.
    # ---------------------------------------------------
    word_traces = [
        ('All', words, None),
        ('Nouns', nouns, '#6aaefc'),
        ('Verbs', verbs, '#6e6afc'),
        ('Adjectives', adjs, '#fcb86a'),
    ]

    for index, (_, tokens, color) in enumerate(word_traces):
        # _top_counts yields empty lists for an empty category, so a
        # character with e.g. no adjectives still gets an (empty) trace.
        labels, counts = _top_counts(tokens)
        bar_options = {'visible': index == 0}  # only "All" is shown at first
        if color is not None:
            bar_options['marker_color'] = color
        fig.add_trace(go.Bar(x=labels, y=counts, **bar_options), row=1, col=1)

    sentiment_labels, sentiment_counts = _top_counts(sentiment)
    fig.add_trace(
        go.Bar(x=sentiment_labels, y=sentiment_counts),
        row=2,
        col=1
    )

    # ---------------------------------------------------
    # Dropdown: each button shows exactly one word trace;
    # the trailing True keeps the sentiment trace visible
    # for every selection.
    # ---------------------------------------------------
    buttons = []
    for index, (label, _, _) in enumerate(word_traces):
        visible = [position == index for position in range(len(word_traces))]
        visible.append(True)
        buttons.append(dict(label=label,
                            method='update',
                            args=[{'visible': visible}]))

    # ---------------------------------------------------
    # Hide the legend since the dropdown menu will show
    # what's selected anyway
    # ---------------------------------------------------
    fig.update_layout(
        showlegend=False,
        updatemenus=[dict(active=0, buttons=buttons)],
    )

    fig.show()
    return fig

if __name__ == '__main__':

    # ---------------------------------------------------
    # Take the filename from the first command-line
    # argument when one is given; otherwise ask for it.
    # ---------------------------------------------------
    if len(sys.argv) == 1:
        print('Enter the filename for the character (i.e., "nick_valentine.csv" for the character Nick Valentine):')
        character_filename = input()

    else:
        character_filename = sys.argv[1]

    # ---------------------------------------------------
    # get_dialogue_info prepends the storage folder
    # itself, so reduce a path such as
    # "/storage/captain_zao.csv" to just the file name.
    # ---------------------------------------------------
    character_filename = os.path.basename(character_filename.strip())

    make_graph(character_filename)

# Notebook cell In [61] (scratch check of a '/storage' substring test):
# >>> '/storage' in '/storage/captain_zao.csv'
# True