In [1]:
import pandas as pd
import altair as alt
from altair_saver import save
import numpy as np
from IPython.display import Image
from nltk.stem import PorterStemmer
# Shared Porter stemmer instance; photo_chart() calls ps.stem() on each response word before counting.
ps = PorterStemmer()

# Ranking comparison - surveys and dueling bandits

## Read in data

In [2]:
# Free-choice survey responses and the label metadata file that accompanies them.
df_free = pd.read_csv('../clean_data/free_choice_emotion_uw_students.csv')
df_free_labels = pd.read_csv('../data/emotion_labels_free_choice.csv')

# Forced-choice survey responses and their label metadata file.
df_forced = pd.read_csv('../clean_data/forced_choice_emotion_uw_students.csv')
df_forced_labels = pd.read_csv('../data/emotion_labels.csv')

In [3]:
# Keep only the answer columns. `.loc` label slices include both endpoints,
# so the first and last question columns named here are part of the result.
df_emo_answers_free = df_free.loc[:, 'Q2.1_1':'Q195.1_4'] # free-choice: columns holding the typed words only
df_emo_answers_forced = df_forced.loc[:, 'Q2.1':'Q195.1'] 

In [4]:
def emotion_df_formated(df_emo_answers, emotion_label, df_labels):
    """Return the answer rows whose metadata ``label`` equals ``emotion_label``.

    The answers frame is transposed so that every original column becomes a
    row. The original column name is preserved in a ``photo_id`` column, the
    index is reset to 0..n-1, and ``df_labels`` is attached column-wise.

    Parameters
    ----------
    df_emo_answers : pd.DataFrame
        Survey answers, one column per question.
    emotion_label : str
        Value to look for in the ``label`` column.
    df_labels : pd.DataFrame
        Metadata containing a ``label`` column. The concat aligns on index
        only. NOTE(review): presumably one row per answer column, in the same
        order, with a default integer index - confirm against the label files.

    Returns
    -------
    pd.DataFrame
        Transposed answers + ``photo_id`` + metadata columns, filtered to the
        requested label.
    """
    transposed = df_emo_answers.T
    # Carry the old column names along as data, then discard them from the index.
    with_ids = transposed.assign(photo_id=transposed.index).reset_index(drop=True)
    labelled = pd.concat([with_ids, df_labels], axis=1)
    matches_label = labelled['label'] == emotion_label
    return labelled[matches_label]

In [5]:
def formating_words(df, len_words=1, len_letters=2):
    """Flatten a frame of free-text answers into a cleaned one-column frame.

    Every cell is stacked into a single ``emotion`` column, stripped of
    surrounding whitespace and lower-cased; the literal answer ``'na'`` is
    mapped to ``'none'``. Only answers made of exactly ``len_words`` words and
    more than ``len_letters`` characters are kept.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose cells are the raw answers.
    len_words : int, default 1
        Required number of whitespace-separated words.
    len_letters : int, default 2
        Answers must be strictly longer than this many characters.

    Returns
    -------
    pd.DataFrame
        Independent copy with columns ``emotion``, ``len_words`` and
        ``len_letters``; callers may add columns to it without triggering
        chained-assignment warnings.
    """
    # Explicit dropna: missing answers are discarded no matter how stack() treats NaN.
    stacked = df.stack().dropna().reset_index(drop=True)
    words = stacked.to_frame(name='emotion')

    words['emotion'] = (
        words['emotion']
        .str.strip()               # remove surrounding blanks
        .str.lower()               # normalise case
        .replace({'na': 'none'})
    )

    # The .str accessors yield NaN for non-string cells instead of raising the
    # way a plain len() would; NaN then fails both comparisons below, so such
    # rows are simply filtered out.
    words['len_words'] = words['emotion'].str.split().str.len()
    words['len_letters'] = words['emotion'].str.len()

    keep = (words['len_words'] == len_words) & (words['len_letters'] > len_letters)
    return words[keep].copy()

In [6]:
def simple_per_bar_concat(
        df, title='Title', X='percent:Q', Y='emotion:N',
        width=450, height=250, sort='-x',
        text_size=12, label_size=11, title_size=12,
        emotion='Some', color1='#0570b0', color2='orange'):
    """Bar chart with a percentage text label layered on every bar.

    Parameters
    ----------
    df : pd.DataFrame
        Chart data.
    title : str
        Chart title.
    X, Y : str
        Altair shorthand for the x and y encodings. The x axis is formatted
        as ``'.0%'`` and the text labels as ``'.1%'``.
    sort : str
        Sort order applied to the y encoding.
    text_size : int
        Font size of the bar labels.
    emotion : str
        Rows whose ``emotion`` field equals this value are drawn in
        ``color2``; all other bars use ``color1``.
    color1, color2 : str
        Default and highlight bar colours.
    width, height, label_size, title_size
        Accepted but not used anywhere in this function.

    Returns
    -------
    Layered Altair chart (bars + text).
    """
    bar_color = alt.condition(
        alt.datum.emotion == emotion,
        alt.value(color2),
        alt.value(color1),
    )
    x_encoding = alt.X(X, axis=alt.Axis(format='.0%'))
    y_encoding = alt.Y(Y, sort=sort)

    bar_layer = (
        alt.Chart(df, title=title)
        .mark_bar()
        .encode(x_encoding, y=y_encoding, color=bar_color)
    )

    label_layer = bar_layer.mark_text(
        align='left',
        baseline='middle',
        dx=3,  # shift the label right so it does not sit on top of the bar
        fontSize=text_size,
    ).encode(alt.Text(X, format='.1%'))

    return bar_layer + label_layer

In [7]:
def simple_cnt_bar_concat(
        df, title='Title', X='percent:Q', Y='emotion:N',
        width=450, height=250, sort='-x',
        text_size=12, label_size=11, title_size=12,
        emotion='Some', color1='#0570b0', color2='orange'):
    """Bar chart with the raw (unformatted) value layered as text on every bar.

    Parameters
    ----------
    df : pd.DataFrame
        Chart data.
    title : str
        Chart title.
    X, Y : str
        Altair shorthand for the x and y encodings.
    sort : str
        Sort order applied to the y encoding.
    text_size : int
        Font size of the bar labels.
    emotion : str
        Rows whose ``emotion`` field equals this value are drawn in
        ``color2``; all other bars use ``color1``.
    color1, color2 : str
        Default and highlight bar colours.
    width, height, label_size, title_size
        Accepted but not used anywhere in this function.

    Returns
    -------
    Layered Altair chart (bars + text).

    Notes
    -----
    NOTE(review): the x axis is still formatted as a percentage (``'.0%'``)
    while the text labels show the plain value - confirm this is intended for
    the scores plotted with this function.
    """
    bar_color = alt.condition(
        alt.datum.emotion == emotion,
        alt.value(color2),
        alt.value(color1),
    )
    x_encoding = alt.X(X, axis=alt.Axis(format='.0%'))
    y_encoding = alt.Y(Y, sort=sort)

    bar_layer = (
        alt.Chart(df, title=title)
        .mark_bar()
        .encode(x_encoding, y=y_encoding, color=bar_color)
    )

    label_layer = bar_layer.mark_text(
        align='left',
        baseline='middle',
        dx=3,  # shift the label right so it does not sit on top of the bar
        fontSize=text_size,
    ).encode(alt.Text(X))

    return bar_layer + label_layer

In [8]:
def count_freq_labels(df, X="all", col="emotion"):
    """Count how often each value occurs and express the counts as shares.

    Parameters
    ----------
    df : pd.DataFrame
        Source data.
    X : str, default "all"
        * ``"all"``     - count across every cell of ``df``.
        * ``"stacked"`` - count the values of column ``col``.
        * anything else - treated as a column name of ``df`` to count.
    col : str, default "emotion"
        Column counted when ``X == "stacked"``.

    Returns
    -------
    pd.DataFrame
        Columns ``counts``, the value column and ``percent`` (count divided by
        the total count). The value column is named ``emotion`` for the
        ``"all"`` and ``"stacked"`` modes, and ``X`` otherwise.
    """
    if X == "all":
        values, value_col = df.stack(), 'emotion'
    elif X == "stacked":
        values, value_col = df[col], 'emotion'
    else:
        values, value_col = df[X], X

    freq = values.value_counts().to_frame('counts')
    freq[value_col] = freq.index          # counted values become a regular column
    freq = freq.reset_index(drop=True)
    freq['percent'] = freq['counts'] / freq['counts'].sum()
    return freq

In [9]:
def photo_chart(df_emo_answers, emotion, i, df_labels, yoffset=220, emotion_st=None, n_mentions=2, photo=None, n_photos=24):
    """Bar chart of the most frequent (stemmed) answers given for one photo.

    Parameters
    ----------
    df_emo_answers : pd.DataFrame
        Answer columns of the survey.
    emotion : str
        Metadata label used to select the rows of interest.
    i : int
        Position of the photo among the ``n_photos`` equal groups that the
        selected ``photo_id`` values are split into.
    df_labels : pd.DataFrame
        Metadata joined onto the answers by ``emotion_df_formated``; it must
        provide the ``ethnicity``, ``sex``, ``age``, ``label``, ``url`` and
        ``photoId`` columns that are dropped below.
    yoffset : int, default 220
        Vertical offset of the face image mark.
    emotion_st : str, optional
        Stemmed word whose bar is highlighted.
    n_mentions : int, default 2
        Minimum count for a word to be charted.
    photo : bool, optional
        When truthy, the face image is layered onto the chart.
    n_photos : int, default 24
        Number of groups the selected ids are split into (previously a
        hard-coded 24).

    Returns
    -------
    Altair chart: the bar chart, optionally layered with the face image.
    """
    labelled = emotion_df_formated(df_emo_answers, emotion, df_labels)

    # Split the selected ids into n_photos consecutive groups and keep group i.
    id_groups = np.array_split(labelled['photo_id'].tolist(), n_photos)
    photo_rows = labelled[labelled['photo_id'].isin(id_groups[i])]
    df_url = photo_rows[['url']].head(1)  # one row is enough to place the image

    face = alt.Chart(df_url).mark_image(
        width=150, height=150, align='center', xOffset=-100, yOffset=yoffset
    ).encode(url='url')

    # Strip id/metadata columns so that only answer cells remain.
    answers = photo_rows.drop(
        ['photo_id', 'ethnicity', 'sex', 'age', 'label', 'url', 'photoId'], axis=1)
    words = formating_words(answers)
    # assign() builds a new frame rather than writing into a filtered slice,
    # which avoids pandas' chained-assignment warning.
    words = words.assign(emotion_ps_steamed=words['emotion'].map(ps.stem))
    counts = count_freq_labels(words, X="stacked", col='emotion_ps_steamed')
    source = counts[counts['counts'] >= n_mentions]

    chart = simple_per_bar_concat(
        source,
        title=f"Categories with {n_mentions} or more mentions | n= {source['counts'].sum()}",
        X='percent:Q', Y='emotion:N', emotion=emotion_st)

    return chart + face if photo else chart

In [10]:
def next_chart_wrapper(file_path, emotion, head):
    """Chart the first ``head`` rows of a ranking CSV.

    The file is read with pandas, numeric values are rounded to two decimals,
    and the ``Target`` / ``Score`` columns are renamed to ``emotion`` /
    ``borda score`` before being passed to ``simple_cnt_bar_concat``.

    Parameters
    ----------
    file_path : str
        Path of the ranking CSV.
    emotion : str
        Value of the ``emotion`` column whose bar is highlighted.
    head : int
        Number of leading rows to chart; also shown in the chart title.

    Returns
    -------
    Altair chart produced by ``simple_cnt_bar_concat``.
    """
    ranking = (
        pd.read_csv(file_path)
        .round(decimals=2)
        .rename(columns={'Target': 'emotion', 'Score': 'borda score'})
    )

    return simple_cnt_bar_concat(
        ranking.head(head),
        title=f"Dueling bandits ranking | Top: {head}",
        X='borda score:Q',
        Y='emotion:N',
        emotion=emotion,
    )

## Anger - Female of color

In [11]:
# Anger, photo group 0: forced-choice chart (face image layered on) and free-choice chart.
# emotion_st is compared with the stemmed words, hence 'angri' for the free-text answers.
anger_bfa_forced = photo_chart(df_emo_answers_forced, 'anger',  0, df_forced_labels, emotion_st='anger', n_mentions=2, photo=True, yoffset=230)
anger_bfa_free = photo_chart(df_emo_answers_free, 'anger',  0, df_free_labels, emotion_st='angri', n_mentions=2)

In [12]:
# Ranking CSV for anger_bipoc_female: chart the first 15 rows, highlighting 'angry'.
path = '../../emo-ranking-next/anger_bipoc_female/borda_lilucb_ranking.csv'

anger_bfa_next = next_chart_wrapper(path, emotion='angry', head=15)

In [13]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
anger_bfa_next_panel = (anger_bfa_forced.properties(width=200) | anger_bfa_free.properties(width=200) | anger_bfa_next.properties(width=200))
anger_bfa_next_panel

In [14]:
# Write the panel to a PNG file in the dashboard's data directory.
save(anger_bfa_next_panel, "../../emotions_dashboard/data/anger_bfa_next_panel.png")

## Anger - Male of color

In [15]:
# Anger, photo group 2: forced-choice chart (face image layered on) and free-choice chart.
# emotion_st is compared with the stemmed words, hence 'angri' for the free-text answers.
anger_bma_forced = photo_chart(df_emo_answers_forced, 'anger',  2, df_forced_labels, emotion_st='anger', n_mentions=2, photo=True)
anger_bma_free = photo_chart(df_emo_answers_free, 'anger',  2, df_free_labels, emotion_st='angri', n_mentions=2)

In [16]:
# Ranking CSV for anger_bipoc_male: chart the first 15 rows, highlighting 'angry'.
path = '../../emo-ranking-next/anger_bipoc_male/borda_lilucb_ranking.csv'

anger_bma_next = next_chart_wrapper(path, emotion='angry', head=15)

In [17]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
anger_bma_next_panel = (anger_bma_forced.properties(width=200) | anger_bma_free.properties(width=200) | anger_bma_next.properties(width=200))
anger_bma_next_panel

In [18]:
# Write the panel to a PNG file in the dashboard's data directory.
save(anger_bma_next_panel, "../../emotions_dashboard/data/anger_bma_next_panel.png")

## Anger - White Female 

In [19]:
# Anger, photo group 12: forced-choice chart (face image layered on) and free-choice chart.
# emotion_st is compared with the stemmed words, hence 'angri' for the free-text answers.
anger_wfa_forced = photo_chart(df_emo_answers_forced, 'anger',  12, df_forced_labels, emotion_st='anger', n_mentions=2, photo=True)
anger_wfa_free = photo_chart(df_emo_answers_free, 'anger',  12, df_free_labels, emotion_st='angri', n_mentions=2)

In [20]:
# Ranking CSV for anger_white_female: chart the first 10 rows, highlighting 'angry'.
path = '../../emo-ranking-next/anger_white_female/borda_lilucb_ranking.csv'

anger_wfa_next = next_chart_wrapper(path, emotion='angry', head=10)

In [21]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
anger_wfa_next_panel = (anger_wfa_forced.properties(width=200) | anger_wfa_free.properties(width=200) | anger_wfa_next.properties(width=200))
anger_wfa_next_panel

In [22]:
# Write the panel to a PNG file in the dashboard's data directory.
save(anger_wfa_next_panel, "../../emotions_dashboard/data/anger_wfa_next_panel.png")

## Anger - White male

In [23]:
# Anger, photo group 6: forced-choice chart (face image layered on) and free-choice chart.
# emotion_st is compared with the stemmed words, hence 'angri' for the free-text answers.
anger_wma_forced = photo_chart(df_emo_answers_forced, 'anger',  6, df_forced_labels, emotion_st='anger', n_mentions=2, photo=True, yoffset=230)
anger_wma_free = photo_chart(df_emo_answers_free, 'anger',  6, df_free_labels, emotion_st='angri', n_mentions=2)

In [24]:
# Ranking CSV for anger_white_male: chart the first 30 rows, highlighting 'angry'.
path = '../../emo-ranking-next/anger_white_male/borda_lilucb_ranking.csv'

anger_wma_next = next_chart_wrapper(path, emotion='angry', head=30)

In [25]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
anger_wma_next_panel = (anger_wma_forced.properties(width=200) | anger_wma_free.properties(width=200) | anger_wma_next.properties(width=200))
anger_wma_next_panel

In [26]:
# Write the panel to a PNG file in the dashboard's data directory.
save(anger_wma_next_panel, "../../emotions_dashboard/data/anger_wma_next_panel.png")

## Disgust - Female of color

In [27]:
# Disgust, photo group 0: forced-choice chart (face image layered on) and free-choice chart.
disgust_bfa_forced = photo_chart(df_emo_answers_forced, 'disgust',  0, df_forced_labels, emotion_st='disgust', n_mentions=2, photo=True, yoffset=230)
disgust_bfa_free = photo_chart(df_emo_answers_free, 'disgust',  0, df_free_labels, emotion_st='disgust', n_mentions=2)

In [28]:
# Ranking CSV for disgust_bipoc_female: chart the first 10 rows, highlighting 'disgusted'.
path = '../../emo-ranking-next/disgust_bipoc_female/borda_lilucb_ranking.csv'

disgust_bfa_next = next_chart_wrapper(path, emotion='disgusted', head=10)

In [29]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
disgust_bfa_next_panel = (disgust_bfa_forced.properties(width=200) | disgust_bfa_free.properties(width=200) | disgust_bfa_next.properties(width=200))
disgust_bfa_next_panel

In [30]:
# Write the panel to a PNG file in the dashboard's data directory.
save(disgust_bfa_next_panel, "../../emotions_dashboard/data/disgust_bfa_next_panel.png")

## Disgust - Male of color

In [31]:
# Disgust, photo group 2: forced-choice chart (face image layered on) and free-choice chart.
disgust_bma_forced = photo_chart(df_emo_answers_forced, 'disgust',  2, df_forced_labels, emotion_st='disgust', n_mentions=2, photo=True)
disgust_bma_free = photo_chart(df_emo_answers_free, 'disgust',  2, df_free_labels, emotion_st='disgust', n_mentions=2)

In [32]:
# Ranking CSV for disgust_bipoc_male: chart the first 10 rows, highlighting 'disgusted'.
path = '../../emo-ranking-next/disgust_bipoc_male/borda_lilucb_ranking.csv'

disgust_bma_next = next_chart_wrapper(path, emotion='disgusted', head=10)

In [33]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
disgust_bma_next_panel = (disgust_bma_forced.properties(width=200) | disgust_bma_free.properties(width=200) | disgust_bma_next.properties(width=200))
disgust_bma_next_panel

In [34]:
# Write the panel to a PNG file in the dashboard's data directory.
save(disgust_bma_next_panel, "../../emotions_dashboard/data/disgust_bma_next_panel.png")

## Disgust - White female

In [35]:
# Disgust, photo group 4: forced-choice chart (face image layered on) and free-choice chart.
disgust_wfa_forced = photo_chart(df_emo_answers_forced, 'disgust',  4, df_forced_labels, emotion_st='disgust', n_mentions=2, photo=True)
disgust_wfa_free = photo_chart(df_emo_answers_free, 'disgust',  4, df_free_labels, emotion_st='disgust', n_mentions=2)

In [36]:
# Ranking CSV for disgust_white_female: chart the first 10 rows, highlighting 'disgusted'.
path = '../../emo-ranking-next/disgust_white_female/borda_lilucb_ranking.csv'

disgust_wfa_next = next_chart_wrapper(path, emotion='disgusted', head=10)

In [37]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
disgust_wfa_next_panel = (disgust_wfa_forced.properties(width=200) | disgust_wfa_free.properties(width=200) | disgust_wfa_next.properties(width=200))
disgust_wfa_next_panel

In [38]:
# Write the panel to a PNG file in the dashboard's data directory.
save(disgust_wfa_next_panel, "../../emotions_dashboard/data/disgust_wfa_next_panel.png")

## Disgust - White male

In [39]:
# Disgust, photo group 6: forced-choice chart (face image layered on) and free-choice chart.
disgust_wma_forced = photo_chart(df_emo_answers_forced, 'disgust',  6, df_forced_labels, emotion_st='disgust', n_mentions=2, photo=True)
disgust_wma_free = photo_chart(df_emo_answers_free, 'disgust',  6, df_free_labels, emotion_st='disgust', n_mentions=2)

In [40]:
# Ranking CSV for disgust_white_male: chart the first 30 rows, highlighting 'disgusted'.
path = '../../emo-ranking-next/disgust_white_male/borda_lilucb_ranking.csv'

disgust_wma_next = next_chart_wrapper(path, emotion='disgusted', head=30)

In [41]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
disgust_wma_next_panel = (disgust_wma_forced.properties(width=200) | disgust_wma_free.properties(width=200) | disgust_wma_next.properties(width=200))
disgust_wma_next_panel

In [42]:
# Write the panel to a PNG file in the dashboard's data directory.
save(disgust_wma_next_panel, "../../emotions_dashboard/data/disgust_wma_next_panel.png")

## Fear - Female of color

In [43]:
# Fear, photo group 0: forced-choice chart (face image layered on) and free-choice chart.
fear_bfa_forced = photo_chart(df_emo_answers_forced, 'fear',  0, df_forced_labels, emotion_st='fear', n_mentions=2, photo=True, yoffset=260)
fear_bfa_free = photo_chart(df_emo_answers_free, 'fear',  0, df_free_labels, emotion_st='fear', n_mentions=2)

In [44]:
# Ranking CSV for fear_bipoc_female: chart the first 25 rows, highlighting 'fear'.
path = '../../emo-ranking-next/fear_bipoc_female/borda_lilucb_ranking.csv'

fear_bfa_next = next_chart_wrapper(path, emotion='fear', head=25)

In [45]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
fear_bfa_next_panel = (fear_bfa_forced.properties(width=200) | fear_bfa_free.properties(width=200) | fear_bfa_next.properties(width=200))
fear_bfa_next_panel

In [46]:
# Write the panel to a PNG file in the dashboard's data directory.
save(fear_bfa_next_panel, "../../emotions_dashboard/data/fear_bfa_next_panel.png")

## Fear - Male of color

In [47]:
# Fear, photo group 2: forced-choice chart (face image layered on) and free-choice chart.
fear_bma_forced = photo_chart(df_emo_answers_forced, 'fear',  2, df_forced_labels, emotion_st='fear', n_mentions=2, photo=True, yoffset=230)
fear_bma_free = photo_chart(df_emo_answers_free, 'fear',  2, df_free_labels, emotion_st='fear', n_mentions=2)

In [48]:
# Ranking CSV for fear_bipoc_male: chart the first 35 rows, highlighting 'fear'.
path = '../../emo-ranking-next/fear_bipoc_male/borda_lilucb_ranking.csv'

fear_bma_next = next_chart_wrapper(path, emotion='fear', head=35)

In [49]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
fear_bma_next_panel = (fear_bma_forced.properties(width=200) | fear_bma_free.properties(width=200) | fear_bma_next.properties(width=200))
fear_bma_next_panel

In [50]:
# Write the panel to a PNG file in the dashboard's data directory.
save(fear_bma_next_panel, "../../emotions_dashboard/data/fear_bma_next_panel.png")

## Fear - White female

In [51]:
# Fear, photo group 20: forced-choice chart (face image layered on) and free-choice chart.
fear_wfa_forced = photo_chart(df_emo_answers_forced, 'fear',  20, df_forced_labels, emotion_st='fear', n_mentions=2, photo=True)
fear_wfa_free = photo_chart(df_emo_answers_free, 'fear',  20, df_free_labels, emotion_st='fear', n_mentions=2)

In [52]:
# Ranking CSV for fear_white_female: chart the first 15 rows, highlighting 'fear'.
path = '../../emo-ranking-next/fear_white_female/borda_lilucb_ranking.csv'

fear_wfa_next = next_chart_wrapper(path, emotion='fear', head=15)

In [53]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
fear_wfa_next_panel = (fear_wfa_forced.properties(width=200) | fear_wfa_free.properties(width=200) | fear_wfa_next.properties(width=200))
fear_wfa_next_panel

In [54]:
# Write the panel to a PNG file in the dashboard's data directory.
save(fear_wfa_next_panel, "../../emotions_dashboard/data/fear_wfa_next_panel.png")

## Fear - White male

In [55]:
# Fear, photo group 6: forced-choice chart (face image layered on) and free-choice chart.
fear_wma_forced = photo_chart(df_emo_answers_forced, 'fear',  6, df_forced_labels, emotion_st='fear', n_mentions=2, photo=True, yoffset=250)
fear_wma_free = photo_chart(df_emo_answers_free, 'fear',  6, df_free_labels, emotion_st='fear', n_mentions=2)

In [56]:
# Ranking CSV for fear_white_male: chart the first 15 rows, highlighting 'fear'.
path = '../../emo-ranking-next/fear_white_male/borda_lilucb_ranking.csv'

fear_wma_next = next_chart_wrapper(path, emotion='fear', head=15)

In [57]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
fear_wma_next_panel = (fear_wma_forced.properties(width=200) | fear_wma_free.properties(width=200) | fear_wma_next.properties(width=200))
fear_wma_next_panel

In [58]:
# Write the panel to a PNG file in the dashboard's data directory.
save(fear_wma_next_panel, "../../emotions_dashboard/data/fear_wma_next_panel.png")

## Happiness - Female of color

In [59]:
# Happiness, photo group 0: forced-choice chart (face image layered on) and free-choice chart.
# emotion_st is compared with the stemmed words, hence 'happi' in both calls.
happiness_bfa_forced = photo_chart(df_emo_answers_forced, 'happiness',  0, df_forced_labels, emotion_st='happi', n_mentions=2, photo=True)
happiness_bfa_free = photo_chart(df_emo_answers_free, 'happiness',  0, df_free_labels, emotion_st='happi', n_mentions=2)

In [60]:
# Ranking CSV for happiness_bipoc_female: chart the first 30 rows, highlighting 'happy'.
path = '../../emo-ranking-next/happiness_bipoc_female/borda_lilucb_ranking.csv'

happiness_bfa_next = next_chart_wrapper(path, emotion='happy', head=30)

In [61]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
happiness_bfa_next_panel = (happiness_bfa_forced.properties(width=200) | happiness_bfa_free.properties(width=200) | happiness_bfa_next.properties(width=200))
happiness_bfa_next_panel

In [62]:
# Write the panel to a PNG file in the dashboard's data directory.
save(happiness_bfa_next_panel, "../../emotions_dashboard/data/happiness_bfa_next_panel.png")

## Happiness - Male of color

In [63]:
# Happiness, photo group 2: forced-choice chart (face image layered on) and free-choice chart.
# emotion_st is compared with the stemmed words, hence 'happi' in both calls.
happiness_bma_forced = photo_chart(df_emo_answers_forced, 'happiness',  2, df_forced_labels, emotion_st='happi', n_mentions=2, photo=True, yoffset=230)
happiness_bma_free = photo_chart(df_emo_answers_free, 'happiness',  2, df_free_labels, emotion_st='happi', n_mentions=2)

In [64]:
# Ranking CSV for happiness_bipoc_male: chart the first 10 rows, highlighting 'happy'.
path = '../../emo-ranking-next/happiness_bipoc_male/borda_lilucb_ranking.csv'

happiness_bma_next = next_chart_wrapper(path, emotion='happy', head=10)

In [65]:
# Horizontal panel (forced choice | free choice | ranking), each chart set to width=200; the bare name displays it.
happiness_bma_next_panel = (happiness_bma_forced.properties(width=200) | happiness_bma_free.properties(width=200) | happiness_bma_next.properties(width=200))
happiness_bma_next_panel

In [66]:
# Write the panel to a PNG file in the dashboard's data directory.
save(happiness_bma_next_panel, "../../emotions_dashboard/data/happiness_bma_next_panel.png")