In [12]:
import pandas as pd
import altair as alt

In [22]:
def act_scene(row):
    """Build an "act.scene" label (for example "3.2") from one row.

    The row's ``act`` and ``scene`` values are each converted to ``str`` and
    stripped of surrounding whitespace, then joined with a period.
    """
    pieces = (str(row[field]).strip() for field in ("act", "scene"))
    return ".".join(pieces)

def words_in_sentence(text):
    """Count the whitespace-separated words in ``text``.

    Args:
        text: The speech text. Anything that is not a ``str`` (for example
            the float NaN pandas yields for an empty CSV cell, or ``None``)
            is treated as containing no words instead of raising.

    Returns:
        int: Number of whitespace-delimited tokens; 0 for empty or
        non-string input.
    """
    if not isinstance(text, str):
        # Missing values have no .split(); count them as an empty speech.
        return 0
    return len(text.split())

def _load_title_character(csv_path, speaker, label):
    """Load one play's speeches and keep only the given speaker's rows.

    Args:
        csv_path: Path to the play's CSV; its first column is used as the
            index. The code reads the ``speaker``, ``act``, ``scene`` and
            ``text`` columns.
        speaker: Speaker name exactly as it appears in the CSV.
        label: Display name written into the ``speaker`` column.

    Returns:
        A DataFrame of that speaker's rows with three added columns:
        ``act_scene``, ``words_in_text`` and ``cumulative_words_spoken``
        (running total of ``words_in_text`` in file order).
    """
    speeches = pd.read_csv(csv_path, index_col=0)
    # .copy() yields an independent frame, so the column assignments below do
    # not write into a filtered slice of the full play (SettingWithCopyWarning).
    speeches = speeches[speeches["speaker"] == speaker].copy()
    speeches["speaker"] = label
    speeches["act_scene"] = speeches.apply(act_scene, axis=1)
    speeches["words_in_text"] = speeches["text"].apply(words_in_sentence)
    speeches["cumulative_words_spoken"] = speeches["words_in_text"].cumsum()
    return speeches


df_oth = _load_title_character(
    "csvs/othello_by_uninterupted_speaker.csv", "OTHELLO", "Othello"
)
df_ham = _load_title_character(
    "csvs/hamlet_by_uninterupted_speaker.csv", "HAMLET", "Hamlet"
)
df_rich = _load_title_character(
    "csvs/richardiii_by_uninterupted_speaker.csv", "RICHARD", "Richard III"
)

# One long frame with all three title characters, stacked row-wise.
df_titular = pd.concat([df_oth, df_ham, df_rich], axis=0)

In [23]:
# Colour per title character; entries in `range_` pair up with `domain` by position.
domain = ["Hamlet", "Othello", "Richard III"]
range_ = ["#077187", "#FFA600", "#DB8180"]

# Hand the chart only the three columns it uses.
titular_columns = ["speaker", "act_scene", "words_in_text"]
df_titular_filtered = df_titular[titular_columns]

titular_colors = alt.Scale(domain=domain, range=range_)
titular_x = alt.X('act_scene:N', title="Act, Scene")
titular_y = alt.Y('cumulative:Q', title="Cumulative Words Spoken")
titular_color = alt.Color('speaker:N', title="Character", scale=titular_colors)

# Running sum of words per speaker, ordered by the act_scene label.
# NOTE(review): act_scene is a string, so this ordering is presumably
# lexicographic (a scene "10" would sort before "2") - confirm for other plays.
alt.Chart(df_titular_filtered).transform_window(
    cumulative='sum(words_in_text)',
    sort=[{"field": "act_scene"}],
    groupby=['speaker'],
).mark_line().encode(
    x=titular_x,
    y=titular_y,
    color=titular_color,
).properties(
    title="Cumulative Words Spoken by Titular Shakespeare Characters"
)

In [16]:
# Largest running total per speaker, i.e. the maximum of cumulative_words_spoken.
(
    df_titular
    .groupby("speaker")["cumulative_words_spoken"]
    .max()
)

speaker
Hamlet         11537
Othello         6210
Richard III     8789
Name: cumulative_words_spoken, dtype: int64

In [26]:
# Reload Othello and keep the speeches of both Iago and Othello.
df_oth = pd.read_csv("csvs/othello_by_uninterupted_speaker.csv")
# .copy() so the column assignments below modify an independent frame rather
# than a filtered slice of the full play (avoids SettingWithCopyWarning).
df_oth = df_oth[df_oth["speaker"].isin(["IAGO", "OTHELLO"])].copy()

# Colour per character; entries in `range_` pair up with `domain` by position.
domain = ["Othello", "Iago"]
range_ = ["#FFA600", "#D11033"]

df_oth["act_scene"] = df_oth.apply(act_scene, axis=1)
df_oth["words_in_text"] = df_oth["text"].apply(words_in_sentence)
# Title-case the names so they match the labels listed in `domain`.
df_oth["speaker"] = df_oth["speaker"].str.title()

# Hand the chart only the three columns it uses.
df_oth_filtered = df_oth[["speaker", "act_scene", "words_in_text"]]

# Running sum of words per speaker, ordered by the act_scene label.
alt.Chart(df_oth_filtered).transform_window(
    cumulative='sum(words_in_text)',
    sort=[{"field": "act_scene"}],
    groupby=['speaker']
).mark_line().encode(
    x=alt.X('act_scene:N', title="Act, Scene"),
    y=alt.Y('cumulative:Q', title="Cumulative Words Spoken"),
    color=alt.Color('speaker:N', title="Character", scale=alt.Scale(domain=domain, range=range_))
).properties(
    title="Cumulative Words Spoken by Iago and Othello",
    width=400
)

In [19]:
# Per speaker: number of speeches, longest speech, and mean speech length (in words).
speech_stats = ["count", "max", "mean"]
df_oth.groupby("speaker")["words_in_text"].agg(speech_stats)

Unnamed: 0_level_0,count,max,mean
speaker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Iago,273,259,30.692308
Othello,273,344,22.747253


In [20]:
# Number of distinct act.scene labels in which each speaker has at least one row.
scenes_by_speaker = df_oth.groupby("speaker")["act_scene"]
scenes_by_speaker.nunique()

speaker
Iago       13
Othello    12
Name: act_scene, dtype: int64

In [21]:
# Re-read the play without any speaker filter to find the longest speech overall.
df_oth = pd.read_csv("csvs/othello_by_uninterupted_speaker.csv")
df_oth["words_in_text"] = df_oth["text"].apply(words_in_sentence)

# Series.max() is the vectorised pandas reduction (the builtin max() iterates
# element by element in Python). The boolean mask keeps every row tied for
# the maximum, not just the first.
df_oth[df_oth["words_in_text"] == df_oth["words_in_text"].max()]

Unnamed: 0.1,Unnamed: 0,act,scene,sentence_number,speaker,text,words_in_text
116,116,1,3,40,OTHELLO,"Her father loved me, oft invited me, Still que...",344
