This project was inspired by [u/Gandagorn](https://www.reddit.com/u/Gandagorn/)'s [post](https://www.reddit.com/r/dataisbeautiful/comments/kkp3lr/oc_interaction_intensity_in_the_simpsons/) on Reddit.
I am a big fan of the show The Office. This is a rough estimate of who speaks to whom. Interactions between characters are counted by checking whether they speak in the same scene.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import networkx as nx
import random
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input
# directory (walking all sub-directories), then print one path per line.
input_paths = (
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the script-lines CSV from the Kaggle input dataset into a DataFrame.
office =  pd.read_csv('/kaggle/input/the-office-script-lines/The_Office_lines.csv')
# Preview the first five rows of the loaded data.
office.head(5)

**Prepare Data**

In [None]:
# Build a per-episode key by concatenating the season and episode numbers as
# text; it is used later to tell whether two rows belong to the same episode.
season_text = office['season'].astype(str)
episode_text = office['episode'].astype(str)
office['episode_id'] = season_text + episode_text

# The 20 speakers with the most script lines are treated as the main characters.
speaker_line_counts = office['speaker'].value_counts()
main_characters = list(speaker_line_counts.index[:20])

# Same characters in a random order.
main_characters_suffle = random.sample(main_characters, len(main_characters))

# Lookup tables between a character name and its rank in main_characters.
character_dict = dict(zip(main_characters, range(len(main_characters))))
id_dict = dict(enumerate(main_characters))

In [None]:
# Create an empty undirected networkx graph. Characters become its nodes and
# each edge carries a "weight" attribute (both are added by the counting loop).
G = nx.Graph()

**Get conversation info between characters**

In [None]:
#%% Get conversation info between characters
# Count one interaction whenever two consecutive script lines belong to the
# same scene of the same episode and both speakers are main characters. Each
# interaction adds 1 to the "weight" of the edge joining the two speakers in G.

# Set for constant-time membership tests inside the loop.
main_character_set = set(main_characters_suffle)

scene_before = ""
episode_id_before = -1
speaker_before = None

# Walk the three columns in lockstep instead of indexing with .iloc per row.
for episode_id, scene, speaker in zip(
    office["episode_id"], office["scene"], office["speaker"]
):
    # Does this line continue the scene (and episode) of the previous line?
    same_scene = scene == scene_before and episode_id == episode_id_before

    # The conversation pair is the previous speaker and the current speaker.
    # Pairing the current line with the *next* row would look past the scene
    # check (the next row may open a new scene or episode) and would index
    # beyond the end of the table on the last row.
    c1, c2 = speaker_before, speaker

    # Remember this line for the next iteration.
    scene_before = scene
    episode_id_before = episode_id
    speaker_before = speaker

    # The first line of a new scene/episode has no conversation partner yet.
    if not same_scene:
        continue

    # Skip pairs that involve a character outside the main-character list.
    if c1 not in main_character_set or c2 not in main_character_set:
        continue

    # Sort so a given pair is always added to the graph in the same order.
    first, second = sorted([c1, c2])
    try:
        # Add +1 to the weight if the pair has already been seen.
        G.edges[first, second]["weight"] += 1
    except KeyError:
        # First interaction of this pair: create the edge.
        G.add_edge(first, second, weight=1)

**Plot the conversations between characters**

In [None]:
def plot_fig():
    """Draw the character interaction graph ``G`` on a circular layout.

    Reads the module-level graph ``G`` and its per-edge ``"weight"``
    attribute. Edge colour and line width both grow with the weight; edges
    whose weight is 10 or less get a line width of 0. Node labels are drawn
    slightly below the nodes. The figure is drawn on the current matplotlib
    figure; nothing is returned or saved.
    """
    plt.figure(figsize=(25, 25))
    pos = nx.circular_layout(G)

    # Edge weights in the same order as G.edges(), which is the order
    # draw_networkx uses for its default edge list.
    edge_weights = [weight for _, _, weight in G.edges(data="weight")]

    # Darker colors for higher weight (the exponent compresses the range).
    colors = [weight ** 0.39 for weight in edge_weights]
    # Only show pairs of characters that had a conversation more than 10 times.
    widths = [weight ** 0.4 if weight > 10 else 0 for weight in edge_weights]

    # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in
    # 3.9; pyplot.get_cmap is available in both old and new releases.
    cmap = plt.get_cmap('plasma_r')

    nx.draw_networkx(G, pos, width=widths, edge_color=colors,
                     node_color="black", edge_cmap=cmap, with_labels=False, alpha=0.99)

    # Shift every label slightly below its node so it does not cover the node.
    labels_pos = {name: [xy[0], xy[1] - 0.04] for name, xy in pos.items()}
    nx.draw_networkx_labels(G, labels_pos, font_size=35, font_family="sans-serif",
                            font_color="#000000", font_weight='normal')

    ax = plt.gca()
    # Extra margins leave room for the labels on the outside of the circle.
    ax.margins(0.25)
    plt.axis("equal")
    plt.tight_layout()

    # Uncomment to write the figure to disk:
    #plt.savefig("Theoffice_conversation")



In [None]:
# Render the character interaction graph.
plot_fig()