In [32]:
import os
from itertools import combinations
from pathlib import Path

import networkx as nx
import pandas as pd
import plotly.graph_objects as go
from IPython.display import display
from ipywidgets import widgets

# Directory that holds the input CSV files. It defaults to the location the
# notebook was originally written against; set the GENDER_GAP_DATA_DIR
# environment variable to run the notebook from another machine or checkout.
DATA_DIR = Path(os.environ.get(
    'GENDER_GAP_DATA_DIR',
    '/Users/alan/Desktop/gender-gap-in-film-industry/network'))

# Film table; this notebook reads its 'star', 'star_id', 'year' and
# 'num_votes' columns.
dframe = pd.read_csv(DATA_DIR / 'imdb_all_v2.csv')
# Gender table; this notebook uses its 'id' column as the join key and reads
# its 'gender' column.
gender_frame = pd.read_csv(DATA_DIR / 'gender.csv')

# Build `name_id_gender`, a table indexed by star name that carries the
# columns of the gender table, from the comma-separated 'star' / 'star_id'
# fields of every film.
star_list = []
star_id_list = []
for stars, star_ids in zip(dframe['star'], dframe['star_id']):
    # A missing field is read as a non-string value (e.g. NaN). Skip such rows
    # explicitly rather than hiding every possible error behind a bare
    # `except`, which would also mask a missing column.
    if not (isinstance(stars, str) and isinstance(star_ids, str)):
        continue
    star_split = stars.split(',')
    id_split = star_ids.split(',')
    # Names and ids are paired by position, so a row is only usable when both
    # lists have the same number of entries.
    if len(star_split) == len(id_split):
        star_list += star_split
        star_id_list += id_split

# One row per distinct (name, id) pair, indexed by id for the join below.
name_id = (
    pd.DataFrame({'name': star_list, 'id': star_id_list})
    .drop_duplicates()
    .set_index('id')
)
gender_frame = gender_frame.set_index('id')

# Outer join on id: stars without a gender record are kept (their gender
# columns are missing), as are gender records without a matching star.
name_id_gender = name_id.join(gender_frame, how = 'outer', on = 'id')
# Keep a single row per name so that a lookup by name yields one record.
name_id_gender = name_id_gender.drop_duplicates('name')
name_id_gender = name_id_gender.set_index('name')

# Initial view: the most-voted films inside the default year window.
# The slider starts on the same window, so the widget and the first rendering
# of the figure always agree.
DEFAULT_YEAR_RANGE = (2010, 2020)   # inclusive (first year, last year)
TOP_N_FILMS = 100                   # number of films kept, ranked by 'num_votes'

in_default_range = (
    (DEFAULT_YEAR_RANGE[0] <= dframe['year'])
    & (dframe['year'] <= DEFAULT_YEAR_RANGE[1])
)
df = (
    dframe[in_default_range]
    .sort_values('num_votes', ascending = False)
    .head(TOP_N_FILMS)
    .reset_index(drop = True)
)

# Year-range selector; selectable years span 1920 to 2021.
slider = widgets.IntRangeSlider(
    value=list(DEFAULT_YEAR_RANGE),
    min=1920,
    max=2021,
    step=1,
    description='Year:',
    disabled=False,
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='d',
    layout=widgets.Layout(width='500px')
)

# Build the co-star graph for the initial selection: every pair of stars
# credited on the same film is connected by an edge.
edges = []
nodes = []
for stars in df['star']:
    # A missing 'star' field is read as a non-string value (e.g. NaN); skip it
    # explicitly instead of swallowing every error with a bare `except`.
    if not isinstance(stars, str):
        continue
    this_star_list = stars.split(',')
    # Only films that list exactly four stars contribute to the graph.
    if len(this_star_list) == 4:
        nodes += this_star_list
        # All unordered pairs, in the same order as a nested index loop.
        edges.extend(combinations(this_star_list, 2))

G = nx.Graph(edges)
# A fixed seed makes the force-directed layout reproducible across runs.
pos = nx.spring_layout(G, seed=42)

# Flatten the graph's edges into coordinate lists for a single line trace.
# Each edge contributes its two endpoints followed by None, which keeps
# consecutive edges from being joined into one continuous line.
edge_x = []
edge_y = []
for source, target in G.edges():
    (x0, y0), (x1, y1) = pos[source], pos[target]
    edge_x.extend((x0, x1, None))
    edge_y.extend((y0, y1, None))

# Thin grey lines without hover information.
edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    mode='lines',
    hoverinfo='none',
    line=dict(width=0.3, color='#888'),
)

# Per-node data for the marker trace: position, number of co-stars (marker
# size), gender (marker colour) and hover text.
node_x = []
node_y = []
node_adjacencies = []
node_gender = []
node_text = []

# G.adjacency() yields (node, neighbours) pairs in the graph's node order, so
# the five lists stay aligned.
for name, neighbours in G.adjacency():
    x, y = pos[name]
    node_x.append(x)
    node_y.append(y)

    degree = len(neighbours)
    node_adjacencies.append(degree)

    try:
        gender = name_id_gender.loc[name, 'gender']
    except KeyError:
        # No record for this star: leave the marker colour unset.
        gender = None
    node_gender.append(gender)

    node_text.append(f'{name}: {degree} co-occurences')

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    text=node_text,
    marker=dict(
        showscale=False,
        # Any named plotly colorscale can be used here.
        colorscale='Bluered',
        color = node_gender,
        opacity = 0.9,
        size = node_adjacencies,
        line_width = 0))

# Interactive figure: trace 0 holds the edges and trace 1 the nodes.
# The title font is set through `title.font` rather than the deprecated
# `titlefont` shorthand, which newer plotly releases no longer accept.
fig = go.FigureWidget(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        title=dict(text='Movie Star Co-occurences', font=dict(size=16)),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        plot_bgcolor='rgba(0,0,0,0)',
        # Node coordinates come from the layout algorithm and carry no
        # meaning, so grids, zero lines and tick labels are hidden.
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        width=800,
        height=600,
    ),
)

In [33]:
def response(change):
    """Redraw the co-star network for the year range selected on ``slider``.

    Keeps the 100 most-voted films whose year lies inside the slider's
    (inclusive) range, links every pair of stars credited on the same film,
    lays the graph out and updates the two traces of ``fig`` in place.

    Args:
        change: Change notification supplied by the widget framework. It is
            not read; the range is taken from ``slider.value`` directly, so
            the function can also be called manually with any argument.

    Returns:
        None. The function works through its side effect on ``fig``.
    """
    first_year, last_year = slider.value
    in_range = (first_year <= dframe['year']) & (dframe['year'] <= last_year)
    films = (
        dframe[in_range]
        .sort_values('num_votes', ascending = False)
        .head(100)
        .reset_index(drop = True)
    )

    edges = []
    for stars in films['star']:
        # A missing 'star' field is read as a non-string value (e.g. NaN);
        # skip it explicitly instead of hiding every error.
        if not isinstance(stars, str):
            continue
        cast = stars.split(',')
        # Only films that list exactly four stars contribute to the graph.
        if len(cast) == 4:
            edges.extend(combinations(cast, 2))

    G = nx.Graph(edges)
    # A fixed seed gives the same layout every time a year range is selected.
    pos = nx.spring_layout(G, seed=42)

    # Edge coordinates: two endpoints per edge followed by None, which keeps
    # consecutive edges from being joined into one continuous line.
    edge_x = []
    edge_y = []
    for source, target in G.edges():
        (x0, y0), (x1, y1) = pos[source], pos[target]
        edge_x.extend((x0, x1, None))
        edge_y.extend((y0, y1, None))

    # Node data; G.adjacency() yields (node, neighbours) in node order, so
    # all lists stay aligned.
    node_x = []
    node_y = []
    node_adjacencies = []
    node_gender = []
    node_text = []
    for name, neighbours in G.adjacency():
        x, y = pos[name]
        node_x.append(x)
        node_y.append(y)

        degree = len(neighbours)
        node_adjacencies.append(degree)

        try:
            gender = name_id_gender.loc[name, 'gender']
        except KeyError:
            # No record for this star: leave the marker colour unset.
            gender = None
        node_gender.append(gender)

        node_text.append(f'{name}: {degree} co-occurences')

    # Apply all changes to the existing figure widget as one batch.
    with fig.batch_update():
        fig.data[0].x = edge_x
        fig.data[0].y = edge_y
        fig.data[1].marker = dict(
            showscale=False,
            colorscale='Bluered',
            color=node_gender,
            opacity=0.9,
            size=node_adjacencies,
            line=dict(width=0))
        fig.data[1].text = node_text
        fig.data[1].x = node_x
        fig.data[1].y = node_y

# Call `response` whenever the slider's `value` trait changes.
# NOTE(review): re-running this cell after redefining `response` presumably
# registers an additional callback on the same slider - confirm, and restart
# the kernel if the figure appears to update more than once per change.
slider.observe(response, names="value")

# Stack the slider row above the figure and render both.
container = widgets.HBox(children=[slider])
network = widgets.VBox(children=[container, fig])
display(network)

VBox(children=(HBox(children=(IntRangeSlider(value=(2010, 2020), continuous_update=False, description='Year:',…