In [104]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import networkx as nx
import matplotlib.pyplot as plt

In [5]:
# Load the two per-user subreddit tables as dataframes.
# NOTE(review): both URLs embed a `token=` query parameter — confirm these
# tokens are meant to be committed and are still valid.
BRASIL_CSV_URL = 'https://raw.githubusercontent.com/nicoIwas/ars/main/data/brasil_sub.csv?token=GHSAT0AAAAAACLDY3WMSELGHKVTSZHLTVGGZLSBDOA'
BRASILIVRE_CSV_URL = 'https://raw.githubusercontent.com/nicoIwas/ars/main/data/brasilivre_sub.csv?token=GHSAT0AAAAAACLDY3WNOHWN47JLTM32RCJGZLSBEYA'

brasil = pd.read_csv(BRASIL_CSV_URL)
brasilivre = pd.read_csv(BRASILIVRE_CSV_URL)

## Formatting data from r/Brasil

In [78]:
# Each user (one column per user) with the unique subreddits found in that column.
# All columns are collected first and the frame is assembled in a single call:
# this avoids inserting columns one by one (which fragments the frame) and the
# implicit alignment of a DataFrame assigned through .loc.
unique_subs_by_user = {
    username: pd.Series(brasil[username].unique())
    for username in brasil.columns
}

# Length of the longest unique list; default=0 keeps an empty input from raising.
max_unique_values = max(
    (len(subs) for subs in unique_subs_by_user.values()), default=0
)

# Shorter columns are padded with NaN when reindexed to the common range index.
df_user_brasil = pd.DataFrame(unique_subs_by_user, index=range(max_unique_values))

In [86]:
# All subreddits related to the community, across every user column.
# The per-user unique values are gathered first and concatenated once instead
# of re-allocating the array on every iteration. NaN entries are dropped so a
# missing cell can never end up listed (and later drawn) as a subreddit.
lista_sub_brasil = np.concatenate(
    [np.array([], dtype=object)]
    + [brasil[username].dropna().unique() for username in brasil.columns]
)
df_sub_brasil = pd.DataFrame({'Subreddits_related': lista_sub_brasil})
df_sub_brasil = df_sub_brasil.drop_duplicates(subset=['Subreddits_related'])  # Remove duplicates
df_sub_brasil = df_sub_brasil[df_sub_brasil['Subreddits_related'] != 'brasil']  # Remove r/brasil



## Formatting data from r/brasilivre

In [100]:
# Each user (one column per user) with the unique subreddits found in that column.
# All columns are collected first and the frame is assembled in a single call:
# this avoids inserting columns one by one (which fragments the frame) and the
# implicit alignment of a DataFrame assigned through .loc.
unique_subs_by_user = {
    username: pd.Series(brasilivre[username].unique())
    for username in brasilivre.columns
}

# Length of the longest unique list; default=0 keeps an empty input from raising.
max_unique_values = max(
    (len(subs) for subs in unique_subs_by_user.values()), default=0
)

# Shorter columns are padded with NaN when reindexed to the common range index.
df_user_brasilivre = pd.DataFrame(unique_subs_by_user, index=range(max_unique_values))

In [121]:
# All subreddits related to the community, across every user column.
# The per-user unique values are gathered first and concatenated once instead
# of re-allocating the array on every iteration. NaN entries are dropped so a
# missing cell can never end up listed (and later drawn) as a subreddit.
lista_sub_brasilivre = np.concatenate(
    [np.array([], dtype=object)]
    + [brasilivre[username].dropna().unique() for username in brasilivre.columns]
)
df_sub_brasilivre = pd.DataFrame({'Subreddits_related': lista_sub_brasilivre})
df_sub_brasilivre = df_sub_brasilivre.drop_duplicates(subset=['Subreddits_related'])  # Remove duplicates
df_sub_brasilivre = df_sub_brasilivre[df_sub_brasilivre['Subreddits_related'] != 'brasilivre']  # Remove r/brasilivre


## Network Graph 1

Ficou feio, pegando só as subreddits

In [204]:
# Start Graph 1 from an empty networkx Graph; nodes and edges are added in the next cells.
G = nx.Graph()

In [205]:
# Register the two community hubs before any related subreddit is attached.
for hub in ('brasilivre', 'brasil'):
    G.add_node(hub)

In [206]:
# Link every related subreddit to its community hub.
# add_edges_from creates any missing endpoint node, so no separate add_node
# call (or per-iteration membership scan of the other list) is required.
# dropna() keeps missing values from becoming nodes.
G.add_edges_from(
    ('brasilivre', sub)
    for sub in df_sub_brasilivre['Subreddits_related'].dropna()
)
G.add_edges_from(
    ('brasil', sub)
    for sub in df_sub_brasil['Subreddits_related'].dropna()
)

In [207]:
# Choose the layout.
# spring_layout starts from random positions; fixing the seed makes the figure
# identical on every Restart & Run All.
pos = nx.spring_layout(G, k=0.5, iterations=100, seed=42)
# Store each node's coordinates on the node so the plotting cells can read them.
for n, p in pos.items():
    G.nodes[n]['pos'] = p

In [208]:
# Collect the edge coordinates in plain lists and build the trace once.
# Each edge contributes (start, end, None); the None breaks the line so
# consecutive edges are not joined together.
edge_x = []
edge_y = []
for u, v in G.edges():
    x0, y0 = G.nodes[u]['pos']
    x1, y1 = G.nodes[v]['pos']
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

In [209]:
# Gather coordinates, labels and connection counts first, then build the trace
# in one go instead of growing its properties with repeated `+=`.
node_x = []
node_y = []
for node in G.nodes():
    x, y = G.nodes[node]['pos']
    node_x.append(x)
    node_y.append(y)

# G.adjacency() yields (node, neighbours) in the same order as G.nodes().
node_names = []
node_connections = []
for name, neighbours in G.adjacency():
    node_names.append(name)
    node_connections.append(len(neighbours))

node_trace = go.Scatter(
    x=node_x,
    y=node_y,
    text=node_names,
    mode='markers+text',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        colorscale='pinkyl',
        reversescale=True,
        color=node_connections,  # colour encodes the number of connections
        size=37,
        colorbar=dict(
            thickness=1,
            # the flat `titleside` attribute is deprecated; the side is nested under `title`
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line=dict(width=0)))

In [210]:
# Assemble and display Graph 1: edges first so the nodes are drawn on top.
title = "Network Graph Demonstration"
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # the flat `titlefont` attribute is deprecated; the font is nested under `title`
        title=dict(text=title, font=dict(size=16)),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=21, l=5, r=5, t=40),
        # hide grid, zero line and tick labels: positions carry no meaning here
        xaxis=dict(showgrid=False, zeroline=False,
                   showticklabels=False, mirror=True),
        yaxis=dict(showgrid=False, zeroline=False,
                   showticklabels=False, mirror=True)))
fig.show()

## Network Graph 2

Pegando só as subreddits, mais legível

In [248]:
# Graph 2: same subreddit-only network as Graph 1, rebuilt from scratch.
G = nx.Graph()
G.add_node('brasilivre')
G.add_node('brasil')

# add_edges_from creates any missing endpoint node, so no separate add_node
# call (or per-iteration membership scan of the other list) is required.
# dropna() keeps missing values from becoming nodes.
G.add_edges_from(
    ('brasilivre', sub)
    for sub in df_sub_brasilivre['Subreddits_related'].dropna()
)
G.add_edges_from(
    ('brasil', sub)
    for sub in df_sub_brasil['Subreddits_related'].dropna()
)

# Choose the layout.
# spring_layout starts from random positions; fixing the seed makes the figure
# identical on every Restart & Run All.
pos = nx.spring_layout(G, k=0.5, iterations=100, seed=42)
for n, p in pos.items():
    G.nodes[n]['pos'] = p

In [249]:
# Edge coordinates: each edge contributes (start, end, None); the None breaks
# the line so consecutive edges are not joined together.
edge_x = []
edge_y = []
for u, v in G.edges():
    x0, y0 = G.nodes[u]['pos']
    x1, y1 = G.nodes[v]['pos']
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

# Node coordinates, in G.nodes() order.
node_x = []
node_y = []
for node in G.nodes():
    x, y = G.nodes[node]['pos']
    node_x.append(x)
    node_y.append(y)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=[],  # filled in by the next cell with the per-node connection count
        size=10,
        colorbar=dict(
            thickness=15,
            # the flat `titleside` attribute is deprecated; the side is nested under `title`
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=2))

In [250]:
# Number of neighbours per node, in G.adjacency() order; it drives both the
# marker colour and the hover text.
node_adjacencies = [len(neighbours) for _name, neighbours in G.adjacency()]
node_text = ['# of connections: ' + str(count) for count in node_adjacencies]

node_trace.marker.color = node_adjacencies
node_trace.text = node_text

In [251]:
# Assemble and display Graph 2: edges first so the nodes are drawn on top.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # the flat `titlefont_size` attribute is deprecated; the font is nested under `title`
        title=dict(
            text='<br>Relação entre r/brasil e r/brasil livre com as outras comunidades com publicações recentes pelos usuários selecionados',
            font=dict(size=16),
        ),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        # hide grid, zero line and tick labels: positions carry no meaning here
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
fig.show()


## Network Graph 3

Teste com users

In [457]:
# Graph 3: users sit between the two community hubs and the subreddits listed
# in their columns. Colour legend:
#   red / blue             -> the 'brasil' / 'brasilivre' hubs
#   orangered / steelblue  -> users from df_user_brasil / df_user_brasilivre
#   purple                 -> users present in both frames
#   darksalmon / lightblue -> subreddits first reached via a brasil / brasilivre user
#   plum                   -> subreddits reached via users of both communities
G = nx.Graph()
G.add_node('brasilivre')
G.add_node('brasil')

# Subreddits reached through r/brasil users; used below to decide which
# subreddits are genuinely shared between the two communities.
subs_from_brasil = set()

for username in df_user_brasil.columns:
    if username not in G.nodes():
        G.add_node(username, color="orangered")
    else:
        G.nodes[username]['color'] = "purple"
    G.add_edges_from([(username, 'brasil')])
    # dropna() skips the NaN cells of the padded frame so they never become nodes.
    for sub in df_user_brasil[username].dropna():
        subs_from_brasil.add(sub)
        if sub not in G.nodes():
            G.add_node(sub, color="darksalmon")
        G.add_edges_from([(username, sub)])

for username in df_user_brasilivre.columns:
    if username not in G.nodes():
        G.add_node(username, color="steelblue")
    else:
        G.nodes[username]['color'] = "purple"
    # These users come from the brasilivre frame, so they attach to that hub
    # (the original linked them to 'brasil').
    G.add_edges_from([(username, 'brasilivre')])
    for sub in df_user_brasilivre[username].dropna():
        if sub not in G.nodes():
            G.add_node(sub, color="lightblue")
        elif sub in subs_from_brasil:
            # Only subreddits also reached via r/brasil users are shared; one
            # seen twice by brasilivre users alone keeps its lightblue colour.
            G.nodes[sub]['color'] = "plum"
        G.add_edges_from([(username, sub)])

# Users present in both frames are always purple, whatever the loops assigned.
common_users = list(set(df_user_brasilivre.columns) & set(df_user_brasil.columns))
for user in common_users:
    G.nodes[user]['color'] = "purple"

# The hubs get their own colours last so nothing above can override them.
G.nodes['brasil']['color'] = "red"
G.nodes['brasilivre']['color'] = "blue"
# Nodes that never received a colour fall back to gray.
node_colors = [G.nodes[node].get('color', 'gray') for node in G.nodes]

In [458]:
# Choose the layout
# https://python.plainenglish.io/create-a-network-graph-in-python-8829e0ec6741

# spring_layout starts from random positions; fixing the seed makes the figure
# identical on every Restart & Run All.
pos = nx.spring_layout(G, k=0.5, iterations=100, seed=42)
# Store each node's coordinates on the node so the plotting cells can read them.
for n, p in pos.items():
    G.nodes[n]['pos'] = p

In [459]:
# Edge coordinates: every edge adds (start, end, None) to each axis list; the
# None separates one segment from the next.
edge_x, edge_y = [], []
for src, dst in G.edges():
    src_x, src_y = G.nodes[src]['pos']
    dst_x, dst_y = G.nodes[dst]['pos']
    edge_x += [src_x, dst_x, None]
    edge_y += [src_y, dst_y, None]

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines',
)

# Node coordinates, in G.nodes() order.
node_positions = [G.nodes[n]['pos'] for n in G.nodes()]
node_x = [position[0] for position in node_positions]
node_y = [position[1] for position in node_positions]


In [465]:
# Node markers for Graph 3: colour comes from node_colors, size grows with the
# node's degree.
node_degrees = dict(G.degree)
node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        color=node_colors,
        # base size of 10 plus 0.4 per connection
        size=[10 + node_degrees[node] * 0.4 for node in G.nodes],
        colorbar=dict(
            thickness=15,
            # the flat `titleside` attribute is deprecated; the side is nested under `title`
            title=dict(text='Node Connections', side='right'),
            xanchor='left'
        ),
        line_width=2))

In [461]:
# Hover text per node, in G.adjacency() order: the node name, then its number
# of neighbours on a second line (the original glued the two together with no
# separator).
node_adjacencies = []
node_text = []
for name, neighbours in G.adjacency():
    connections = len(neighbours)
    node_adjacencies.append(connections)
    node_text.append(str(name) + '<br># of connections: ' + str(connections))

node_trace.text = node_text

In [466]:
# Assemble and display Graph 3: edges first so the nodes are drawn on top.
fig = go.Figure(
    data=[edge_trace, node_trace],
    layout=go.Layout(
        # the flat `titlefont_size` attribute is deprecated; the font is nested under `title`
        title=dict(text='<br>Network graph made with Python', font=dict(size=16)),
        showlegend=False,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        # credit line anchored to the bottom-left corner of the plotting area
        annotations=[dict(
            text="Python code: <a href='https://plotly.com/ipython-notebooks/network-graphs/'> https://plotly.com/ipython-notebooks/network-graphs/</a>",
            showarrow=False,
            xref="paper", yref="paper",
            x=0.005, y=-0.002)],
        # hide grid, zero line and tick labels: positions carry no meaning here
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
fig.show()
