# Graph Analysis of Twitter Data About War In Ukraine

Our data are retweets taken from a database of tweets with hashtags associated with the war in Ukraine.
We are going to analyse two graphs: an undirected one with no parallel edges (**G**) and a directed one with parallel edges allowed (**MDG**). We will look at some basic information, such as density and diameter, and also analyse chains and the most frequent triplets.

## Importing required libraries

In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from nltk.tokenize import word_tokenize
import matplotlib.pyplot as plt
import netsci.models.random as nsr
import netsci.metrics.motifs as nsm
import netsci.visualization as nsv

## Handy utility functions

* Adding edges to newly created graph:

In [None]:
def add_edges_to_graph(graph):
    """Add one edge to ``graph`` for every row of ``main_df`` whose text contains "RT @".

    Reads the module-level ``main_df`` frame (columns ``text``, ``author`` and
    ``Tension``). Each edge runs from the row's ``author`` to the third token
    of the tokenized text and carries a ``color`` attribute looked up from the
    row's ``Tension`` value.

    Args:
        graph: Graph object exposing ``add_edge``; it is modified in place.

    Returns:
        None.
    """
    # Tension value (as a string) -> edge colour; unknown values map to None.
    tension_to_color = {'-1': 'white',
                        '1': 'white',
                        '0': 'blue'}

    row_count = main_df.shape[0]
    for row in range(row_count):
        try:
            text = main_df["text"][row]
            if "RT @" not in text:
                continue
            # Presumably the retweeted account: assumes the text tokenizes as
            # 'RT', '@', '<user>', ... -- TODO confirm for texts where "RT @"
            # is not at the very start.
            retweeted = word_tokenize(text)[2]
            author = main_df["author"][row]
            edge_color = tension_to_color.get(str(main_df["Tension"][row]))
            graph.add_edge(author, retweeted, color=edge_color)
        except TypeError:
            # Raised e.g. when the text cell is not a string; such rows are skipped.
            continue

* Calculating the pseudo-diameter ("pseudo" because our graph is not connected, so we take the longest shortest path found between any two mutually reachable nodes):

In [None]:
def pseudo_diameter(graph):
    """Return the largest shortest-path length found between reachable node pairs.

    For every source node yielded by ``nx.shortest_path_length`` the greatest
    distance to any node it reaches is taken; the maximum of those values is
    returned.

    Args:
        graph: Graph accepted by ``nx.shortest_path_length``.

    Returns:
        The maximum of the per-source greatest distances.
    """
    farthest_per_source = []
    for _source, distances in nx.shortest_path_length(graph):
        farthest_per_source.append(max(distances.values()))
    return max(farthest_per_source)

* Printing summary of given graph:

In [None]:
def summary(graph):
    """Print a two-column table of basic statistics for ``graph``.

    Rows printed, in order: edge count, node count, density, pseudo-diameter,
    a blank separator row, and the three highest-degree nodes.

    Args:
        graph: Graph accepted by ``nx.density``, ``pseudo_diameter`` and
            ``count_degrees``.

    Returns:
        None. The table is written to standard output.
    """
    # Rank the nodes once and reuse the result; every call to count_degrees
    # sorts all nodes, and the table needs only the first three entries.
    ranked = count_degrees(graph)
    top_three = [entry[0] for entry in ranked[:3]]
    # Pad so graphs with fewer than three nodes still print every row
    # instead of failing with an IndexError.
    top_three.extend(['n/a'] * (3 - len(top_three)))

    d = {'Edges': len(graph.edges),
         'Nodes': len(graph.nodes),
         'Density': nx.density(graph),
         'pseudo-Diameter': pseudo_diameter(graph),
         '': '',
         'Most popular node': top_three[0],
         'Second most popular node': top_three[1],
         'Third most popular node': top_three[2]}
    for k, v in d.items():
        print("{:<30} {}".format(k, v))

* Plotting bar chart of graph degrees:

In [None]:
def plot_degrees(graph):
    """Draw a bar chart of the first ten entries returned by ``count_degrees``.

    ``count_degrees`` returns ``[node, degree]`` pairs ordered from highest to
    lowest degree, so the chart shows the (up to) ten highest-degree nodes.

    Args:
        graph: Graph passed on to ``count_degrees``.

    Returns:
        None. A new matplotlib figure is created and drawn on.
    """
    top_ten = count_degrees(graph)[:10]
    node_labels = [node for node, _degree in top_ten]
    bar_heights = [degree for _node, degree in top_ten]

    plt.figure(figsize=(16, 4))
    plt.title(label="Degree plot", fontsize=30)
    plt.bar(node_labels, bar_heights)

* Plotting bar chart of graph centrality:

In [None]:
def plot_centrality(graph):
    """Draw a bar chart of the ten nodes with the highest degree centrality.

    Args:
        graph: Graph accepted by ``nx.degree_centrality``.

    Returns:
        None. A new matplotlib figure is created and drawn on.
    """
    scores = nx.degree_centrality(graph)

    # Sort ascending (stable) and then flip, so that nodes with equal scores
    # appear in the reverse of their original iteration order.
    ascending = sorted(
        ([node, score] for node, score in scores.items()),
        key=lambda pair: pair[1],
    )
    top_ten = ascending[::-1][:10]

    node_labels = [pair[0] for pair in top_ten]
    bar_heights = [pair[1] for pair in top_ten]

    plt.figure(figsize=(16, 4))
    plt.title(label="Centrality plot", fontsize=30)
    plt.bar(node_labels, bar_heights)

* Returning adjacency matrix of given graph:

In [None]:
def get_adj_matrix(graph):
    """Return the binary (0/1) adjacency matrix of ``graph`` as an integer array.

    Entry ``[i, j]`` is 1 when the integer-truncated adjacency value between
    node ``i`` and node ``j`` is non-zero, and 0 otherwise; for a multigraph
    this collapses any number of parallel edges to a single 1.

    Args:
        graph: Graph accepted by ``nx.to_numpy_array``.

    Returns:
        A 2-D ``numpy.ndarray`` of dtype ``int`` containing only 0 and 1.
    """
    # ``nx.to_numpy_matrix`` was removed in NetworkX 3.0; ``to_numpy_array``
    # returns the same values as a plain ndarray.
    mx_arr = nx.to_numpy_array(graph).astype('int')

    # Binarise with a comparison rather than dividing the matrix by itself,
    # which produced 0/0 NaNs (and RuntimeWarnings) that then had to be
    # converted back to zeros.
    adj_mx = (mx_arr != 0).astype('int')

    return adj_mx

* Calculating and plotting network motifs (triplets) of given graph:

In [None]:
def plot_triplets(graph, order=None):
    """Count the three-node motifs (triplets) of ``graph``, print and plot them.

    Args:
        graph: Graph passed on to ``get_adj_matrix``.
        order: Motif indices forwarded to ``nsv.bar_motifs``. Defaults to
            ``[0, 1, ..., 15]`` when omitted or ``None``.

    Returns:
        None. The motif counts are printed and drawn as a bar chart.
    """
    # Build the default per call instead of sharing one mutable list object
    # across all calls.
    if order is None:
        order = list(range(16))

    mx = get_adj_matrix(graph)
    # The original body printed and plotted ``f`` without ever defining it
    # (NameError) and never used ``mx``; the motif count is the missing step.
    # NOTE(review): ``algorithm='brute-force'`` follows the netsci README
    # example -- confirm it is the variant intended here.
    f = nsm.motifs(mx, algorithm='brute-force')
    print(f)
    nsv.bar_motifs(f, order=order)

* Calculating degrees for nodes in given graph:

In [None]:
def count_degrees(graph):
    """Return ``[node, degree]`` pairs for ``graph``, highest degree first.

    Args:
        graph: Graph accepted by ``nx.degree``.

    Returns:
        A list of two-element lists ``[node, degree]`` in descending degree
        order. Nodes with equal degree appear in the reverse of the order in
        which ``nx.degree`` yields them.
    """
    pairs = [list(entry) for entry in nx.degree(graph)]
    # Stable ascending sort followed by a flip keeps the tie order described
    # in the docstring.
    pairs.sort(key=lambda pair: pair[1])
    return pairs[::-1]

* Plotting chains of nodes from a given graph (works only for undirected graphs with no parallel edges):

In [None]:
def plot_chains(graph):
    """Plot the longer chains from the chain decomposition of ``graph``.

    Chains yielded by ``nx.chain_decomposition`` that have more than three
    entries are merged into one new undirected graph, which is then drawn
    twice: with the default layout and with the shell layout.

    Args:
        graph: Graph accepted by ``nx.chain_decomposition``.

    Returns:
        None. Two matplotlib figures are created and shown.
    """
    long_chains = [
        chain for chain in nx.chain_decomposition(graph) if len(chain) > 3
    ]
    # Longest chains first; the stable sort plus flip reverses the order of
    # chains that have equal length.
    long_chains = sorted(long_chains, key=len)[::-1]

    chain_graph = nx.Graph()
    for chain in long_chains:
        chain_graph.add_edges_from(chain)

    plt.figure(figsize=(8, 8))
    plt.title(label="Normal graph", fontsize=30)
    nx.draw(chain_graph, with_labels=True)

    plt.figure(figsize=(8, 8))
    plt.title(label="Shell graph", fontsize=30)
    nx.draw_shell(chain_graph, with_labels=True)

    plt.show()

## Importing data

In [None]:
# Load the tweet table and the table holding the 'Tension' column.
sentiment_df = pd.read_csv('./tweets_sentiments.csv')
main_df = pd.read_csv('./output1.csv')

# Keep only the 'Tension' column and attach it to the tweets column-wise.
# pd.concat(axis=1) matches rows by index label.
# NOTE(review): this assumes both files list the same tweets in the same
# order -- confirm.
sent_df = sentiment_df[['Tension']]
main_df = pd.concat([main_df, sent_df], axis=1)

Quick peek at the data:

In [None]:
# Show the first rows of the combined frame.
main_df.head()

How many records are included in the graph:

In [None]:
# Count the rows of main_df whose text contains the marker "RT @".
retweets_count = 0
for i in range(main_df.shape[0]):
    try:
        is_retweet = "RT @" in main_df["text"][i]
    except TypeError:
        # The text cell does not support the substring test (e.g. it is not
        # a string); skip the row.
        continue
    if is_retweet:
        retweets_count += 1

In [None]:
# Display the number of rows whose text contains "RT @".
retweets_count

## Creating graphs

In [None]:
# G: undirected graph without parallel edges (re-adding an existing edge
# does not create a second one).
G = nx.Graph()
# MDG: directed graph that keeps parallel edges between the same node pair.
MDG = nx.MultiDiGraph()

### Adding edges to graphs

To undirected graph with no parallel edges:

In [None]:
# Fill G in place from the rows of the global main_df.
add_edges_to_graph(G)

To directed graph with parallel edges:

In [None]:
# Fill MDG in place from the rows of the global main_df.
add_edges_to_graph(MDG)

## Plotting graphs

### Undirected graph with no parallel edges:

In [None]:
# plt.figure(figsize=(300,300))
# nx.draw(G, with_labels=True)
# plt.savefig('simple_graph.png')
# plt.show()

In [None]:
# Draw G on a 50x50 inch figure without node labels.
plt.figure(figsize=(50,50))
nx.draw(G, with_labels=False)
plt.show()

As we can see, there are several main clusters. Three of them are centred on the users _propeertys_, _AntyAnty77_ and _SieciechT_. There are also many pairs and triplets on the periphery. This could mean that our dataset describes only a small part of a bigger discourse on the war in Ukraine and the refugee crisis on Poland's eastern border. Each of these pairs and triplets could be connected to other, larger structures of information distribution.

### Directed graph with parallel edges:

In [None]:
# plt.figure(figsize=(300,300))
# edges, colors = zip(*nx.get_edge_attributes(MDG, 'color').items())
# nx.draw(MDG, edge_color=colors, with_labels=True)
# plt.savefig('multidi_graph.png')
# plt.show()

In [None]:
# Draw MDG with each edge coloured by its stored 'color' attribute.
plt.figure(figsize=(50,50))
# get_edge_attributes returns an edge -> color mapping; zip(*...) splits it
# into two parallel tuples. Only `colors` is used below.
edges, colors = zip(*nx.get_edge_attributes(MDG, 'color').items())
nx.draw(MDG, edge_color=colors, with_labels=False)
plt.show()

Reversed graph:

In [None]:
# Same plot on the reversed graph: every edge direction is flipped.
mdg = MDG.reverse()
plt.figure(figsize=(50,50))
# Colours are read from the reversed graph so they line up with its edges.
edges, colors = zip(*nx.get_edge_attributes(mdg, 'color').items())
nx.draw(mdg, edge_color=colors, with_labels=False)
plt.show()

Only 0s:

In [None]:
# Draw MDG with edges coloured by their stored 'color' attribute.
# NOTE(review): no filtering happens in this cell. With the colour map in
# add_edges_to_graph ('0' -> blue, '-1'/'1' -> white), presumably only the
# Tension-0 edges stand out on a white background -- confirm.
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(MDG, 'color').items())
nx.draw(MDG, edge_color=colors, with_labels=False)
plt.show()

In [None]:
# Reversed-direction version of the plot above.
# NOTE(review): no filtering happens in this cell; which edges are visible
# depends only on the colours assigned in add_edges_to_graph -- confirm.
mdg = MDG.reverse()
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(mdg, 'color').items())
nx.draw(mdg, edge_color=colors, with_labels=False)
plt.show()

Only -1s and 1s:

In [None]:
# Draw MDG with edges coloured by their stored 'color' attribute.
# NOTE(review): this code is identical to the "Only 0s" cell. The colour map
# in add_edges_to_graph currently highlights only '0', so presumably it was
# edited and MDG rebuilt before this cell was run -- confirm.
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(MDG, 'color').items())
nx.draw(MDG, edge_color=colors, with_labels=False)
plt.show()

In [None]:
# Reversed-direction version of the plot above.
# NOTE(review): this code is identical to the "Only 0s" reversed cell; the
# intended -1/1 selection presumably relies on an edited colour map in
# add_edges_to_graph -- confirm.
mdg = MDG.reverse()
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(mdg, 'color').items())
nx.draw(mdg, edge_color=colors, with_labels=False)
plt.show()

Only 1s:

In [None]:
# Draw MDG with edges coloured by their stored 'color' attribute.
# NOTE(review): this code is identical to the "Only 0s" cell; showing only
# Tension 1 presumably requires a different colour map in
# add_edges_to_graph and a rebuilt MDG -- confirm.
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(MDG, 'color').items())
nx.draw(MDG, edge_color=colors, with_labels=False)
plt.show()

In [None]:
# Reversed-direction version of the plot above.
# NOTE(review): this code is identical to the "Only 0s" reversed cell; the
# Tension-1 selection presumably relies on an edited colour map in
# add_edges_to_graph -- confirm.
mdg = MDG.reverse()
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(mdg, 'color').items())
nx.draw(mdg, edge_color=colors, with_labels=False)
plt.show()

Only -1s:

In [None]:
# Draw MDG with edges coloured by their stored 'color' attribute.
# NOTE(review): this code is identical to the "Only 0s" cell; showing only
# Tension -1 presumably requires a different colour map in
# add_edges_to_graph and a rebuilt MDG -- confirm.
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(MDG, 'color').items())
nx.draw(MDG, edge_color=colors, with_labels=False)
plt.show()

In [None]:
# Reversed-direction version of the plot above.
# NOTE(review): this code is identical to the "Only 0s" reversed cell; the
# Tension -1 selection presumably relies on an edited colour map in
# add_edges_to_graph -- confirm.
mdg = MDG.reverse()
plt.figure(figsize=(50,50))
edges, colors = zip(*nx.get_edge_attributes(mdg, 'color').items())
nx.draw(mdg, edge_color=colors, with_labels=False)
plt.show()

## Descriptions of graphs

### Undirected:

In [None]:
# Print edge/node counts, density, pseudo-diameter and the top-degree nodes of G.
summary(G)

In [None]:
# Bar charts of the ten highest-degree and ten highest-centrality nodes of G;
# each helper draws on its own new figure.
plot_degrees(G)
plot_centrality(G)

In [None]:
# Motif (triplet) counts for G. `order` is forwarded to nsv.bar_motifs;
# presumably it selects which motif classes are shown -- confirm.
plot_triplets(G, order=[1,3,4,5,6,7])

In [None]:
# Draw the chains of G that have more than three entries (two layouts).
plot_chains(G)

### Directed:

In [None]:
# Print edge/node counts, density, pseudo-diameter and the top-degree nodes of MDG.
summary(MDG)

In [None]:
# Bar charts of the ten highest-degree and ten highest-centrality nodes of MDG.
# Both helpers create their own figure and return None, so they are called
# directly; the original assigned their results into `axs`, a name that is
# never defined, which raised a NameError.
plot_degrees(MDG)
plot_centrality(MDG)

In [None]:
# Motif (triplet) counts for MDG. `order` is forwarded to nsv.bar_motifs;
# presumably it selects which motif classes are shown -- confirm.
plot_triplets(MDG, order=[1,3,4,5,6,7])