# In[ ]:
import json
import gzip
import pandas as pd
import networkx as nx
from itertools import combinations

### The tweets file should only contain native tweets from the v1.1 Twitter API

# In[ ]:
# Load every tweet from the gzipped dump into a DataFrame, one row per tweet.
# Each line of the file is expected to hold a JSON array of v1.1 tweet
# objects; a line holding a single tweet object is accepted as well.
TWEET_COLUMNS = ['tweetid', 'userid', 'created_at', 'text', 'hashtags']
records = []

with gzip.open('tweets.json.gz', 'rb') as f:
    for line in f:
        try:
            tweets = json.loads(line)
        except ValueError as e:
            # Malformed JSON or undecodable bytes: report it and skip the line.
            print(e)
            continue

        if isinstance(tweets, dict):
            tweets = [tweets]
        elif not isinstance(tweets, list):
            print(f'Skipping line with unexpected JSON type: {type(tweets).__name__}')
            continue

        for tweet in tweets:
            try:
                # Build the complete record before storing it, so a tweet with
                # a missing or malformed field can never leave the columns
                # misaligned. Only the offending tweet is skipped; the rest of
                # the line is still processed.
                record = (
                    tweet['id_str'],
                    tweet['user']['id_str'],
                    pd.to_datetime(tweet['created_at']),
                    tweet['text'],
                    tweet['entities']['hashtags'],
                )
            except (KeyError, TypeError, ValueError) as e:
                print(e)
                continue
            records.append(record)

data = pd.DataFrame(records, columns=TWEET_COLUMNS)
# A stable sort keeps tweets with identical timestamps in file order, which
# makes the per-user hashtag sequences derived from this frame deterministic.
data.sort_values(by='created_at', kind='mergesort', inplace=True)

# Extract users that meet the thresholds.
# A user qualifies with at least MIN_TWEETS tweets, of which at least
# MIN_TAGGED_TWEETS carry one or more hashtags, and at least
# MIN_UNIQUE_HASHTAGS distinct hashtags overall.
# hashtag_seqs maps each ordered hashtag sequence (a tuple of hashtag texts,
# in the row order of `data`) to the list of users that produced exactly
# that sequence.
MIN_TWEETS = 5
MIN_TAGGED_TWEETS = 5
MIN_UNIQUE_HASHTAGS = 5

hashtag_seqs = dict()
# sort=False keeps users in order of first appearance, and a single groupby
# pass avoids re-filtering the whole frame for every user.
for user, user_hashtags in data.groupby('userid', sort=False)['hashtags']:
    if len(user_hashtags) < MIN_TWEETS:
        continue

    tagged_tweets = [tags for tags in user_hashtags if len(tags) != 0]
    # A tuple key keeps hashtag boundaries intact; concatenating the texts
    # into one string would make ('ab', 'c') and ('a', 'bc') collide.
    sequence = tuple(tag['text'] for tags in tagged_tweets for tag in tags)

    if (len(tagged_tweets) >= MIN_TAGGED_TWEETS
            and len(set(sequence)) >= MIN_UNIQUE_HASHTAGS):
        hashtag_seqs.setdefault(sequence, []).append(user)

# Build an undirected network of users that met the above thresholds and
# have the same hashtag sequences: every pair of users sharing a sequence
# is connected by an edge.
G = nx.Graph()
for users in hashtag_seqs.values():
    if len(users) > 1:
        # Edges must be added through the graph instance; networkx has no
        # module-level add_edge function.
        G.add_edges_from(combinations(users, 2))

nx.write_gexf(G, 'hashtag_coordination_graph.gexf')