# Packages

In [0]:
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx
import requests
from pandas.io.json import json_normalize

# List of documents

In [0]:
_FMlinkList = {
    'october2017':"http://educatorinnovator.org/how-young-activists-deploy-digital-tools-for-social-change",
    #'november2017':"https://educatorinnovator.org/wp-content/uploads/2017/10/RRE_Chapter-6_Civic-Participation-Remiagined_0091732X17690121.pdf",
    #'december2017':"https://educatorinnovator.org/wp-content/uploads/2017/10/Critical-Literacy-And-Our-Students-Lives.pdf",
    #'january2018':"https://educatorinnovator.org/wp-content/uploads/2017/12/OurDeclaration_PG31-35.pdf",
    #'february2018':"https://educatorinnovator.org/wp-content/uploads/2018/02/Educating-for-Democracy-in-a-Partisan-Age.pdf.pdf",
    #'march2018':"https://educatorinnovator.org/wp-content/uploads/2018/02/The-Stories-They-Tell-.pdf",
    #'april2018':"https://educatorinnovator.org/wp-content/uploads/2018/03/Educating-Youth-for-Online-Civic-and-Political-Dialogue_-A-Conceptual-Framework-for-the-Digital-Age-_-Journal-of-Digital-and-Media-Literacy.pdf",
    #'may2018':"https://educatorinnovator.org/wp-content/uploads/2018/03/zemelman_websample.pdf",
    'june2018':"http://www.ncte.org/library/NCTEFiles/Resources/Journals/VM/0254-may2018/VM0254Leading.pdf"
}

# Helper functions

In [0]:
# Build dataframe from hypothes.is API
def H_API(session, uri):
    """Download all annotations for ``uri`` from the hypothes.is search API.

    The endpoint is paged: each request passes the number of rows collected so
    far as ``offset`` until the reported ``total`` has been reached.

    Parameters
    ----------
    session : str
        Label stored in the ``session`` column of every returned row.
    uri : str
        Address of the annotated document (sent as the ``url`` query parameter).

    Returns
    -------
    pandas.DataFrame
        One row per annotation, flattened with ``pd.json_normalize``. The
        columns listed in ``expected_columns`` are always present (even when
        no annotation is returned), followed by any extra fields from the API
        and ``session``. ``created`` and ``updated`` are parsed with
        ``pd.to_datetime``; the frame is indexed by ``created``, which is also
        kept as a regular column.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    """
    expected_columns = [
        'created', 'document.title', 'group', 'id', 'links.html',
        'links.incontext', 'links.json', 'permissions.admin',
        'permissions.delete', 'permissions.read', 'permissions.update',
        'references', 'tags', 'target', 'text', 'updated', 'uri', 'user']
    endpoint = 'https://hypothes.is/api/search'

    rows = []
    total = None
    while total is None or len(rows) < total:
        payload = {'url': uri}
        if rows:
            payload['offset'] = len(rows)
        response = requests.get(endpoint, params=payload, timeout=30)
        response.raise_for_status()
        body = response.json()
        if total is None:
            # Like the original code, trust the total reported by the first page.
            total = body['total']
        page = body['rows']
        if not page:
            # The API returned fewer rows than it announced; stop here instead
            # of requesting the same empty page forever.
            break
        rows.extend(page)

    # Flatten everything once instead of growing a frame page by page
    # (DataFrame.append no longer exists in pandas >= 2.0).
    tmp = pd.json_normalize(rows)
    extra_columns = [col for col in tmp.columns if col not in expected_columns]
    tmp = tmp.reindex(columns=expected_columns + extra_columns)

    tmp['session'] = session
    tmp['updated'] = pd.to_datetime(tmp['updated'])
    tmp['created'] = pd.to_datetime(tmp['created'])
    # Passing the Series (not the column name) keeps 'created' as a column too.
    tmp = tmp.set_index(tmp['created'])
    return tmp

#returns the number of times a message was in a reference list of other messages
def countReplies(msgId):
    """Count the rows of the global ``df`` whose ``references`` contain ``msgId``.

    Only the ``references`` column is inspected. Rows whose ``references`` is
    missing (not list-like) are skipped; missing values in other columns do
    not affect the count.

    Parameters
    ----------
    msgId : str
        Id of the message to look for.

    Returns
    -------
    int
        Number of rows that list ``msgId`` in their ``references``.
    """
    return sum(
        1
        for refs in df['references']
        if pd.api.types.is_list_like(refs) and msgId in refs
    )
def listOfIds(msgId):
    """Return the ids of the rows of the global ``df`` that reference ``msgId``.

    Only the ``references`` column decides membership. Rows whose
    ``references`` is missing (not list-like) are skipped; missing values in
    other columns do not exclude a row. Selection is positional, so duplicate
    index labels are harmless.

    Parameters
    ----------
    msgId : str
        Id of the message to look for.

    Returns
    -------
    numpy.ndarray
        The ``id`` values, in frame order, of rows listing ``msgId`` in
        ``references``.
    """
    mentions = pd.Series(
        [pd.api.types.is_list_like(refs) and msgId in refs for refs in df['references']],
        dtype=bool,
    ).to_numpy()
    return df['id'].values[mentions]
def replyTimeDelta(x):
    """Look up when message ``x`` was created.

    Despite its name this returns a timestamp, not a delta: the ``created``
    value of the first row of the global ``df`` whose ``id`` equals ``x``,
    or ``None`` when no row has that id.
    """
    created_of_match = df.loc[df['id'] == x, 'created']
    if created_of_match.empty:
        return None
    return created_of_match.iloc[0]
    
#Builds a reply chain of users
def replyChain(refs):
    """Translate a list of message ids into the users who wrote them.

    Each id in ``refs`` is looked up in the global ``df``; the ``user`` of the
    first matching row is collected, in the order given. If any id has no
    matching row the whole result is ``None``.
    """
    authors = []
    for ref_id in refs:
        writer = df.loc[df['id'] == ref_id, 'user']
        if writer.empty:
            return None
        authors.append(writer.iloc[0])
    return authors

# Create dataFrames

In [0]:
# For every session in _FMlinkList: download the annotations, derive reply
# metadata, group the messages into reply threads and store one dataframe of
# threads per session in `threadsByMonth`.
threadsByMonth = {}
for session, link in _FMlinkList.items():
    # Build network of threads.
    # H_API already returns the full set of columns (including 'session'), so
    # the result is used directly; the former append onto an empty template
    # frame relied on DataFrame.append, which pandas 2.0 removed.
    # NOTE: `df` must stay a module-level name; replyTimeDelta() and
    # replyChain() read it as a global.
    df = H_API(session, link)

    # Express the (created-time) index in US/Mountain. Newer pandas parses the
    # timestamps as tz-aware, in which case tz_localize would raise, so only
    # localize an index that is still naive.
    if getattr(df.index, 'tz', None) is None:
        df = df.tz_localize('UTC')
    df = df.tz_convert('US/Mountain')

    # Modify df for network analysis. The copy makes the column assignments
    # below operate on an independent frame (no SettingWithCopyWarning).
    df = df[['created', 'id', 'references', 'session', 'tags', 'target', 'text', 'user']].copy()
    # The last entry of `references` is taken as the message being replied to.
    df['replyTo'] = df['references'].map(lambda x: x[-1], na_action='ignore')
    # Fixed-width trim of the user string; presumably drops an 'acct:' prefix
    # and an '@hypothes.is' suffix -- TODO confirm against the API payload.
    df['user'] = df['user'].map(lambda x: x[5:-12])

    # Time between message and reply, in minutes.
    df['created'] = pd.to_datetime(df['created'])  # added to fix subtraction error
    parentCreated = df['replyTo'].map(replyTimeDelta, na_action='ignore')
    # Computed element by element and assigned positionally: a parent that is
    # not in df (replyTimeDelta returned None) or a non-reply row yields NaN
    # instead of an error, and duplicate index timestamps cannot break the
    # assignment.
    df['replyDelay'] = [
        (sent - parent).total_seconds() / 60 if pd.notnull(parent) else float('nan')
        for sent, parent in zip(df['created'], parentCreated)
    ]

    # Builds reply chain of users (NaN for rows without references, None when
    # replyChain cannot resolve one of the referenced ids).
    df['replyChain'] = df['references'].map(replyChain, na_action='ignore')

    # Directed edge from each reply to the message it answers.
    G = nx.from_pandas_edgelist(df[df['references'].notnull()], 'id', 'replyTo',
                                edge_attr=['tags', 'target', 'text', 'user', 'replyDelay'],
                                create_using=nx.DiGraph())

    # A thread is a connected component of the undirected reply graph;
    # each entry of `threads` is the set of message ids in one thread.
    threads = list(nx.connected_components(nx.to_undirected(G)))

    # Per-thread lists of text, reply delay, user and reply chain, taken from
    # the rows of df whose id belongs to the thread (in df row order).
    textT = []
    timesT = []
    usersT = []
    repliesT = []
    for thread in threads:
        members = df[df['id'].isin(thread)]
        textT.append(list(members['text']))
        timesT.append(list(members['replyDelay']))
        usersT.append(list(members['user']))
        repliesT.append(list(members['replyChain']))

    # Build threads dataframe, largest thread first.
    # totalMsgs counts the ids in the component, not the rows found in df.
    dfThreads = pd.DataFrame(pd.Series(data=threads), columns=['ids'])
    dfThreads['texts'] = pd.Series(textT)
    dfThreads['timeDelays'] = pd.Series(timesT)
    dfThreads['users'] = pd.Series(usersT)
    dfThreads['replyChains'] = pd.Series(repliesT)
    dfThreads['totalMsgs'] = dfThreads['ids'].map(len)
    dfThreads = dfThreads.sort_values(by='totalMsgs', ascending=False).reset_index(drop=True)

    threadsByMonth[session] = dfThreads


## Threads with 3 or more messages
### Prints text by participants

In [0]:
# Print every thread with at least 3 messages, one document at a time.
for k in threadsByMonth:
  print("****************************************************************************************************")
  print("****************************************************************************************************")
  print('                                 Document name:',k)
  print('              URL:',_FMlinkList[k])
  print("****************************************************************************************************")
  print("****************************************************************************************************")
  for key,vals in threadsByMonth[k].iterrows():
    if (vals['totalMsgs'] < 3): continue
    print("**************************************************")
    print('NEW THREAD')
    print('Message Count: ',vals['totalMsgs'])
    print("**************************************************")
    # Walk the messages in reverse stored order using reversed copies.
    # Calling list.reverse() here would flip the lists held inside
    # threadsByMonth, so every re-run of this cell would print the
    # thread in the opposite order.
    ul = vals['users'][::-1]
    tl = vals['texts'][::-1]
    rl = vals['replyChains'][::-1]
    for chain, user, text in zip(rl, ul, tl):
      # A NaN reply chain marks a message without references: print it
      # without a chain prefix.
      if (str(chain) == 'nan'):
        print(user+":",text)
      else: print(str(chain),user+":",text)
      print("-------------------")
    print()

****************************************************************************************************
****************************************************************************************************
                                 Document name: october2017
              URL: http://educatorinnovator.org/how-young-activists-deploy-digital-tools-for-social-change
****************************************************************************************************
****************************************************************************************************
**************************************************
NEW THREAD
Message Count:  6
**************************************************
andreaz: One thing I noticed is in the video is not only the powerful way she connects with her listeners by beginning with her own intimate discussion of faith, but the way she sets that next to the negative examples from social media. It is a powerful argument. It struck me when she even correct