# Setup for initial analysis

## Load packages

In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

try:
    # pandas >= 1.0 exposes json_normalize at the top level.
    from pandas import json_normalize
except ImportError:
    # Older pandas only ships it under pandas.io.json.
    from pandas.io.json import json_normalize

## List of documents

In [0]:
# Maps each session label to the URL of the document discussed in that
# session. The labels are reused as the `session` column value and as plot
# titles further down; insertion order determines processing order.
FMlinkList = {
    # 2016 Marginal Syllabus
    'August': 'http://www.commonsense.org/education/privacy/blog/digital-redlining-access-privacy',
    'September': 'http://dmlcentral.net/speculative-design-for-emergent-learning-taking-risks/',
    'October': 'http://marginalsyllab.us/wp-content/uploads/2016/08/PWFlow-Intro.pdf',
    'November': 'https://helenbeetham.com/2016/11/14/ed-tech-and-the-circus-of-unreason/',
    'January': 'http://marginalsyllab.us/the-school-and-social-progress-by-john-dewey/',
    'February': 'http://marginalsyllab.us/preface-to-research-writing-rewired-lessons-that-ground-students-digital-learning-by-dawn-reed-and-troy-hicks/',
    'FebruaryJCI': 'http://cognitionandinstruction.com/engagements-the-learning-sciences-in-a-new-era-of-u-s-nationalism/',
    'March': 'https://www.colorlines.com/articles/how-can-white-teachers-do-better-urban-kids-color',
    'April': 'http://educatorinnovator.org/between-storytelling-and-surveillance-the-precarious-public-of-american-muslim-youth/',
    'May': 'https://via.hypothes.is/http://educatorinnovator.org/wp-content/uploads/2017/05/LaMay-Ch5.pdf',
    # 2017 Marginal Syllabus
    'October-17': 'https://educatorinnovator.org/how-young-activists-deploy-digital-tools-for-social-change',
    'November-17': 'https://educatorinnovator.org/wp-content/uploads/2017/10/RRE_Chapter-6_Civic-Participation-Remiagined_0091732X17690121.pdf',
}

# Helper function

In [0]:
def H_API(session,uri):
    """Download every hypothes.is search result for one document.

    Pages through ``https://hypothes.is/api/search`` until the number of
    rows reported by the first response has been collected, or until the
    API returns an empty page (which would otherwise loop forever).

    Parameters
    ----------
    session : object
        Label written to the ``session`` column of every returned row.
    uri : str
        Document URL, sent to the API as the ``url`` search parameter.

    Returns
    -------
    pandas.DataFrame
        One row per API result, flattened with ``json_normalize`` so nested
        keys become dotted column names. The frame is indexed by ``created``
        (which is also kept as a column). ``created`` and ``updated`` are
        timezone-naive timestamps expressed in UTC, so callers can
        ``tz_localize('UTC')`` the index regardless of pandas version.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    endpoint = 'https://hypothes.is/api/search'
    request_timeout = 30  # seconds; avoids hanging the notebook on a dead connection
    expected_columns = ['created', 'document.title', 'group', 'id', 'links.html',
       'links.incontext', 'links.json', 'permissions.admin',
       'permissions.delete', 'permissions.read', 'permissions.update',
       'references', 'tags', 'target', 'text', 'updated', 'uri', 'user']

    try:
        normalize = pd.json_normalize  # pandas >= 1.0
    except AttributeError:
        from pandas.io.json import json_normalize as normalize  # older pandas

    r = requests.get(endpoint,params={'url':uri},timeout=request_timeout)
    r.raise_for_status()
    body = r.json()
    total = body['total']
    print(session)
    print(uri)
    print('Number of Records Found for this Session: ',total)
    print('Retrieved:')

    # Collect one frame per page and concatenate once at the end instead of
    # growing a DataFrame inside the loop.
    pages = []
    n = 0
    while n < total:
        rows = body['rows']
        if not rows:
            # The reported total cannot be reached (e.g. it changed between
            # requests); stop instead of requesting the same offset forever.
            print('Warning: no further rows returned; expected',total)
            break
        print(n)
        pages.append(normalize(rows))
        n += len(rows)
        if n < total:
            r = requests.get(endpoint,params={'url':uri,'offset':n},
                             timeout=request_timeout)
            r.raise_for_status()
            body = r.json()
    print(n)
    print('*****')

    if pages:
        tmp = pd.concat(pages,ignore_index=True,sort=False)
    else:
        tmp = pd.DataFrame(columns=expected_columns)
    # Guarantee the expected columns exist (e.g. 'references' is absent when
    # no fetched row carries it) while keeping any extra columns the API sent.
    extra_columns = [c for c in tmp.columns if c not in expected_columns]
    tmp = tmp.reindex(columns=expected_columns + extra_columns)

    tmp['session'] = session
    for column in ('updated','created'):
        # Parse to UTC, then drop the tz marker so the result is naive UTC.
        tmp[column] = pd.to_datetime(tmp[column],utc=True).dt.tz_localize(None)
    tmp = tmp.set_index('created',drop=False)
    return tmp

# Create dataframes

## from hypothes.is data

In [0]:
# Build one combined frame holding the rows of every session in FMlinkList.
# Frames are collected in a list and concatenated once instead of being
# appended to a growing DataFrame inside the loop.
session_frames = []
for session,link in FMlinkList.items():
    session_frames.append(H_API(session,link))

if session_frames:
    df = pd.concat(session_frames,sort=False)
else:
    # No sessions configured: keep the expected column layout and a
    # datetime index so the timezone handling below still applies.
    df = pd.DataFrame(columns=['created','document.title','group','id','links.html',
       'links.incontext','links.json','permissions.admin',
       'permissions.delete','permissions.read','permissions.update',
       'references','tags','target','text','updated','uri','user','session'],
       index=pd.DatetimeIndex([]))

# The index holds the 'created' timestamps. Depending on the pandas version
# they may come back naive or already tz-aware; only localize when naive,
# because tz_localize raises on an index that already has a timezone.
if getattr(df.index,'tz',None) is None:
    df = df.tz_localize('UTC')
df = df.tz_convert('US/Mountain')
print('Total Records: ',len(df))

## for analysis graphs

In [0]:
def _user_activity_counts(rows, freq):
  """Return a table of row counts per user for each `freq` bin of the index.

  `rows` must have a datetime index and a 'user' column. Column labels are
  the user ids with their first 5 and last 12 characters removed
  (assumes ids look like 'acct:NAME@hypothes.is' -- TODO confirm).
  """
  counts = rows.groupby([pd.Grouper(freq=freq),'user']).size().to_frame()
  counts = counts.reset_index(level='user')
  # to_frame() names the count column 0, hence values=0. Each (bin, user)
  # pair occurs once after the groupby, so the pivot only reshapes.
  table = pd.pivot_table(counts,columns='user',values=0,
                         index=pd.Grouper(freq=freq),fill_value=0)
  return table.rename(columns=lambda x: x[5:][:-12])


# Per-session tables consumed by the plotting cells, keyed by session label.
stackedBarGraphs = {}
userActivityTSGraphs = {}
entireConversationTSGraphs = {}
for session in FMlinkList:
  session_rows = df[df['session'] == session]
  # Rows with a non-null 'references' value are counted as replies; all
  # other rows are counted as annotations.
  is_reply = session_rows['references'].notnull()

  Table1 = pd.DataFrame(index=list(session_rows.drop_duplicates('user')['user']))
  Table1['annotations'] = session_rows[~is_reply].groupby('user').size()
  Table1['replies'] = session_rows[is_reply].groupby('user').size()
  Table1 = Table1.fillna(0)  # users with no rows of one kind get 0
  Table1.index = Table1.index.str[5:].str[:-12]
  Table1['total'] = Table1['replies'] + Table1['annotations']
  stackedBarGraphs[session] = Table1.sort_values(by='total',ascending=False)[['annotations','replies']]

  # Activity per user in 15-minute bins.
  Graph3 = _user_activity_counts(session_rows,'15min')
  userActivityTSGraphs[session] = Graph3

  # Activity per user per day, plus a 'Total' column summed across users.
  graphEntireConversation = _user_activity_counts(session_rows,'D')
  graphEntireConversation['Total'] = graphEntireConversation.sum(axis=1)
  entireConversationTSGraphs[session] = graphEntireConversation

# Display visualizations 
## Entire user activity graph for each conversation by day

In [0]:
# One line chart per session, drawn from its per-day activity table; the
# session label is used as the chart title.
for session in FMlinkList:
  daily_counts = entireConversationTSGraphs[session]
  daily_counts.plot(title=session)

## User activity graph for each conversation by 15 min intervals
### Note: time periods with no annotations are omitted from these graphs

In [0]:
# One line chart per session, drawn from its 15-minute activity table; the
# session label is used as the chart title.
for session in FMlinkList:
  interval_counts = userActivityTSGraphs[session]
  interval_counts.plot(title=session)

## Stacked bar graph for each conversation

In [0]:
# One stacked bar chart per session showing each user's annotations and
# replies; the session label is used as the chart title.
for session in FMlinkList:
  per_user_totals = stackedBarGraphs[session]
  per_user_totals.plot.bar(stacked=True,title=session)