In [11]:
import pandas as pd
import numpy as np
import re
import csv

In [9]:
def path_to_url(nid, data_type, paths):
    
    try:

        path_type = (paths['type'] == data_type)
        path_nid = (paths['nid'] == nid)

        path = paths.loc[path_type & path_nid]['path'].values[0]

        return 'https://publiclab.org' + path
    
    except:
        
        return False
    
# path_to_url(18446, 'qstn', paths), path_to_url(13356, 'qstn', paths)

In [10]:
def create_csv_paths(data_types, base='', exp='', full=True):
    
    csv_paths = {}
    
    for data_type in data_types:

        if not full:
            csv_dir = '../data/2016_2019/samples/'
            e = '_' + str(base) + 'e' + str(exp)
            csv_path = csv_dir + data_type + e + '.csv'
        else:
            csv_dir = '../data/2016_2019/full/'
            e = ''
            csv_path = csv_dir + data_type + e + '.csv'
            
        csv_paths[data_type] = csv_path
    
    return csv_paths

In [90]:
def set_edge_direction(row):
    
    if row['q_uid'] == row['a_uid']:
        
        direction = -1
        
    elif row['q_uid'] != row['a_uid']:
        
        direction = 1
        
    return direction

# set_edge_direction(q_and_a.iloc[3254]), set_edge_direction(q_and_a.iloc[3255]) # -1 , 1

### LOAD PATHS

#### CSV DIRECTORY PATHS

In [29]:
data_types = ['answers', 'comments', 'notes', 'questions', 'users', 'wikis', 'paths', 'q_and_a']
csv_paths = create_csv_paths(data_types)

# print csv_paths

#### WEBSITE PATHS

In [13]:
paths = pd.read_csv(csv_paths['paths'])

In [14]:
paths.sample(5)

Unnamed: 0,type,nid,date,path
2026,note,13969,1488297079,/notes/nicholas/02-28-2017/foldable-spectromet...
349,wiki,15087,1508797068,/wiki/infragram-filters
2166,note,14206,1495047283,/notes/stevie/05-17-2017/last-annual-barnraisi...
1036,note,12375,1446810564,/notes/Holger/11-06-2015/raspberry-pi-based-mi...
487,wiki,18412,1550945796,/wiki/filters


### WIKI PAGES

In [None]:
# # ORIGINAL WIKI COLS
# [u'nid', u'vid', u'type', u'language', u'title', u'uid', u'status', u'created', u'changed', u'comment', u'promote', 
#  u'moderate', u'sticky', u'tnid', u'translate', u'cached_likes', u'comments_count', u'drupal_node_revisions_count', 
#  u'path', u'main_image_id', u'slug', u'legacy_views', u'views', u'latitude', u'longitude', u'precision']

In [17]:
wiki_cols = [u'nid',  u'uid', u'created', u'comments_count', u'cached_likes', u'type', u'title']
renamed_wiki_cols = {'created': 'date', 'comments_count': 'comments', 'cached_likes': 'likes'}

wikis = pd.read_csv(csv_paths['wikis'])
wikis['type'] = 'wiki'

wikis = wikis[wiki_cols] # questions.columns = cols
wikis.rename(index=str, columns=renamed_wiki_cols, inplace=True)

# wikis.columns
wikis.head()
# print wikis.shape # (514, 6)

Unnamed: 0,nid,uid,date,comments,likes,type,title
0,10388,7,1525745225,0,27,wiki,OpenHour
1,11502,1,1420149623,0,0,wiki,MapMill Help
2,11508,7,1420405663,0,0,wiki,choosing-how-to-track-progress
3,11509,7,1420476955,0,1,wiki,infrared-garden-experiment
4,11511,7,1420666454,0,3,wiki,pole-mapping-guide


### RESEARCH NOTES

In [18]:
# # ORIGINAL NOTES COLUMNS
# ['nid', 'vid', 'type', 'language', 'title', 'uid', 'status', 'created', 'changed', 'comment', 'promote', 
#  'moderate', 'sticky', 'tnid', 'translate', 'cached_likes', 'comments_count', 'drupal_node_revisions_count', 
#  'path', 'main_image_id', 'slug', 'legacy_views', 'views', 'latitude', 'longitude', 'precision']

In [19]:
notes_cols = [u'nid',  u'uid', u'created', u'comments_count', u'cached_likes', u'type', u'title']
renamed_notes_cols = {'created': 'date', 'comments_count': 'comments', 'cached_likes': 'likes'}

notes = pd.read_csv(csv_paths['notes'])
notes['type'] = 'note'
notes = notes[notes_cols]
notes.rename(index=str, columns=renamed_notes_cols, inplace=True)

# notes.columns
notes.head() # (3155, 6)

Unnamed: 0,nid,uid,date,comments,likes,type,title
0,11503,554,1420328192,0,3,note,Public Lab at Science for Action
1,11504,554,1420328228,0,0,note,"Public Lab Barnraising 2014, day 3"
2,11505,554,1420328254,0,0,note,"Public Lab Barnraising 2014, day 2"
3,11506,554,1420328276,1,0,note,"Public Lab Barnraising 2014, day 1"
4,11507,554,1420328303,2,4,note,"Public Lab Organizers Summit, 2014"


### COMMENTS

In [20]:
# # ORIGINAL COMMENTS COLUMNS
# [u'cid', u'pid', u'nid', u'uid', u'subject', u'comment', u'hostname', u'timestamp', u'status', u'format', 
#  u'thread', u'name', u'mail', u'homepage', u'aid', u'comment_via', u'message_id', u'tweet_id', u'reply_to']

In [21]:
cmnt_cols = ['nid', 'uid', 'cid', 'aid', 'timestamp', 'type', 'thread', 'comment', 'reply_to']

comments = pd.read_csv(csv_paths['comments'])
comments['type'] = 'cmnt'
comments['reply_to'].fillna(0, inplace=True)
comments['reply_to'] = comments['reply_to'].astype('int64')
comments = comments[cmnt_cols]
comments.rename(index=str, columns={'timestamp': 'date'}, inplace=True)

# comments.columns
comments.head()

Unnamed: 0,nid,uid,cid,aid,date,type,thread,comment,reply_to
0,11500,433330,11010,0,1420182880,cmnt,02/,"fyi, I just noticed that the local public libr...",0
1,9509,433359,11011,0,1420238273,cmnt,02/,Thank you very much Ned. This information will...,0
2,11493,51,11012,0,1420483545,cmnt,01/,I will be attending!,0
3,11493,51,11013,0,1420483578,cmnt,02/,Not certain but perhaps.,0
4,11178,7,11014,0,1420487544,cmnt,02/,"Hey @Tbtouaki , how did this turn out?",0


### QUESTIONS

In [22]:
# # ORIGINAL QUESTIONS COLUMNS
# ['nid', 'vid', 'type', 'language', 'title', 'uid', 'status', 'created', 'changed', 'comment', 'promote', 
#  'moderate', 'sticky', 'tnid', 'translate', 'cached_likes', 'comments_count', 'drupal_node_revisions_count', 
#  'path', 'main_image_id', 'slug', 'legacy_views', 'views', 'latitude', 'longitude', 'precision']

In [23]:
quest_cols = [u'nid', u'uid', u'qid', u'created', u'comment', u'comments_count', u'cached_likes', u'type', u'title']

renamed_quest_cols = {'created': 'date', 'comments_count': 'thread_cmnts', 
                      'comment': 'user_cmnts', 'cached_likes': 'likes'}

questions = pd.read_csv(csv_paths['questions'])
questions['type'] = 'qstn'
questions['qid'] = range(1, questions.shape[0] + 1)

questions = questions[quest_cols] # questions.columns = cols
questions.rename(index=str, columns=renamed_quest_cols, inplace=True)

# questions.columns
questions.head() # (902, 8)

Unnamed: 0,nid,uid,qid,date,user_cmnts,thread_cmnts,likes,type,title
0,11516,433692,1,1421053078,2,3,1,qstn,Is it possible to log the data from a dustduin...
1,11524,433790,2,1421278243,2,1,1,qstn,Is the dustduino (with Shinyei sensor) appropr...
2,11526,433849,3,1421429978,2,2,0,qstn,How can I test my tap water for BPA
3,11545,4,4,1422557787,2,6,5,qstn,dealing with patents: Black & Decker and the T...
4,11610,435436,5,1424457388,2,2,0,qstn,Is the project still active?


### ANSWERS

In [24]:
# # ORIGINAL ANSWERS COLUMNS
# [u'id', u'uid', u'nid', u'content', u'cached_likes', u'created_at', u'updated_at', u'accepted']

In [25]:
ans_cols = [u'nid', u'uid', u'id', u'created_at', u'cached_likes', u'type', u'content', u'accepted']

answers = pd.read_csv(csv_paths['answers'])
answers['type'] = 'ans'
answers = answers[ans_cols]

renamed_ans_cols = {'id': 'aid', 'created': 'date', 'cached_likes': 'likes'}
answers.rename(index=str, columns=renamed_ans_cols, inplace=True)

# answers.columns
answers.head()  #(1081, 8)

Unnamed: 0,nid,uid,aid,created_at,likes,type,content,accepted
0,13190,1,1,2016-06-17 15:42:47 UTC,0,ans,"Yes, it there should be an edit link (with pen...",True
1,13312,468506,2,2016-08-02 09:16:09 UTC,1,ans,Sorry to bother you. I was just testing the ne...,True
2,13356,1,3,2016-08-16 23:04:23 UTC,0,ans,Looks like it does indeed work... now to see i...,True
3,13356,7,4,2016-08-17 12:35:13 UTC,0,ans,Here's an answer for you: 42!,False
4,13358,1,5,2016-08-17 14:27:28 UTC,0,ans,"Hi, CCDs do not have a linear response curve -...",True


#### TOTALS

In [26]:
print 'WIKI PAGES:', wikis.shape[0]
print 'RESEARCH NOTES:', notes.shape[0]
print 'COMMENTS:', comments.shape[0]
print 'QUESTIONS:', questions.shape[0]
print 'ANSWERS:', answers.shape[0]

WIKI PAGES: 514
RESEARCH NOTES: 3155
COMMENTS: 11636
QUESTIONS: 902
ANSWERS: 1081


### DATA RELATIONSIHPS

#### QUESTIONS VS. NOTES

In [521]:
questions.loc[questions['nid'] == 13745]
# questions.loc[questions['nid'] == 18446]
# questions.loc[questions['nid'] == 13356] # NO QUESTION

Unnamed: 0,nid,uid,qid,date,user_cmnts,thread_cmnts,likes,type,title
152,13745,498969,153,1480917522,2,3,0,qstn,Is anyone doing any work with fungi? or biorem...


A QUESTION IS A NOTE WITH A POWER TAG OF QUESTION

In [522]:
notes.loc[notes['nid'] == 13745]
# notes.loc[notes['nid'] == 18446]
# notes.loc[notes['nid'] == 13356] # NO NOTE (E.G., NO QUESTION)

Unnamed: 0,nid,uid,date,comments,likes,type,title
1370,13745,498969,1480917522,3,0,note,Is anyone doing any work with fungi? or biorem...


#### ANSWERS VS. COMMENTS

In [523]:
answers.loc[answers['nid'] == 13745]
# answers.loc[answers['nid'] == 18446]
# answers.loc[answers['nid'] == 13356] # DOES NOT HAVE A QUESTION/NOTE

Unnamed: 0,nid,uid,aid,created_at,likes,type,content,accepted
138,13745,499993,149,2016-12-15 00:59:26 UTC,1,ans,Hey Mushroomman! \r\n\r\nFellow mycology fanat...,False
237,13745,237313,251,2017-06-06 19:08:04 UTC,0,ans,"I'm late to the party, but a very cool topic! ...",False


ALL DISCUSSION ABOUT A QUESTION, INCLUDING ANSWERS AND REPLIES IN ALL LEVELS OF HIERARCHIES ARE COMMENTS.

HOWEVER, AN ANSWER IS *TECHNICALLY* A COMMENT OF THE HIGHEST LEVEL THREAD.

**NOTICE:** *AID 149 AND 251 ABOVE* ARE THE SAME AS *CID 22382 and 22481 BELOW*.  HOWEVER, *CID 22223 BELOW* IS IN REPLY TO *AID 149 (ABOVE AND BELOW) AND CID 22382 (BELOW)* WHICH ARE ALL THE SAME.  

In [542]:
comments.loc[comments['nid'] == 13745]
# comments.loc[comments['nid'] == 18446]
# comments.loc[comments['nid'] == 13356] # DOES NOT HAVE A QUESTION/NOTE

Unnamed: 0,nid,uid,cid,aid,date,type,thread,comment,reply_to
10042,13745,579767,22223,149,1552994882,cmnt,,Hello Jlmaybach.\r\n\r\nI am actually doing a...,22382
10195,13745,499993,22382,0,1481763566,cmnt,/01,Hey Mushroomman! \r\n\r\nFellow mycology fanat...,0
10294,13745,237313,22481,0,1496776084,cmnt,/01,"I'm late to the party, but a very cool topic! ...",0


#### WIKI PAGES

WIKI PAGES DO NOT APPEAR TO HAVE ANY SUBTYPES SUCH AS WITH QUESTIONS & NOTES AND ANSWERS & COMMENTS

In [545]:
wikis.loc[wikis['nid'] == 13745]

Unnamed: 0,nid,uid,date,comments,likes,type,title


#### SUMMARY OF Q&A RELATIONSIPS

In [None]:
DATES -> WIKI_DATES + NOTES_DATES + COMMENT_DATES
USERS -> USERS_COUNT
WIKI PAGES -> WIKIS
RESEARCH NOTES -> NOTES - QUESTIONS
COMMENTS -> WIKI_COMMENTS + NOTE_COMMENTS + ANSWERS
QUESTIONS -> NOTES WHERE QUESTION_NID EQUALS NOTES_NID
ANSWERS -> COMMENTS WHERE QUESTIONS_NID EQUALS COMMENTS_NID

USER_WIKI_PAGES -> USERS_UID == WIKI_PAGES_UID
USER_NOTES -> USERS_UID == NOTES_UID
USER_COMMENTS -> USERS_UID == WIKI_PAGES_UID
USER_QUESTIONS -> USERS_UID == QUESTIONS_UID
USER_ANSWERS -> USERS_UID == ANSWERS_UID

WIKI PAGE COMMENTS -> COMMENTS WHERE WIKI_NID EQUALS COMMENTS_NID
RESEARCH NOTE COMMENTS -> COMMENTS WHERE NOTE_NID EQUALS COMMENTS_NID

### MERGING QUESTIONS AND ANSWERS  
SEE "SUMMARY STATISTICS" TO SEE WHY THIS MERGE WAS CHOSEN

In [40]:
questions.iloc[0]

nid                                                         11516
uid                                                        433692
qid                                                             1
date                                                   1421053078
user_cmnts                                                      2
thread_cmnts                                                    3
likes                                                           1
type                                                         qstn
title           Is it possible to log the data from a dustduin...
Name: 0, dtype: object

In [41]:
comments.iloc[0]

nid                                                     11500
uid                                                    433330
cid                                                     11010
aid                                                         0
date                                               1420182880
type                                                     cmnt
thread                                                    02/
comment     fyi, I just noticed that the local public libr...
reply_to                                                    0
Name: 0, dtype: object

In [43]:
merge_q_cols = ['nid', 'q_id', 'q_uid','q_date', 'q_text']
merge_a_cols = ['a_id', 'a_uid',  'a_cid', 'a_date', 'a_text']

merge_cols = merge_q_cols + merge_a_cols

q_cols = ['nid', 'qid', 'uid','date', 'title']
a_cols = ['nid', 'aid', 'uid',  'cid', 'date', 'comment']

# qac for question and answers from comments
qac = pd.merge(questions[q_cols], comments[a_cols], on='nid', how='outer', indicator=True)
qac.columns = merge_cols + ['merge']
qac.head()

Unnamed: 0,nid,q_id,q_uid,q_date,q_text,a_id,a_uid,a_cid,a_date,a_text,merge
0,11516,1.0,433692.0,1421053000.0,Is it possible to log the data from a dustduin...,0.0,304279.0,11037.0,1421161000.0,"Hi KP55, \r\nI think that the first thing you...",both
1,11516,1.0,433692.0,1421053000.0,Is it possible to log the data from a dustduin...,0.0,1.0,11391.0,1427129000.0,I agree that a non-wifi DustDuino variant woul...,both
2,11516,1.0,433692.0,1421053000.0,Is it possible to log the data from a dustduin...,0.0,1.0,22929.0,1518818000.0,There's now a way to collect data from a #dust...,both
3,11524,2.0,433790.0,1421278000.0,Is the dustduino (with Shinyei sensor) appropr...,0.0,4.0,11043.0,1421343000.0,currently the dust counts for the dustduino ar...,both
4,11526,3.0,433849.0,1421430000.0,How can I test my tap water for BPA,0.0,4.0,11064.0,1421795000.0,That is a good question. I'm not sure what as...,both


In [67]:
print 'QAC MERGE - NID, OUTER'
print 'LEFT ONLY:', qac.loc[qac['merge'] == 'left_only'].shape[0], \
      '  RIGHT ONLY:', qac.loc[qac['merge'] == 'right_only'].shape[0], \
      '  BOTH:', qac.loc[qac['merge'] == 'both'].shape[0]

QAC MERGE - NID, OUTER
LEFT ONLY: 104   RIGHT ONLY: 8280   BOTH: 3356


In [76]:
# # CREATE DF WITH MERGE EQUALS BOTH ONLY

qac_both = qac.loc[(~qac.isnull()).all(axis=1)]

cols_to_type = ['nid', 'q_id', 'q_uid','q_date', 'a_id', 'a_uid',  'a_cid', 'a_date']

# GIVES .loc ALERT, NOT SURE WHY, TRIED IT W/ .loc AND THEN THERE WAS A KEY ERROR
for col_to_type in cols_to_type:
    qac_both[col_to_type] = qac_both[col_to_type].astype('int64')
    
qac_both.drop(['merge'], inplace=True, axis=1)
qac_both.head()

Unnamed: 0,nid,q_id,q_uid,q_date,q_text,a_id,a_uid,a_cid,a_date,a_text
0,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,304279,11037,1421161394,"Hi KP55, \r\nI think that the first thing you..."
1,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,1,11391,1427129027,I agree that a non-wifi DustDuino variant woul...
2,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,1,22929,1518817538,There's now a way to collect data from a #dust...
3,11524,2,433790,1421278243,Is the dustduino (with Shinyei sensor) appropr...,0,4,11043,1421342622,currently the dust counts for the dustduino ar...
4,11526,3,433849,1421429978,How can I test my tap water for BPA,0,4,11064,1421795312,That is a good question. I'm not sure what as...


In [77]:
qac_both.to_csv('../data/2016_2019/full/q_and_a.csv', index=False)

In [468]:
q_and_a = pd.read_csv(csv_paths['q_and_a'])
q_and_a.head()
print 'TOTAL ROWS:', q_and_a.shape[0]

TOTAL ROWS: 3356


In [139]:
questions.loc[questions['uid'] == 7].shape

(30, 9)

In [80]:
q_and_a.loc[q_and_a['nid'] == 18446]

Unnamed: 0,nid,q_id,q_uid,q_date,q_text,a_id,a_uid,a_cid,a_date,a_text
3254,18446,869,217812,1551498746,How do I interpret the relationship between th...,1346,217812,22034,1551731181,Yes! Perfect. Thank you! Speaking of a nod...
3255,18446,869,217812,1551498746,How do I interpret the relationship between th...,1346,1,22035,1551732030,that's right!
3256,18446,869,217812,1551498746,How do I interpret the relationship between th...,1346,217812,22036,1551732144,"Very likable indeed. Thanks, @warren!"
3257,18446,869,217812,1551498746,How do I interpret the relationship between th...,0,1,23322,1551730472,Hi @bsugar!\r\n\r\n 1) While there are answer ...


### CREATE EDGELIST

#### ADD EDGE DIRECTIONALITY FOR IN/OUT DEGREE

In [478]:
q_and_a['edge_dir'] = q_and_a.apply(set_edge_direction, axis=1)
q_and_a.head()
# q_and_a.loc[q_and_a['nid'] == 18446]

Unnamed: 0,nid,q_id,q_uid,q_date,q_text,a_id,a_uid,a_cid,a_date,a_text,edge_dir
0,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,304279,11037,1421161394,"Hi KP55, \r\nI think that the first thing you...",1
1,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,1,11391,1427129027,I agree that a non-wifi DustDuino variant woul...,1
2,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,1,22929,1518817538,There's now a way to collect data from a #dust...,1
3,11524,2,433790,1421278243,Is the dustduino (with Shinyei sensor) appropr...,0,4,11043,1421342622,currently the dust counts for the dustduino ar...,1
4,11526,3,433849,1421429978,How can I test my tap water for BPA,0,4,11064,1421795312,That is a good question. I'm not sure what as...,1


In [578]:
q_edges = q_and_a[['q_uid', 'q_date', 'q_text', 'a_uid', 'a_date', 'a_text', 'edge_dir']].copy()
q_edges.columns = ['source', 's_date', 's_text', 'target', 't_date', 't_text', 'edge_dir']
q_edges[4:15]

Unnamed: 0,source,s_date,s_text,target,t_date,t_text,edge_dir
4,433849,1421429978,How can I test my tap water for BPA,4,1421795312,That is a good question. I'm not sure what as...,1
5,433849,1421429978,How can I test my tap water for BPA,524546,1517961102,Might want to check for answers here? https://...,1
6,4,1422557787,dealing with patents: Black & Decker and the T...,8,1422654308,"Interesting Matt, how did you find out about t...",1
7,4,1422557787,dealing with patents: Black & Decker and the T...,4,1422654623,"Well, I saw the device and did a patent search...",-1
8,4,1422557787,dealing with patents: Black & Decker and the T...,43651,1422716715,I myself am really curious about this issue in...,1
9,4,1422557787,dealing with patents: Black & Decker and the T...,1,1422719975,Here are some potentially helpful notes from O...,1
10,4,1422557787,dealing with patents: Black & Decker and the T...,53164,1422724009,I'm also very curious to see how this progresses.,1
11,4,1422557787,dealing with patents: Black & Decker and the T...,1,1424729885,"So, does priority date allow them to back-date...",1
12,435436,1424457388,Is the project still active?,4,1424903115,"I don't believe this project is still active, ...",1
13,435436,1424457388,Is the project still active?,443858,1434435160,Great project. The project fruit will benefit ...,1


In [579]:
neg_one = edges.loc[q_edges['edge_dir'] == -1].copy()
neg_one.head()

Unnamed: 0,source,s_date,s_text,target,t_date,t_text,edge_dir
7,4,1422557787,dealing with patents: Black & Decker and the T...,4,1422654623,"Well, I saw the device and did a patent search...",-1
15,328260,1424795839,NDVI in realtime video,328260,1424796168,Another thing that confuses me a little bit mo...,-1
17,328260,1424795839,NDVI in realtime video,328260,1424805532,"cfastie Great, in that case the formula that I...",-1
24,432132,1426707115,Question: HELP NEEDED,432132,1426713736,Thanks Chris for all the material that you cre...,-1
32,435648,1427020365,Question: purchase sensor,435648,1427189401,Thanks for replying...well by my house there i...,-1


In [580]:
neg_one = neg_one[[u'target', u't_date', u't_text', u'source', u's_date', u's_text']]
neg_one.head()

Unnamed: 0,target,t_date,t_text,source,s_date,s_text
7,4,1422654623,"Well, I saw the device and did a patent search...",4,1422557787,dealing with patents: Black & Decker and the T...
15,328260,1424796168,Another thing that confuses me a little bit mo...,328260,1424795839,NDVI in realtime video
17,328260,1424805532,"cfastie Great, in that case the formula that I...",328260,1424795839,NDVI in realtime video
24,432132,1426713736,Thanks Chris for all the material that you cre...,432132,1426707115,Question: HELP NEEDED
32,435648,1427189401,Thanks for replying...well by my house there i...,435648,1427020365,Question: purchase sensor


In [581]:
neg_one.columns = [u'source', u's_date', u's_text', u'target', u't_date', u't_text']
neg_one.head()

Unnamed: 0,source,s_date,s_text,target,t_date,t_text
7,4,1422654623,"Well, I saw the device and did a patent search...",4,1422557787,dealing with patents: Black & Decker and the T...
15,328260,1424796168,Another thing that confuses me a little bit mo...,328260,1424795839,NDVI in realtime video
17,328260,1424805532,"cfastie Great, in that case the formula that I...",328260,1424795839,NDVI in realtime video
24,432132,1426713736,Thanks Chris for all the material that you cre...,432132,1426707115,Question: HELP NEEDED
32,435648,1427189401,Thanks for replying...well by my house there i...,435648,1427020365,Question: purchase sensor


In [582]:
pos_one = q_edges.loc[edges['edge_dir'] == 1].copy()
pos_one.drop('edge_dir', axis=1, inplace=True)
pos_one.head()

Unnamed: 0,source,s_date,s_text,target,t_date,t_text
0,433692,1421053078,Is it possible to log the data from a dustduin...,304279,1421161394,"Hi KP55, \r\nI think that the first thing you..."
1,433692,1421053078,Is it possible to log the data from a dustduin...,1,1427129027,I agree that a non-wifi DustDuino variant woul...
2,433692,1421053078,Is it possible to log the data from a dustduin...,1,1518817538,There's now a way to collect data from a #dust...
3,433790,1421278243,Is the dustduino (with Shinyei sensor) appropr...,4,1421342622,currently the dust counts for the dustduino ar...
4,433849,1421429978,How can I test my tap water for BPA,4,1421795312,That is a good question. I'm not sure what as...


In [583]:
edges = pd.concat([pos_one, neg_one])
edges.sort_index(inplace=True)
edges[4:15]

Unnamed: 0,source,s_date,s_text,target,t_date,t_text
4,433849,1421429978,How can I test my tap water for BPA,4,1421795312,That is a good question. I'm not sure what as...
5,433849,1421429978,How can I test my tap water for BPA,524546,1517961102,Might want to check for answers here? https://...
6,4,1422557787,dealing with patents: Black & Decker and the T...,8,1422654308,"Interesting Matt, how did you find out about t..."
7,4,1422654623,"Well, I saw the device and did a patent search...",4,1422557787,dealing with patents: Black & Decker and the T...
8,4,1422557787,dealing with patents: Black & Decker and the T...,43651,1422716715,I myself am really curious about this issue in...
9,4,1422557787,dealing with patents: Black & Decker and the T...,1,1422719975,Here are some potentially helpful notes from O...
10,4,1422557787,dealing with patents: Black & Decker and the T...,53164,1422724009,I'm also very curious to see how this progresses.
11,4,1422557787,dealing with patents: Black & Decker and the T...,1,1424729885,"So, does priority date allow them to back-date..."
12,435436,1424457388,Is the project still active?,4,1424903115,"I don't believe this project is still active, ..."
13,435436,1424457388,Is the project still active?,443858,1434435160,Great project. The project fruit will benefit ...


In [584]:
q_edges[4:15]

Unnamed: 0,source,s_date,s_text,target,t_date,t_text,edge_dir
4,433849,1421429978,How can I test my tap water for BPA,4,1421795312,That is a good question. I'm not sure what as...,1
5,433849,1421429978,How can I test my tap water for BPA,524546,1517961102,Might want to check for answers here? https://...,1
6,4,1422557787,dealing with patents: Black & Decker and the T...,8,1422654308,"Interesting Matt, how did you find out about t...",1
7,4,1422557787,dealing with patents: Black & Decker and the T...,4,1422654623,"Well, I saw the device and did a patent search...",-1
8,4,1422557787,dealing with patents: Black & Decker and the T...,43651,1422716715,I myself am really curious about this issue in...,1
9,4,1422557787,dealing with patents: Black & Decker and the T...,1,1422719975,Here are some potentially helpful notes from O...,1
10,4,1422557787,dealing with patents: Black & Decker and the T...,53164,1422724009,I'm also very curious to see how this progresses.,1
11,4,1422557787,dealing with patents: Black & Decker and the T...,1,1424729885,"So, does priority date allow them to back-date...",1
12,435436,1424457388,Is the project still active?,4,1424903115,"I don't believe this project is still active, ...",1
13,435436,1424457388,Is the project still active?,443858,1434435160,Great project. The project fruit will benefit ...,1


In [593]:
# edges['s_date'] = pd.to_datetime(edges['s_date'], unit='s').dt.strftime('%Y-%m-%d')
# edges['t_date'] = pd.to_datetime(edges['t_date'], unit='s').dt.strftime('%Y-%m-%d')

edges.to_csv('../data/2016_2019/full/qa_edges.csv', index=False)
edges.head()

Unnamed: 0,source,s_date,s_text,target,t_date,t_text
0,433692,2015-01-12,Is it possible to log the data from a dustduin...,304279,2015-01-13,"Hi KP55, \r\nI think that the first thing you..."
1,433692,2015-01-12,Is it possible to log the data from a dustduin...,1,2015-03-23,I agree that a non-wifi DustDuino variant woul...
2,433692,2015-01-12,Is it possible to log the data from a dustduin...,1,2018-02-16,There's now a way to collect data from a #dust...
3,433790,2015-01-14,Is the dustduino (with Shinyei sensor) appropr...,4,2015-01-15,currently the dust counts for the dustduino ar...
4,433849,2015-01-16,How can I test my tap water for BPA,4,2015-01-20,That is a good question. I'm not sure what as...


#### NODELIST

In [None]:
# NAKE A NODE LIST WHERE -1 IS ADDED FOR MAKING THE QUESTION.  SO IN THE ABOVE, I'VE ACTUALLY PARTICIPATED 5 TIMES 
# IF I INCLUDE THE FIRST QUESTION, WHICH I SHOULD!

In [97]:
q_nodes = q_and_a[['nid', 'q_id', 'q_uid','q_date', 'q_text']].copy()
q_nodes.head()

Unnamed: 0,nid,q_id,q_uid,q_date,q_text
0,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...
1,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...
2,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...
3,11524,2,433790,1421278243,Is the dustduino (with Shinyei sensor) appropr...
4,11526,3,433849,1421429978,How can I test my tap water for BPA


#### TOTAL QUESTIONS ASKED

In [155]:
questions.loc[~questions['nid'].duplicated()].shape[0]

902

#### TOTAL RESPONSES INCLUDING OP'S

In [245]:
q_and_a.shape[0]

3356

#### COUNT OF QUESTIONS WITH RESPONSES

In [232]:
q_unq = q_nodes.loc[~q_nodes['nid'].duplicated()]
print q_unq.shape[0]
q_unq.head()

798


Unnamed: 0,nid,q_id,q_uid,q_date,q_text
0,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...
3,11524,2,433790,1421278243,Is the dustduino (with Shinyei sensor) appropr...
4,11526,3,433849,1421429978,How can I test my tap water for BPA
6,11545,4,4,1422557787,dealing with patents: Black & Decker and the T...
12,11610,5,435436,1424457388,Is the project still active?


#### COUNT OF QUESTIONS W/O RESPONSES

In [339]:
q_without_resp = questions.loc[~questions['nid'].isin(q_unq['nid'])]
print q_without_resp.shape[0]
q_without_resp.head()

104


Unnamed: 0,nid,uid,qid,date,user_cmnts,thread_cmnts,likes,type,title
16,11946,443091,17,1433350344,2,0,0,qstn,Question: Colorimetry using the desktop spectr...
17,11953,443196,18,1433708737,2,0,0,qstn,Question: Stray light correction
25,12135,420458,26,1439246576,2,0,0,qstn,Question:
48,12767,468156,49,1456950440,2,0,0,qstn,Question: RE: Desktop Spectrometry Kit
120,13636,451398,121,1477067715,2,0,1,qstn,How does bio-waste move within the waste stream?


#### COUNT OF UNIQUE USERS WHO ASKED QUESTIONS

In [341]:
q_ask_unq_user = pd.DataFrame(q_unq.groupby('q_uid').size())
q_ask_unq_user.reset_index(inplace=True)#.head()
q_ask_unq_user.columns = ['uid', 'q_count']
q_ask_unq_user.head()

Unnamed: 0,uid,q_count
0,1,114
1,4,7
2,7,23
3,8,1
4,160,1


#### COUNTS OF UNIQUE USERS WHO ASKED QUESTIONS W/O A RESPONSE

In [343]:
q_without_resp.head()

Unnamed: 0,nid,uid,qid,date,user_cmnts,thread_cmnts,likes,type,title
16,11946,443091,17,1433350344,2,0,0,qstn,Question: Colorimetry using the desktop spectr...
17,11953,443196,18,1433708737,2,0,0,qstn,Question: Stray light correction
25,12135,420458,26,1439246576,2,0,0,qstn,Question:
48,12767,468156,49,1456950440,2,0,0,qstn,Question: RE: Desktop Spectrometry Kit
120,13636,451398,121,1477067715,2,0,1,qstn,How does bio-waste move within the waste stream?


In [344]:
q_without_resp = pd.DataFrame(q_without_resp.groupby('uid').size())
q_without_resp.reset_index(inplace=True)#.head()
q_without_resp.columns = ['uid', 'q_count']
print q_without_resp.shape[0]
q_without_resp.head()

65


Unnamed: 0,uid,q_count
0,1,9
1,7,7
2,45586,1
3,237313,1
4,380298,1


#### COUNT OF "ANSWERS" BY USERS *EXCLUDING* THE ORIGINAL QUESTIONER

In [330]:
a_nodes = q_and_a[['nid', 'a_id', 'a_uid','a_date', 'a_text', 'edge_dir']].copy()
a_resp_no_q = a_nodes.loc[a_nodes['edge_dir'] == 1]
a_resp_no_q = pd.DataFrame(a_resp_no_q.groupby('a_uid').size())
a_resp_no_q.reset_index(inplace=True)#.head()

# a_resp_no_q.head()
a_resp_no_q.columns = ['uid', 'a_count']
print a_resp_no_q.shape[0]
a_resp_no_q.head()

307


Unnamed: 0,uid,a_count
0,1,687
1,4,28
2,7,144
3,8,1
4,9,2


#### COUNT OF ALL RESPONSES FROM QUESTIONER *EXCLUDING* ORIGINAL QUESTION

In [331]:
a_resp_from_q = a_nodes.loc[a_nodes['edge_dir'] == -1]
a_resp_from_q = pd.DataFrame(a_resp_from_q.groupby('a_uid').size())
# a_resp_from_q.head()
a_resp_from_q.reset_index(inplace=True)
a_resp_from_q.columns = ['uid', 'q_disc_count']
print a_resp_from_q.shape[0]
a_resp_from_q.head()

219


Unnamed: 0,uid,q_disc_count
0,1,290
1,4,7
2,7,13
3,160,5
4,554,12


In [388]:
q_and_a.head()

Unnamed: 0,nid,q_id,q_uid,q_date,q_text,a_id,a_uid,a_cid,a_date,a_text,edge_dir
0,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,304279,11037,1421161394,"Hi KP55, \r\nI think that the first thing you...",1
1,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,1,11391,1427129027,I agree that a non-wifi DustDuino variant woul...,1
2,11516,1,433692,1421053078,Is it possible to log the data from a dustduin...,0,1,22929,1518817538,There's now a way to collect data from a #dust...,1
3,11524,2,433790,1421278243,Is the dustduino (with Shinyei sensor) appropr...,0,4,11043,1421342622,currently the dust counts for the dustduino ar...,1
4,11526,3,433849,1421429978,How can I test my tap water for BPA,0,4,11064,1421795312,That is a good question. I'm not sure what as...,1


In [94]:
# NUMBER OF FIRST RESPONSES
q_and_a.loc[q_and_a['a_id'] == 0].shape[0]

2553

In [346]:
node_dfs = [q_ask_unq_users, q_without_resp, a_resp_no_q, a_resp_from_q, desc_count_unq_user]
col_names = ['9_7', '9_8',  '9_9', '9_10', '9_11']
nodes = pd.concat(node_dfs, axis=1, names=col_names, join='outer', ignore_index=True)
print nodes.shape[0]
nodes.head(10)

405


Unnamed: 0,0,1
0,1,114
1,4,7
2,7,23
3,8,1
4,160,1
5,554,4
6,1083,1
7,45586,1
8,46742,1
9,46795,1


In [431]:
nodes_9_7_9_8 = pd.merge(q_ask_unq_users, q_without_resp, on='uid', how='outer', indicator=True)
print nodes_9_7_9_8.shape[0]
nodes_9_7_9_8.head()

436


Unnamed: 0,uid,q_count_x,q_count_y,_merge
0,1,114.0,9.0,both
1,4,7.0,,left_only
2,7,23.0,7.0,both
3,8,1.0,,left_only
4,160,1.0,,left_only


In [432]:
nodes_9_9_9_10 = pd.merge(a_resp_from_q, a_resp_no_q, on='uid', how='outer', indicator=True)
print nodes_9_9_9_10.shape[0]
nodes_9_9_9_10.head()

463


Unnamed: 0,uid,q_disc_count,a_count,_merge
0,1,290.0,687.0,both
1,4,7.0,28.0,both
2,7,13.0,144.0,both
3,160,5.0,1.0,both
4,554,12.0,239.0,both


In [453]:
nodes_9_7_9_8 = pd.merge(q_ask_unq_users, q_without_resp, on='uid', how='outer', indicator=True)
nodes_9_7_9_8.drop(['_merge'], axis=1, inplace=True)

nodes_9_9_9_10 = pd.merge(a_resp_from_q, a_resp_no_q, on='uid', how='outer', indicator=True)
nodes_9_9_9_10.drop(['_merge'], axis=1, inplace=True)

nodes_97_910 = pd.merge(nodes_9_7_9_8, nodes_9_9_9_10, on='uid', how='outer', indicator=True)
nodes_97_910.drop(['_merge'], axis=1, inplace=True)

nodes = pd.merge(nodes_97_910, desc_count_unq_user, on='uid', how='outer', indicator=True)
nodes.drop(['_merge'], axis=1, inplace=True)
nodes.fillna(0, axis=1, inplace=True)

nodes.drop('disc_count', axis=1, inplace=True)
nodes.reset_index(drop=True, inplace=True)
nodes.head()

Unnamed: 0,uid,q_count_x,q_count_y,q_disc_count,a_count
0,1,114.0,9.0,290.0,687.0
1,4,7.0,0.0,7.0,28.0
2,7,23.0,7.0,13.0,144.0
3,8,1.0,0.0,0.0,1.0
4,160,1.0,0.0,5.0,1.0


In [454]:
nodes.columns = ['uid', 'q_w_r', 'q_wo_r', 'r_not_op', 'r_as_op']

# BECAUSE ALL_R_NO_Q is wrong.
nodes['total_q'] = nodes['q_w_r'] + nodes['q_wo_r']
nodes['total_r'] = nodes['r_not_op'] + nodes['r_as_op']
nodes['total_activity'] = nodes['total_q'] + nodes['total_r']

nodes.head()

Unnamed: 0,uid,q_w_r,q_wo_r,r_not_op,r_as_op,total_q,total_r,total_activity
0,1,114.0,9.0,290.0,687.0,123.0,977.0,1100.0
1,4,7.0,0.0,7.0,28.0,7.0,35.0,42.0
2,7,23.0,7.0,13.0,144.0,30.0,157.0,187.0
3,8,1.0,0.0,0.0,1.0,1.0,1.0,2.0
4,160,1.0,0.0,5.0,1.0,1.0,6.0,7.0


In [455]:
nodes['ask_ratio'] = nodes['total_q']/(nodes['total_q'] + nodes['total_r'])
nodes['ans_ratio'] = nodes['total_r']/(nodes['total_q'] + nodes['total_r'])

nodes.head()

Unnamed: 0,uid,q_w_r,q_wo_r,r_not_op,r_as_op,total_q,total_r,total_activity,ask_ratio,ans_ratio
0,1,114.0,9.0,290.0,687.0,123.0,977.0,1100.0,0.111818,0.888182
1,4,7.0,0.0,7.0,28.0,7.0,35.0,42.0,0.166667,0.833333
2,7,23.0,7.0,13.0,144.0,30.0,157.0,187.0,0.160428,0.839572
3,8,1.0,0.0,0.0,1.0,1.0,1.0,2.0,0.5,0.5
4,160,1.0,0.0,5.0,1.0,1.0,6.0,7.0,0.142857,0.857143


In [459]:
for col in nodes.columns[1:8]:
    print col

q_w_r
q_wo_r
r_not_op
r_as_op
total_q
total_r
total_activity


In [460]:
for col in nodes.columns[1:8]:
    nodes[col] = nodes[col].astype('int64')

nodes.head()

Unnamed: 0,uid,q_w_r,q_wo_r,r_not_op,r_as_op,total_q,total_r,total_activity,ask_ratio,ans_ratio
0,1,114,9,290,687,123,977,1100,0.111818,0.888182
1,4,7,0,7,28,7,35,42,0.166667,0.833333
2,7,23,7,13,144,30,157,187,0.160428,0.839572
3,8,1,0,0,1,1,1,2,0.5,0.5
4,160,1,0,5,1,1,6,7,0.142857,0.857143


In [462]:
nodes.to_csv('../data/2016_2019/full/qa_nodes.csv', index=False)

In [427]:
def ask_or_ans(row):
    
    total_questions = float(row['total_q'])
    responses = float(row['r_not_op'])
    
    if total_questions and responses:
        
        ratio = float(total_questions)/responses
        
    elif row['total_q'] and not row['r_not_op']:
        
        ratio = 0.0
        
    elif not row['total_q'] and row['r_not_op']:
        
        ratio = 1.0
        
    return ratio

d = {'total_q': 651, 'r_not_op' : 0 }

ask_or_ans(nodes.loc[nodes['uid'] == 1])

0.9943931688991793

In [277]:
COUNT OF UNIQUE USERS ASKING QUESTIONS W/ RESPONSES: 405
DESCRIPTION OF NUMBER OF QUESTIONS EACH: MEAN: 1.97037037037 MEDIAN: 1.0 STD: 6.98932078678

COUNT OF UNIQUE USERS ASKING QUESTIONS W/O RESPONSES: 65
DESCRIPTION OF NUMBER OF QUESTIONS EACH: MEAN: 1.6 MEDIAN: 1.0 STD: 1.71208936683

COUNT OF UNIQUE USERS RESPONDING AND NOT OP: 307
DESCRIPTION OF NUMBER OF RESPONSES EACH: MEAN: 7.45928338762 MEDIAN: 1.0 STD: 43.1315149874

RESPONSES FROM OP: 219
DESCRIPTION OF NUMBER OF RESPONSES EACH: MEAN: 4.86757990868 MEDIAN: 2.0 STD: 20.4276705459

COUNT OF UNIQUE RESPONDERS INCLUDING OP BUT W/O ORIGINAL QUESTION: 627
DESCRIPTION OF NUMBER OF RESPONSES EACH: MEAN: 10.7049441786 MEDIAN: 3.0 STD: 67.0944498387


Index([u'q_uid', u'q_count'], dtype='object')

### SUMMARY STATSTICS

In [273]:
print 'TOTAL QUESTIONS ASKED:', questions.shape[0] # 9.3
print 'TOTAL RESPONSES:', q_and_a.shape[0] # 9.4
print ''
print 'QUESTIONS WITH RESPONSES:', q_nodes['nid'].unique().shape[0] # 9.5
print 'QUESTIONS W/O RESPONSES:', q_without_resp.shape[0] # 9.6
print ''
print 'COUNT OF UNIQUE USERS ASKING QUESTIONS W/ RESPONSES:',  q_ask_unq_users.shape[0]
print 'DESCRIPTION OF NUMBER OF QUESTIONS EACH:', 'MEAN:', q_ask_unq_users['q_count'].mean(), \
                                                  'MEDIAN:', q_ask_unq_users['q_count'].median(), \
                                                  'STD:', q_ask_unq_users['q_count'].std()
print ''
print 'COUNT OF UNIQUE USERS ASKING QUESTIONS W/O RESPONSES:',  q_without_resp.shape[0]
print 'DESCRIPTION OF NUMBER OF QUESTIONS EACH:', 'MEAN:', q_without_resp['q_count'].mean(), \
                                                  'MEDIAN:', q_without_resp['q_count'].median(), \
                                                  'STD:', q_without_resp['q_count'].std()
print ''
        
print 'COUNT OF UNIQUE USERS RESPONDING AND NOT OP:', a_resp_no_q.shape[0]
print 'DESCRIPTION OF NUMBER OF RESPONSES EACH:', 'MEAN:', a_resp_no_q['a_count'].mean(), \
                                                  'MEDIAN:', a_resp_no_q['a_count'].median(), \
                                                  'STD:', a_resp_no_q['a_count'].std()
print ''

print 'RESPONSES FROM OP:', a_resp_from_q.shape[0] #9.8 
print 'DESCRIPTION OF NUMBER OF RESPONSES EACH:', 'MEAN:', a_resp_from_q['q_disc_count'].mean(), \
                                                  'MEDIAN:', a_resp_from_q['q_disc_count'].median(), \
                                                  'STD:', a_resp_from_q['q_disc_count'].std()

        
print ''

print 'COUNT OF UNIQUE RESPONDERS INCLUDING OP:', desc_count_unq_user.shape[0] #9.9
print 'DESCRIPTION OF NUMBER OF RESPONSES EACH:', 'MEAN:', desc_count_unq_user['disc_count'].mean(), \
                                                  'MEDIAN:', desc_count_unq_user['disc_count'].median(), \
                                                  'STD:', desc_count_unq_user['disc_count'].std()
        
NUMBER OF POSTS QUESTIONS USER HAS ASKED/TOTAL NO OP ENGAGEMENT POSTS + COMMENTS 



TOTAL QUESTIONS ASKED: 902
TOTAL RESPONSES: 3356

QUESTIONS WITH RESPONSES: 798
QUESTIONS W/O RESPONSES: 65

COUNT OF UNIQUE USERS ASKING QUESTIONS W/ RESPONSES: 405
DESCRIPTION OF NUMBER OF QUESTIONS EACH: MEAN: 1.97037037037 MEDIAN: 1.0 STD: 6.98932078678

COUNT OF UNIQUE USERS ASKING QUESTIONS W/O RESPONSES: 65
DESCRIPTION OF NUMBER OF QUESTIONS EACH: MEAN: 1.6 MEDIAN: 1.0 STD: 1.71208936683

COUNT OF UNIQUE USERS RESPONDING AND NOT OP: 307
DESCRIPTION OF NUMBER OF RESPONSES EACH: MEAN: 7.45928338762 MEDIAN: 1.0 STD: 43.1315149874

RESPONSES FROM OP: 219
DESCRIPTION OF NUMBER OF RESPONSES EACH: MEAN: 4.86757990868 MEDIAN: 2.0 STD: 20.4276705459

COUNT OF UNIQUE RESPONDERS INCLUDING OP: 627
DESCRIPTION OF NUMBER OF RESPONSES EACH: MEAN: 10.7049441786 MEDIAN: 3.0 STD: 67.0944498387


In [38]:
QUESTIONS MERGED WITH ANSWERS: q_and_a
TOTAL Q: 902, TOTAL A: 1081
LQ: 346, RA: 29, BOTH: 1052

NOTES MERGED WITH ANSWERS: q_and_a_from_n
TOTAL NOTES: 3155, TOTAL ANS: 1081 
LNQ: 2597 RA: 21 BOTH: 1060

QUESTIONS MERGED WITH COMMENTS (ANSWERS) q_and_a_from_c
TOTAL QUESTS: 902 TOTAL CMTS: 11636
LQ: 104 RAC: 8280 BOTH: 3356

NOTES (QUESTIONS) MERGED WITH COMMENTS (ANSWERS): qn_and_ac
TOTAL NOTES: 3155 TOTAL CMTS: 11636 
LNQ: 1032 RAC: 1101 BOTH: 10535

In [37]:
qnids = list(questions['nid'])
q_resp = comments.loc[(comments['nid'].isin(qnids))]
q_resp_top_thread = comments.loc[(comments['nid'].isin(qnids)) & (comments['reply_to'] == 0)]
q_resp_sub_threads = comments.loc[(comments['nid'].isin(qnids)) & (comments['reply_to'] != 0)]
print 'TOTAL COMMENTS:', comments.shape[0]
print 'TOTAL COMMENTS RELATED TO A QUESTION:', q_resp.shape[0]
print 'TOTAL COMMENTS RELATED TO A QUESTION AND IN TOP THREAD:', q_resp_top_thread.shape[0]
print 'TOTAL COMMENTS RELATED TO A QUESTION AND IN A SUB THREAD:', q_resp_sub_threads.shape[0]
print 'TOTAL ANSWERS WHEN QUETIONS ARE MERGED WITH COMMENTS:', qac_both.shape[0]

TOTAL COMMENTS: 11636
TOTAL COMMENTS RELATED TO A QUESTION: 3356
TOTAL COMMENTS RELATED TO A QUESTION AND IN TOP THREAD: 2520
TOTAL COMMENTS RELATED TO A QUESTION AND IN A SUB THREAD: 836
TOTAL ANSWERS WHEN QUETIONS ARE MERGED WITH COMMENTS: 3356


#### EXAMPLE MISSING DATA

In [149]:
notes[notes['nid'] == 13836]

Unnamed: 0,nid,uid,date,comments,likes,type,title
1417,13836,7,1483554758,0,0,note,Is using iodine a good way to determine if ric...


In [150]:
questions[questions['nid'] == 13836]

Unnamed: 0,nid,uid,qid,date,user_cmnts,thread_cmnts,likes,type,title
169,13836,7,170,1483554758,2,0,0,qstn,Is using iodine a good way to determine if ric...


In [151]:
answers[answers['nid'] == 13836]

Unnamed: 0,nid,uid,aid,created_at,likes,type,content,accepted


In [152]:
comments[comments['nid'] == 13836]

Unnamed: 0,nid,uid,cid,aid,date,type,thread,comment,reply_to
