# Use the Stack Overflow API to get usage counts for some R packages

demo:
https://api.stackexchange.com/docs/tags-by-name#order=desc&sort=popular&tags=r&filter=default&site=stackoverflow&run=true

more complex searches:
https://api.stackexchange.com/docs/search#order=desc&sort=creation&tagged=pebble-watch%3Bpebble-sdk%3Bcloudpebble&filter=default&site=stackoverflow&run=true

docs:
https://api.stackexchange.com/docs

## Get overall site info

In [2]:
# MAIN SITE INFO
import requests

# Endpoint that reports site-wide statistics.
INFOURL = "https://api.stackexchange.com/2.2/info"

# Query-string arguments: restrict the request to the Stack Overflow site.
params = dict(site="stackoverflow")

# Issue the GET request; the response object `r` is inspected in the next cell.
r = requests.get(INFOURL, params=params)

In [3]:
# Decode the JSON body of the /info response and show it as the cell output.
r.json()

{u'has_more': False,
 u'items': [{u'answers_per_minute': 4.68,
   u'api_revision': u'2017.1.3.24329',
   u'badges_per_minute': 4.74,
   u'new_active_users': 15,
   u'questions_per_minute': 2.95,
   u'total_accepted': 7112013,
   u'total_answers': 20772648,
   u'total_badges': 21075965,
   u'total_comments': 64380077,
   u'total_questions': 13091520,
   u'total_unanswered': 3666272,
   u'total_users': 6550803,
   u'total_votes': 93869533}],
 u'quota_max': 300,
 u'quota_remaining': 180}

## Get data for SO tags

In [5]:
import pandas as pd

Request structure for a single tag (several tags can be joined with `;`):

/2.2/tags/dplyr/info?order=desc&sort=popular&site=stackoverflow

In [43]:
def get_tag_data(tag):
    """Fetch tag records from the Stack Exchange ``/tags/{tags}/info`` endpoint.

    Parameters
    ----------
    tag : str
        A single tag name, or several names joined with ``;``
        (for example ``'dplyr;ggplot2;r'``).

    Returns
    -------
    list
        The ``items`` list of the decoded JSON response.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    BASEURL = "https://api.stackexchange.com/2.2/tags/"

    params = {
        "site" : "stackoverflow"
    }

    r = requests.get(BASEURL + tag + "/info", params=params)
    # Fail loudly on an HTTP error instead of hitting a confusing KeyError
    # when the expected keys are missing from an error payload.
    r.raise_for_status()

    # Decode the body once and reuse the result.
    payload = r.json()
    if payload['has_more']:
        # Parenthesised single-argument form: prints the same text under the
        # Python 2 print statement and is valid syntax on Python 3.
        print("WARNING: Request has more data that is not shown here.")
    return payload['items']

In [44]:
# Three tags requested in one call, separated by ';'.
my_tags = "dplyr;ggplot2;r"

# Fetch the records first, then render them as a table for the cell output.
tag_records = get_tag_data(my_tags)
pd.DataFrame(tag_records)

Unnamed: 0,count,has_synonyms,is_moderator_only,is_required,name
0,165750,True,False,False,r
1,14961,True,False,False,ggplot2
2,4137,False,False,False,dplyr


In [10]:
# Read the header-less file and keep the values of its first row as a plain list.
packages_frame = pd.read_csv("list-of-packages.txt", header=None)
tag_list = list(packages_frame.loc[0])

In [45]:
# Package names to look up, joined with ';' so they can be sent in one request.
# Adjacent literals are concatenated into a single string.
my_tags = (
    "Rcpp;ggplot2;stringr;plyr;dplyr;digest;reshape2;tidyr;colorspace;RColorBrewer;"
    "manipulate;scales;labeling;proto;munsell;gtable;dichromat;mime;RCurl;magrittr;lubridate;caret"
)

In [132]:
# Keep only the 'name' and 'count' columns of the tag records and label every
# row with a constant 'type' so the table's origin stays identifiable.
df = (
    pd.DataFrame(get_tag_data(my_tags))
    .loc[:, ['name', 'count']]
    .assign(type="tag_count")
)

In [133]:
# Display the name/count/type table assembled in the previous cell.
df

Unnamed: 0,name,count,type
0,ggplot2,14961,tag_count
1,dplyr,4137,tag_count
2,plyr,1739,tag_count
3,mime,1398,tag_count
4,rcpp,1266,tag_count
5,reshape2,640,tag_count
6,caret,556,tag_count
7,tidyr,508,tag_count
8,rcurl,486,tag_count
9,lubridate,403,tag_count


## Counts in question body

In [142]:
# Search terms to count in question bodies. Author's note: the SO API is NOT
# case-sensitive, so lowercase spellings are used.
# 'the' is presumably a very common baseline term -- TODO confirm intent.
tag_set = [
    'dplyr',
    'digest',
    'ggplot2',
    'rcpp',
    'the',
]

In [150]:
def get_count(body_string):
    """Return how many Stack Overflow questions match ``body_string`` in their body.

    Queries the ``/search/advanced`` endpoint with ``filter=total`` and returns
    the ``total`` field of the decoded JSON response.

    Parameters
    ----------
    body_string : str
        Text to search for in question bodies.

    Returns
    -------
    The value stored under ``'total'`` in the response.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    # Pass the query through `params` so requests URL-encodes the search text;
    # concatenating it into the URL breaks on spaces, '&', '#', '+', etc.
    params = {
        "body": body_string,
        "site": "stackoverflow",
        "filter": "total",
    }
    r = requests.get('https://api.stackexchange.com/2.2/search/advanced', params=params)
    # Surface HTTP failures directly rather than as a KeyError on 'total'.
    r.raise_for_status()
    return r.json()['total']

In [148]:
# Map each search term in tag_set to the count returned by get_count for it.
counts_in_question_body = dict((term, get_count(term)) for term in tag_set)

{u'total': 2018}
{u'total': 5143}
{u'total': 4829}
{u'total': 1006}
{u'total': 9875030}


In [149]:
# Display the term -> count mapping built in the previous cell.
counts_in_question_body

{'a': 9875030, 'digest': 5143, 'dplyr': 2018, 'ggplot2': 4829, 'rcpp': 1006}