# Use the Stack Overflow API to count questions about R packages

[demo](https://api.stackexchange.com/docs/tags-by-name#order=desc&sort=popular&tags=r&filter=default&site=stackoverflow&run=true)

[more complex searches](https://api.stackexchange.com/docs/search#order=desc&sort=creation&tagged=pebble-watch%3Bpebble-sdk%3Bcloudpebble&filter=default&site=stackoverflow&run=true)

[full docs](https://api.stackexchange.com/docs)

In [35]:
import requests
import pandas as pd

## Get overall site info

In [14]:
# Site-wide statistics endpoint
INFOURL = "https://api.stackexchange.com/2.2/info"

# Query-string arguments: restrict the request to Stack Overflow
params = dict(site="stackoverflow")

r = requests.get(INFOURL, params=params)

In [3]:
# Show the decoded JSON body of the site-info response
r.json()

{u'has_more': False,
 u'items': [{u'answers_per_minute': 4.68,
   u'api_revision': u'2017.1.3.24329',
   u'badges_per_minute': 4.74,
   u'new_active_users': 15,
   u'questions_per_minute': 2.95,
   u'total_accepted': 7112013,
   u'total_answers': 20772648,
   u'total_badges': 21075965,
   u'total_comments': 64380077,
   u'total_questions': 13091520,
   u'total_unanswered': 3666272,
   u'total_users': 6550803,
   u'total_votes': 93869533}],
 u'quota_max': 300,
 u'quota_remaining': 180}

## Counts based on tags

In [31]:
# Names to look up. The SO api is NOT case-sensitive, so lowercase is fine.
# NOTE(review): 'the' is presumably a non-package control term rather than
# an R package -- confirm intent.
package_list = ['dplyr', 'digest', 'ggplot2', 'rcpp', 'the']
package_list

['dplyr', 'digest', 'ggplot2', 'rcpp', 'the']

In [30]:
# Base URL of the tags endpoint; get_tag_counts appends the ';'-joined tag
# names and an "/info" suffix to it.
TAG_URL = "https://api.stackexchange.com/2.2/tags/"

In [32]:
def get_tag_counts(tag_list):
    """Given a list of tag names, return the API's info records for them.

    All names are joined with ';' and looked up in a single request to
    TAG_URL + "<tags>/info" on the stackoverflow site.

    Returns the 'items' list of the decoded JSON response. An empty
    ``tag_list`` returns ``[]`` without making a request. A warning is
    printed when the response reports ``has_more``.
    """
    if not tag_list:
        # An empty list would build the malformed path ".../tags//info".
        return []

    formatted_tags = ';'.join(tag_list)
    url = TAG_URL + formatted_tags + "/info"

    # Let requests build the query string instead of concatenating it.
    r = requests.get(url, params={"site": "stackoverflow"})
    payload = r.json()  # decode the body once and reuse it

    if payload['has_more']:
        # Single-argument call form prints the same on Python 2 and 3.
        print("WARNING: Request has more data than is shown here.")
    return payload['items']

In [33]:
# Look up every name in package_list with a single call
tag_counts = get_tag_counts(package_list)

In [36]:
# Tabulate the returned records. In the output recorded below only four of
# the five requested names appear -- there is no row for 'the'.
pd.DataFrame(tag_counts)

Unnamed: 0,count,has_synonyms,is_moderator_only,is_required,name
0,14962,True,False,False,ggplot2
1,4137,False,False,False,dplyr
2,1266,False,False,False,rcpp
3,383,False,False,False,digest


## Counts based on presence in question body

In [38]:
def get_body_count(body_string):
    """Given ONE string, return the number of SO questions containing it.

    The string is sent as the ``body`` parameter of the /search/advanced
    endpoint. It is passed through ``params`` so that requests URL-encodes
    it; spaces, '&', '#', etc. therefore stay part of the search text
    instead of corrupting the query string.

    Uses filter=total so the API returns the count only.
    """
    query = {
        'body': body_string,
        'site': 'stackoverflow',
        'filter': 'total',
    }
    r = requests.get('https://api.stackexchange.com/2.2/search/advanced',
                     params=query)
    return r.json()['total']

In [39]:
# One search request per package name, keyed by that name
question_body_counts = dict(
    (pkg, get_body_count(pkg)) for pkg in package_list
)

In [40]:
# Display the per-name counts.
# NOTE(review): the recorded value for 'the' is 11527107, so this search
# appears to match ordinary words in question text; counts for names that are
# also common words (e.g. 'digest') may be inflated -- verify before comparing.
question_body_counts

{'digest': 5143, 'dplyr': 2018, 'ggplot2': 4829, 'rcpp': 1006, 'the': 11527107}