# Use Stack Overflow API to get some R packages

[demo](https://api.stackexchange.com/docs/tags-by-name#order=desc&sort=popular&tags=r&filter=default&site=stackoverflow&run=true)

[more complex searches](https://api.stackexchange.com/docs/search#order=desc&sort=creation&tagged=pebble-watch%3Bpebble-sdk%3Bcloudpebble&filter=default&site=stackoverflow&run=true)

[full docs](https://api.stackexchange.com/docs)

In [24]:
import requests
import pandas as pd
import logging
%matplotlib inline

In [66]:
logging.basicConfig(level=logging.INFO)

In [85]:
GLOBAL_PARAMS = {
    "site" : "stackoverflow",
    "key" : "y38PeNERQJQIC8EPliKAVQ(("
}

## Get overall site info
You may need to add API key if you reach limits. Find 'GLOBAL' in this notebook.

In [69]:
# MAIN SITE INFO
INFOURL = "https://api.stackexchange.com/2.2/info"

params = {
  "site" : "stackoverflow",
  }

r = requests.get(INFOURL, params=params)

In [70]:
r.json()

{u'error_id': 502,
 u'error_message': u'too many requests from this IP, more requests available in 12423 seconds',
 u'error_name': u'throttle_violation'}

## Counts based on individual tags

In [71]:
# SO api is NOT case-sensitive
package_list = ['dplyr', 'digest', 'ggplot', 'ggplot2', 'rcpp', 'magrittr', 'caret']
package_list

['dplyr', 'digest', 'ggplot', 'ggplot2', 'rcpp', 'magrittr', 'caret']

In [89]:
def get_tag_counts(tag_list):
    """"Given tag list, return tag counts as json"""
    
    formatted_tags = ';'.join(tag_list)
    url = "https://api.stackexchange.com/2.2/tags/" + formatted_tags + "/info"
  
    try:
        r = requests.get(url, params=GLOBAL_PARAMS)
        if r.json()['has_more']:
            print "WARNING: Request has more data than is not shown here."
        return r.json()['items']
    except:
        logging.warning("Error in response.")

In [90]:
tag_counts = get_tag_counts(package_list)

In [91]:
pd.DataFrame(tag_counts)

Unnamed: 0,count,has_synonyms,is_moderator_only,is_required,name
0,14979,True,False,False,ggplot2
1,4149,False,False,False,dplyr
2,1269,False,False,False,rcpp
3,556,False,False,False,caret
4,385,False,False,False,digest
5,120,False,False,False,magrittr


In [None]:
tag_url = "https://api.stackexchange.com/2.2/tags/"


for i in range(1):
    two_tags = str('r;' + package_list[i])
    url = tag_url + two_tags + "/info?site=stackoverflow"
  
    r = requests.get(url)
    print r.json()
    print r.json()['items']

## Counts based on presence in question body

In [133]:
def get_body_count(body_string, tag=None):
    """Given ONE string, return number of SO questions containing it
  
    possibly tagged with TAG
    uses filter=total to return counts only
    """
    
    baseurl = 'https://api.stackexchange.com/2.2/search/advanced'
    
    params = {
        'q': body_string,
        'filter': 'total',
    }
    
    params.update(GLOBAL_PARAMS)
    if tag:
        params.update({'tagged': tag})

    r = requests.get(baseurl, params=params)
    return r.json()

In [129]:
question_body_counts = { item: get_body_count(item) for item in package_list}

https://api.stackexchange.com/2.2/search/advanced?q=dplyr&filter=total&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=digest&filter=total&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=ggplot&filter=total&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=ggplot2&filter=total&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=rcpp&filter=total&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=magrittr&filter=total&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=caret&filter=total&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28


In [130]:
question_body_counts

{'caret': {u'total': 12819},
 'digest': {u'total': 14060},
 'dplyr': {u'total': 6355},
 'ggplot': {u'total': 14552},
 'ggplot2': {u'total': 17359},
 'magrittr': {u'total': 410},
 'rcpp': {u'total': 1696}}

## Counts based on presence in question body, with [r] tag

In [131]:
tag = 'r'
question_body_counts = { item: get_body_count(item, tag=tag) for item in package_list}

https://api.stackexchange.com/2.2/search/advanced?q=dplyr&filter=total&tagged=r&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=digest&filter=total&tagged=r&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=ggplot&filter=total&tagged=r&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=ggplot2&filter=total&tagged=r&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=rcpp&filter=total&tagged=r&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=magrittr&filter=total&tagged=r&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28
https://api.stackexchange.com/2.2/search/advanced?q=caret&filter=total&tagged=r&site=stackoverflow&key=y38PeNERQJQIC8EPliKAVQ%28%28


In [132]:
question_body_counts

{'caret': {u'total': 1064},
 'digest': {u'total': 226},
 'dplyr': {u'total': 6241},
 'ggplot': {u'total': 14020},
 'ggplot2': {u'total': 17042},
 'magrittr': {u'total': 402},
 'rcpp': {u'total': 1472}}

# Counts based on [r] TAG + question BODY

In [None]:
def get_tag_counts_with_r_tag(tag_list):
    """"Given tag list, return tag counts which appear with the tag 'r'
    
    Returns json"""
    
    
    formatted_tags = ';'.join(tag_list)
    tag_url = "https://api.stackexchange.com/2.2/tags/"
    url = tag_url + formatted_tags + "/info?site=stackoverflow"
  
    r = requests.get(url)
    if r.json()['has_more']:
        logging.warning()"WARNING: Request has more data than is not shown here.")
    return r.json()['items']

# Visualizations

In [91]:
df_body = pd.DataFrame.from_dict(question_body_counts, orient='index')
df_body.reset_index(inplace=True)
df_body.columns = [['name', 'count']]
df_body['type'] = 'in_question_body'
df_body.sort_values(by='count', inplace=True, ascending=False)
df_body.head()

Unnamed: 0,name,count,type
0,caret,3989,in_question_body
1,dplyr,2020,in_question_body
2,ggplot,4290,in_question_body
3,magrittr,58,in_question_body
4,ggplot2,4829,in_question_body


In [92]:
df_tags = pd.DataFrame(tag_counts)[['name', 'count']]
df_tags['type'] = 'tagged'
df_tags.head()

Unnamed: 0,name,count,type
0,ggplot2,14962,tagged
1,dplyr,4139,tagged
2,rcpp,1266,tagged
3,caret,556,tagged
4,digest,383,tagged


In [93]:
df = pd.concat([df_tags, df_body])
df.sort_values(by = ['type', 'count'], inplace=True, ascending=False)
df

Unnamed: 0,name,count,type
0,ggplot2,14962,tagged
1,dplyr,4139,tagged
2,rcpp,1266,tagged
3,caret,556,tagged
4,digest,383,tagged
5,magrittr,119,tagged
6,digest,5143,in_question_body
4,ggplot2,4829,in_question_body
2,ggplot,4290,in_question_body
0,caret,3989,in_question_body


In [94]:
from bokeh.io import output_notebook, show
from bokeh.charts import Bar
from bokeh.charts.attributes import cat


output_notebook()

In [95]:
p = Bar(df, label=cat('name', sort=False), values='count', group='type',
        title="PKGS FTW!", legend='top_right')

show(p)