# Use the Stack Overflow API to get question counts for some R packages

[demo](https://api.stackexchange.com/docs/tags-by-name#order=desc&sort=popular&tags=r&filter=default&site=stackoverflow&run=true)

[more complex searches](https://api.stackexchange.com/docs/search#order=desc&sort=creation&tagged=pebble-watch%3Bpebble-sdk%3Bcloudpebble&filter=default&site=stackoverflow&run=true)

[full docs](https://api.stackexchange.com/docs)

In [4]:
import requests
import pandas as pd
import logging
%matplotlib inline

In [5]:
# Configure the root logger to show INFO-level messages and above.
logging.basicConfig(level=logging.INFO)

In [6]:
# Query parameters shared by the API helper functions: target site plus the app key.
# NOTE(review): the key is committed in the notebook — consider loading it from an
# environment variable if it should not be shared.
GLOBAL_PARAMS = dict(
    site="stackoverflow",
    key="y38PeNERQJQIC8EPliKAVQ((",
)

## Get overall site info
You may need to add an API key if you hit the request quota. Search for 'GLOBAL' in this notebook to find where the key is set.

In [7]:
# MAIN SITE INFO
# Fetch site-wide statistics from the /info endpoint.
INFOURL = "https://api.stackexchange.com/2.2/info"

# This request sends only the site name (it does not use the key in GLOBAL_PARAMS).
params = {
    "site": "stackoverflow",
}

# timeout is in seconds; without it a stalled connection would hang the cell forever.
r = requests.get(INFOURL, params=params, timeout=10)

In [8]:
# Decode the JSON body of the /info response so the notebook displays it.
r.json()

{u'has_more': False,
 u'items': [{u'answers_per_minute': 4.68,
   u'api_revision': u'2017.1.11.24452',
   u'badges_per_minute': 4.75,
   u'new_active_users': 16,
   u'questions_per_minute': 2.95,
   u'total_accepted': 7122393,
   u'total_answers': 20803839,
   u'total_badges': 21119711,
   u'total_comments': 64497750,
   u'total_questions': 13119616,
   u'total_unanswered': 3678164,
   u'total_users': 6566255,
   u'total_votes': 94073432}],
 u'quota_max': 300,
 u'quota_remaining': 298}

## Counts based on individual tags

In [9]:
# Package names to look up. They are written in lower case because the
# Stack Overflow API is NOT case-sensitive.
package_list = [
    'dplyr',
    'digest',
    'ggplot',
    'ggplot2',
    'rcpp',
    'magrittr',
    'caret',
]
package_list

['dplyr', 'digest', 'ggplot', 'ggplot2', 'rcpp', 'magrittr', 'caret']

In [10]:
def get_tag_counts(tag_list, timeout=10):
    """Given tag list, return tag counts as json.

    All tags are sent in a single request to the ``/tags/{tags}/info``
    endpoint, joined with ';', together with the parameters in GLOBAL_PARAMS.

    Args:
        tag_list: list of tag names (strings).
        timeout: seconds to wait for the server before giving up.

    Returns:
        The ``items`` list from the decoded response, an empty list when
        ``tag_list`` is empty (no request is made), or None when the request
        fails or the response is not the expected JSON (a warning is logged).
    """
    if not tag_list:
        # An empty tag list would produce a malformed URL ("/tags//info").
        return []

    formatted_tags = ';'.join(tag_list)
    url = "https://api.stackexchange.com/2.2/tags/" + formatted_tags + "/info"

    try:
        response = requests.get(url, params=GLOBAL_PARAMS, timeout=timeout)
        payload = response.json()  # decode once and reuse
        items = payload['items']
    except (requests.RequestException, ValueError, KeyError) as err:
        # Best effort: network failures, non-JSON bodies and API error payloads
        # (which carry no 'items' key) are reported, not raised.
        logging.warning("Error in response: %r", err)
        return None

    if payload.get('has_more'):
        logging.warning("Request has more data than is shown here.")
    return items

In [11]:
# Fetch tag info for every package in package_list with a single request.
tag_counts = get_tag_counts(package_list)

In [12]:
# Show the returned tag info as a table.
pd.DataFrame(tag_counts)

Unnamed: 0,count,has_synonyms,is_moderator_only,is_required,name
0,15002,True,False,False,ggplot2
1,4166,False,False,False,dplyr
2,1271,False,False,False,rcpp
3,559,False,False,False,caret
4,385,False,False,False,digest
5,121,False,False,False,magrittr


In [None]:
tag_url = "https://api.stackexchange.com/2.2/tags/"

# Exploratory request: ask for info on the 'r' tag together with the first
# package in package_list only.
for package in package_list[:1]:
    two_tags = 'r;' + package
    url = tag_url + two_tags + "/info"

    # timeout is in seconds; without it a stalled connection would hang the cell.
    r = requests.get(url, params={"site": "stackoverflow"}, timeout=10)
    payload = r.json()  # decode once and reuse
    print(payload)
    print(payload['items'])

## Counts based on presence in question body

In [None]:
def get_body_count(body_string, tag=None, timeout=10):
    """Given ONE string, search SO questions for it and return the API response.

    The search is optionally restricted to questions tagged with ``tag``.
    It uses ``filter=total`` so that only counts are returned, not the
    questions themselves.

    Args:
        body_string: free-text query, sent as the ``q`` parameter.
        tag: optional tag name sent as ``tagged``; omitted when falsy.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body of the response (not a bare integer). The
        count is presumably under a ``total`` key — confirm against the
        Stack Exchange API documentation.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if the response body is not valid JSON.
    """
    baseurl = 'https://api.stackexchange.com/2.2/search/advanced'

    params = {
        'q': body_string,
        'filter': 'total',
    }
    # Merge into the local dict so GLOBAL_PARAMS itself is never mutated.
    params.update(GLOBAL_PARAMS)
    if tag:
        params['tagged'] = tag

    response = requests.get(baseurl, params=params, timeout=timeout)
    return response.json()

In [None]:
# Map each package name to the search response for that name (no tag restriction).
question_body_counts = { item: get_body_count(item) for item in package_list}

In [None]:
# Display the per-package search responses.
question_body_counts

## Counts based on presence in question body, with [r] tag

In [None]:
# Repeat the search for each package, restricted to questions tagged 'r'.
# NOTE: this rebinds question_body_counts, replacing any earlier untagged results.
tag = 'r'
question_body_counts = { item: get_body_count(item, tag=tag) for item in package_list}

In [None]:
# Display the per-package search responses for the tag-restricted search.
question_body_counts

# Counts based on [r] TAG + question BODY

In [None]:
def get_tag_counts_with_r_tag(tag_list, timeout=10):
    """Given tag list, return tag info from the ``/tags/{tags}/info`` endpoint.

    NOTE(review): despite the name, the request built here never mentions
    the 'r' tag, so the result is not restricted to tags that appear
    together with 'r'. TODO: decide how the restriction should be expressed.

    Args:
        tag_list: list of tag names (strings), joined with ';' in the URL.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The ``items`` list from the decoded JSON response.

    Raises:
        requests.RequestException: on network failure or timeout.
        ValueError: if the response body is not valid JSON.
        KeyError: if the response lacks ``has_more`` or ``items``.
    """
    formatted_tags = ';'.join(tag_list)
    tag_url = "https://api.stackexchange.com/2.2/tags/"
    url = tag_url + formatted_tags + "/info"

    response = requests.get(url, params={"site": "stackoverflow"}, timeout=timeout)
    payload = response.json()  # decode once and reuse
    if payload['has_more']:
        logging.warning("Request has more data than is shown here.")
    return payload['items']

# Visualizations

In [None]:
# One row per package: the dict keys become the index, then a regular column.
df_body = pd.DataFrame.from_dict(question_body_counts, orient='index').reset_index()
# Flat list of labels: a nested list ([['name', 'count']]) is interpreted by
# pandas as MultiIndex levels, which breaks sorting by 'count'.
# Assumes each response holds a single value, giving exactly two columns.
df_body.columns = ['name', 'count']
df_body['type'] = 'in_question_body'
# Rebind instead of sorting in place so re-running the cell is predictable.
df_body = df_body.sort_values(by='count', ascending=False)
df_body.head()

In [None]:
# Keep only the tag name and its count, and label every row as tag-based.
df_tags = pd.DataFrame(tag_counts)[['name', 'count']].assign(type='tagged')
df_tags.head()

In [None]:
# Stack the tag-based and body-based counts, then order by type and count,
# both descending.
df = pd.concat([df_tags, df_body]).sort_values(by=['type', 'count'], ascending=False)
df

In [None]:
# Bokeh plotting imports.
# NOTE(review): bokeh.charts was removed from Bokeh in later releases (it was split
# out as the separate `bkcharts` package) — confirm the installed version provides it.
from bokeh.io import output_notebook, show
from bokeh.charts import Bar
from bokeh.charts.attributes import cat


# Load Bokeh into the notebook so show() renders plots inline.
output_notebook()

In [None]:
# Grouped bar chart of counts per package name, with one bar per 'type' value.
# sort=False presumably keeps the order of df's rows — confirm in the bokeh docs.
package_axis = cat('name', sort=False)
p = Bar(
    df,
    label=package_axis,
    values='count',
    group='type',
    title="PKGS FTW!",
    legend='top_right',
)

show(p)