Skip to content

Commit

Permalink
Add full auth workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
hampelm committed Feb 23, 2016
1 parent a5ea858 commit 206947f
Show file tree
Hide file tree
Showing 12 changed files with 368 additions and 154 deletions.
24 changes: 18 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,19 +49,31 @@ the `.env` file are copied to the server on deploy.

### Add Google Analytics credentials

Using analytics requires a client ID and secret in a `client_secrets.json` file in the project root. Follow the [Google Analytics python setup instructions](https://developers.google.com/analytics/devguides/reporting/core/v3/quickstart/installed-py#enable) to get that file.
The first time you run the project locally, a browser window will open and you
will be asked to authorize your account. This will create an `analytics.dat` file
in your project directory.
The first time you run Carebot, you'll need to authorize the app with your
Google account. To do that, run

### Run the project
```
fab app
```

And follow the on-screen instructions. If you have already set up an app using
the NPR Apps template, you may not have to do this.

To run the bot:
## Using Carebot

### Run the bot

```
python carebot.py
```

### Get new stories from the story spreadsheet

```
fab load_new_stories
```


## Deploying the project

To deploy carebot to production:
Expand Down
28 changes: 28 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env python

import app_config
import oauth
from flask import Flask, make_response, render_template
from werkzeug.debug import DebuggedApplication

# Minimal Flask app whose only job is to walk the user through the
# Google OAuth authorization flow provided by the oauth blueprint.
# It is served via gunicorn by the "fab app" task, not run directly.
app = Flask(__name__)
app.debug = app_config.DEBUG

@app.route('/')
@oauth.oauth_required
def index():
    # Reaching this view at all means oauth_required found valid
    # credentials, so simply confirm success to the user.
    return make_response("You're good to go.")

app.register_blueprint(oauth.oauth)


# Enable Werkzeug debug pages
if app_config.DEBUG:
    # evalex=False: show tracebacks but disable the interactive console.
    wsgi_app = DebuggedApplication(app, evalex=False)
else:
    wsgi_app = app


# Catch attempts to run the app directly
if __name__ == '__main__':
    print 'This command has been removed! Please run "fab app" instead!'
32 changes: 31 additions & 1 deletion fabfile.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,29 @@
import app_config
import copy
from fabric.api import *
from fabric.state import env
from jinja2 import Template

import app_config
from oauth import get_document
from util.models import Story
from scrapers.spreadsheet import SpreadsheetScraper
from scrapers.analytics import GoogleAnalyticsScraper

env.user = app_config.SERVER_USER
env.hosts = app_config.SERVERS
env.slug = app_config.PROJECT_SLUG


"""
Base configuration
"""
env.user = app_config.SERVER_USER
env.forward_agent = True

env.hosts = []
env.settings = None

"""
Configuration
"""
Expand Down Expand Up @@ -41,6 +52,21 @@ def _get_installed_service_name(service):
"""
return '%s.%s' % (app_config.PROJECT_FILENAME, service)


"""
Running the app
Probably only needed the first time, to set up OAuth credentials
"""
@task
def app(port='8000'):
    """
    Serve app.py locally with gunicorn (used mainly for the one-time
    OAuth credential setup).
    """
    # Both branches launch the same server; build the command once.
    command = 'gunicorn -b 0.0.0.0:%s --timeout 3600 --debug --reload app:wsgi_app' % port
    if env.settings:
        # Prefix the deployment target so app_config reads the right
        # environment configuration.
        local("DEPLOYMENT_TARGET=%s bash -c '%s'" % (env.settings, command))
    else:
        local(command)

"""
Data tasks
"""
Expand All @@ -51,7 +77,11 @@ def load_new_stories():
stories = scraper.scrape_spreadsheet(app_config.STORIES_PATH)
scraper.write(stories)


@task
def get_analytics():
    """
    Scrape Google Analytics stats and print them to the console.
    """
    print(GoogleAnalyticsScraper().scrape_google_analytics())

"""
Deploy tasks
Expand Down
6 changes: 3 additions & 3 deletions oauth.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def authenticate():

if not result.error:
save_credentials(result.user.credentials)
get_document(app_config.COPY_GOOGLE_DOC_KEY, app_config.COPY_PATH)
get_document(app_config.STORIES_GOOGLE_DOC_KEY, app_config.STORIES_PATH)

return render_template('oauth/authenticate.html', **context)

Expand All @@ -63,11 +63,11 @@ def oauth_required(f):
def decorated_function(*args, **kwargs):
from flask import request
credentials = get_credentials()
if app_config.COPY_GOOGLE_DOC_KEY and (not credentials or not credentials.valid):
if app_config.STORIES_GOOGLE_DOC_KEY and (not credentials or not credentials.valid):
return redirect(url_for('_oauth.oauth_alert'))
else:
if request.args.get('refresh'):
get_document(app_config.COPY_GOOGLE_DOC_KEY, app_config.COPY_PATH)
get_document(app_config.STORIES_GOOGLE_DOC_KEY, app_config.STORIES_PATH)
return f(*args, **kwargs)
return decorated_function

Expand Down
9 changes: 5 additions & 4 deletions render_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,10 +176,11 @@ def make_context(asset_depth=0):
"""
context = flatten_app_config()

try:
context['COPY'] = copytext.Copy(app_config.COPY_PATH)
except copytext.CopyException:
pass
# TODO: Re-add copy spreadsheet, if needed
# try:
# context['COPY'] = copytext.Copy(app_config.COPY_PATH)
# except copytext.CopyException:
# pass

context['JS'] = JavascriptIncluder(asset_depth=asset_depth)
context['CSS'] = CSSIncluder(asset_depth=asset_depth)
Expand Down
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ copytext==0.1.8
Flask==0.9
google-api-python-client==1.4.2
gspread==0.3.0
gunicorn==19.1.1
Jinja2==2.7.3
peewee==2.8.0
slackbot==0.3.0
slacker==0.9.0
slimit==0.7.4
smartypants==1.8.6
Werkzeug==0.8.3

183 changes: 43 additions & 140 deletions scrapers/analytics.py
Original file line number Diff line number Diff line change
@@ -1,155 +1,58 @@
# Query constructor: https://ga-dev-tools.appspot.com/query-explorer/
#
import app_config

"""
Before you begin, you must sigup for a new project in the Google APIs console:
https://code.google.com/apis/console
from datetime import datetime
from oauth import get_credentials

Then register the project to use OAuth2.0 for installed applications.
import logging

Finally you will need to add the client id, client secret, and redirect URL
into the client_secrets.json file that is in the same directory as this sample.
logging.basicConfig()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

Sample Usage:
class GoogleAnalyticsScraper:
def __init__(self):
self.run_time = datetime.utcnow()

$ python analytics.py
"""
from __future__ import print_function
def scrape_google_analytics(self):
rows = []

import argparse
import sys
api_url = 'https://www.googleapis.com/analytics/v3/data/ga'
credentials = get_credentials()

from googleapiclient.errors import HttpError
from googleapiclient import sample_tools
from oauth2client.client import AccessTokenRefreshError
metrics = ','.join(['ga:{0}'.format(metric) for metric in app_config.GA_METRICS])
dimensions = ','.join(['ga:{0}'.format(dimensions) for dimensions in app_config.GA_DIMENSIONS])

class Analytics:
def __init__(self):
# Authenticate and construct service.
argv = []
self.service, flags = sample_tools.init(
argv, 'analytics', 'v3', __doc__, __file__,
scope='https://www.googleapis.com/auth/analytics.readonly')
params = {
'ids': 'ga:{0}'.format(app_config.GA_ORGANIZATION_ID),
'end-date': 'yesterday',
'start-date': '30daysAgo', # start_date.strftime('%Y-%m-%d'),
'metrics': 'ga:sessions,ga:pageviews',
'dimensions': 'ga:pagePath,ga:source,ga:deviceCategory',
'max-results': app_config.GA_RESULT_SIZE,
'samplingLevel': app_config.GA_SAMPLING_LEVEL,
'start-index': 1,
}

def autodiscover_stories(self,):
self.results = self.service.data().ga().get(
ids='ga:100688391',
start_date='5daysAgo',
end_date='today',
metrics='ga:totalEvents',
dimensions='ga:eventCategory',
filters='ga:eventAction==on-screen',
start_index='1',
max_results='500').execute()
while True:
resp = app_config.authomatic.access(credentials, api_url, params=params)
data = resp.data

if self.results.get('rows', []):
data = []
for row in self.results.get('rows'):
print(row[0])
logger.info('Processing rows {0} - {1}'.format(params['start-index'], params['start-index'] + app_config.GA_RESULT_SIZE - 1))

def donation_data(self, slug):
self.results = self.service.data().ga().get(
ids='ga:100688391',
start_date='90daysAgo',
end_date='today',
metrics='ga:totalEvents',
# dimensions='ga:date',
sort='-ga:totalEvents',
filters='ga:eventCategory==%s;ga:eventLabel==donate' % slug,
start_index='1',
max_results='25').execute()
if not data.get('rows'):
logger.info('No rows found, done.')
break

return self.results
# ga:eventCategory==carebot
# ga:eventLabel==10m
# dimensions: eventCategory, eventLabel, eventAction
for row in resp.data['rows']:
analytics_row = GoogleAnalyticsRow(row, app_config.GA_METRICS, app_config.GA_DIMENSIONS, data)
rows.append(analytics_row.serialize())

def get_linger_rate(self, slug):
self.results = self.service.data().ga().get(
ids='ga:100688391', #'ga:' + profile_id,
start_date='90daysAgo',
end_date='today',
metrics='ga:totalEvents',
dimensions='ga:eventLabel',
sort='-ga:totalEvents',
filters='ga:eventCategory==%s;ga:eventAction==on-screen;ga:eventLabel==10s,ga:eventLabel==20s,ga:eventLabel==30s,ga:eventLabel==40s,ga:eventLabel==50s,ga:eventLabel==1m,ga:eventLabel==2m,ga:eventLabel==3m,ga:eventLabel==4m,ga:eventLabel==5m,ga:eventLabel==10m' % slug,
start_index='1',
max_results='25').execute()
params['start-index'] += app_config.GA_RESULT_SIZE

if self.results.get('rows', []):
data = []
#import ipdb; ipdb.set_trace();
return rows

for row in self.results.get('rows'):
time = row[0]
seconds = 0
if 'm' in time:
time = time[:-1] # remove 'm' from the end
seconds = int(time) * 60
else:
time = time[:-1] # remove 's'
seconds = int(time)

row[0] = seconds
row[1] = int(row[1])
data.append(row)

# Calculate the number of visitors in each bucket
for index, row in enumerate(data):
if index == len(data) - 1:
continue

next_row = data[index + 1]
row[1] = row[1] - next_row[-1]

# Exclude everybody in the last bucket
# (they've been lingering for way too long -- 10+ minutes)
data = data [:-1]

# Get the average number of seconds
total_seconds = 0
total_people = 0
for row in data:
total_seconds = total_seconds + (row[0] * row[1])
total_people = total_people + row[1]

average_seconds = total_seconds/total_people
minutes = average_seconds / 60
seconds = average_seconds % 60
return (total_people, minutes, seconds)

def print_results(self):
print()
print('Profile Name: %s' % self.results.get('profileInfo').get('profileName'))
print()

# Print header.
output = []
for header in self.results.get('columnHeaders'):
output.append('%30s' % header.get('name'))
print(''.join(output))

# Print data table.
if self.results.get('rows', []):
for row in self.results.get('rows'):
output = []
for cell in row:
output.append('%30s' % cell)
print(''.join(output))

else:
print('No Rows Found')

# Testing -- move to a separate folder
# slugs: elections16
a = Analytics()
a.autodiscover_stories()

# data = a.get_linger_rate('space-time-stepper-20160208')
# a.print_results()

# if data.get('rows', []):
# row = data.get('rows')[0]
# cell = row[0]
# print(cell)

# a.print_results()
# def write(self, db, rows):
# table = db['google_analytics']
# table.delete()
# table.insert_many(rows)
Loading

0 comments on commit 206947f

Please sign in to comment.