-
Notifications
You must be signed in to change notification settings - Fork 29
/
app.py
236 lines (189 loc) · 6.15 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
from __future__ import absolute_import
import datetime
import logging
import os
import re
import sys
from collections import deque
from flask import Flask, json
from flask_cors import CORS # debdeps: python3-flask-cors
# debdeps: python3-geoip2
import geoip2.database # type: ignore
# python3-flask-cors has unnecessary dependencies :-/
from ooniapi.rate_limit_quotas import FlaskLimiter
try:
from systemd.journal import JournalHandler # debdeps: python3-systemd
enable_journal = True
except ImportError: # pragma: no cover
enable_journal = False
from flasgger import Swagger
from decimal import Decimal
from ooniapi.database import init_clickhouse_db
# Directory portion of this module's own file path (derived from __file__).
APP_DIR = os.path.dirname(__file__)
class FlaskJSONEncoder(json.JSONEncoder):
    """JSON encoder with support for datetimes, dates, Decimals and sets.

    Anything else is delegated to the parent encoder.
    """

    def default(self, o):
        """Return a JSON-serializable representation of ``o``."""
        # datetime must be tested before date: datetime is a date subclass.
        if isinstance(o, datetime.datetime):
            # Aware values carry their own offset,
            # eg: '2015-09-25T23:14:42.588601+00:00'
            # Naive values are assumed to be UTC and get a "Z" suffix,
            # eg: '2015-09-25T23:14:42.588601Z'
            suffix = "" if o.tzinfo else "Z"
            return o.isoformat("T") + suffix

        if isinstance(o, datetime.date):
            return o.isoformat()

        if isinstance(o, Decimal):
            return float(o)

        if isinstance(o, set):
            return list(o)

        return super().default(o)
def validate_conf(app, conffile):
    """Abort startup when a required configuration key is absent.

    The required keys are checked in order against ``app.config``. On the
    first key that is missing an error naming the key and ``conffile`` is
    logged through ``app.logger`` and the process exits with status 4.
    Returns None when every key is present.
    """
    # TODO: fallback to reasonable defaults as much as possible instead
    required_keys = (
        "ACCOUNT_ID_HASHING_KEY",
        "BASE_URL",
        "COLLECTORS",
        "COLLECTOR_ID",
        "DATABASE_STATEMENT_TIMEOUT",
        "CLICKHOUSE_URL",
        "GITHUB_ORIGIN_REPO",
        "GITHUB_PUSH_REPO",
        "GITHUB_TOKEN",
        "GITHUB_USER",
        "GITHUB_WORKDIR",
        "JWT_ENCRYPTION_KEY",
        "MAIL_PASSWORD",
        "MAIL_PORT",
        "MAIL_SERVER",
        "MAIL_SOURCE_ADDRESS",
        "MAIL_USERNAME",
        "MAIL_USE_SSL",
        "MSMT_SPOOL_DIR",
        "PSIPHON_CONFFILE",
        "S3_ACCESS_KEY_ID",
        "S3_BUCKET_NAME",
        "S3_ENDPOINT_URL",
        "S3_SECRET_ACCESS_KEY",
        "S3_SESSION_TOKEN",
        "TOR_TARGETS_CONFFILE",
    )
    missing = next(
        (name for name in required_keys if name not in app.config), None
    )
    if missing is None:
        return

    app.logger.error(f"Missing configuration key {missing} in {conffile}")
    # exit with 4 to terminate gunicorn
    sys.exit(4)
def parse_cors_origins(app):
    """Replace ``app.config["CORS_URLS"]`` with a parsed list.

    Entries starting with "^" are compiled into regular expressions; all
    other entries are kept unchanged.
    """
    app.config["CORS_URLS"] = [
        re.compile(origin) if origin.startswith("^") else origin
        for origin in app.config["CORS_URLS"]
    ]
def setup_collectors_ring(config):
    """Create round-robin ring of collectors excluding localhost

    The local host name comes from ``config["HOSTNAME"]`` or, when that is
    empty or missing, from ``socket.getfqdn()``. The resulting deque is
    stored in ``config["OTHER_COLLECTORS"]``.
    """
    local_host = config.get("HOSTNAME")
    if not local_host:
        import socket

        local_host = socket.getfqdn()

    colls = config["COLLECTORS"]
    ring = deque(sorted(set(colls)))
    if local_host in ring:
        # Bring the local host to the front and drop it: the ring then
        # starts at a different collector on each host, distributing load
        # evenly when n > 2 collectors are in use. Entries are unique, so
        # a single rotation by the host's index is enough.
        ring.rotate(-ring.index(local_host))
        ring.popleft()
    else:
        print(f"{local_host} not found in collectors {colls}")

    config["OTHER_COLLECTORS"] = ring
    print(f"Other collectors: {ring}")
def setup_logging(log):
    """Configure logging at DEBUG level.

    When the systemd journal handler was importable, a JournalHandler is
    attached to the root logger (reached through ``log.root``). Otherwise
    ``log`` itself is set to DEBUG and ``logging.basicConfig`` is used.
    """
    if not enable_journal:
        log.setLevel(logging.DEBUG)
        logging.basicConfig(format="%(message)s")
        return

    root = log.root
    journal_handler = JournalHandler(SYSLOG_IDENTIFIER="ooni-api")
    journal_handler.setFormatter(
        logging.Formatter("%(levelname)s %(message)s")
    )
    root.addHandler(journal_handler)
    root.setLevel(logging.DEBUG)
def load_geoip_db(log, app):
    """Open the GeoIP country and ASN databases and attach them to ``app``.

    File names are read from the ``GEOIP_CC_DB`` and ``GEOIP_ASN_DB``
    configuration keys. On success the readers are stored as
    ``app.geoip_cc_reader`` and ``app.geoip_asn_reader``.

    Loading is best-effort: any exception is logged with its traceback and
    swallowed. If the ASN database fails after the country one succeeded,
    only ``app.geoip_cc_reader`` is set.
    """
    log.debug("Loading GeoIP DBs")
    ccfn = app.config.get("GEOIP_CC_DB")
    asnfn = app.config.get("GEOIP_ASN_DB")
    try:
        app.geoip_cc_reader = geoip2.database.Reader(ccfn)
        app.geoip_asn_reader = geoip2.database.Reader(asnfn)
    except Exception:
        # The message needs one %s per argument: without placeholders the
        # logging module fails to format the record and the error is lost.
        log.error(
            "Failed to load geoip DBs at %s %s", ccfn, asnfn, exc_info=True
        )
def init_app(app, testmode=False):
    """Set up logging, load and validate configuration, GeoIP and CORS.

    Configuration defaults come from ooniapi/config.py and are then
    overridden by the file named in the CONF environment variable
    (defaults to /etc/ooni/api.conf). ``testmode`` is accepted but not
    used here.
    """
    logger = logging.getLogger("ooni-api")
    conf_path = os.getenv("CONF", "/etc/ooni/api.conf")
    setup_logging(logger)
    logger.info(f"Starting OONI API. Loading conf from {conf_path}")

    # Defaults first, then the deployment-specific file on top.
    app.config.from_object("ooniapi.config")
    app.config.from_pyfile(conf_path)
    validate_conf(app, conf_path)
    # parse_cors_origins(app)
    setup_collectors_ring(app.config)
    logger.info("Configuration loaded")

    load_geoip_db(logger, app)
    CORS(app)
def create_app(*args, testmode=False, **kw):
    """Build, configure and return the Flask application.

    Extra positional and keyword arguments are accepted but not used;
    ``testmode`` is forwarded to ``init_app``.
    """
    from ooniapi import views

    flask_app = Flask(__name__)
    flask_app.json_encoder = FlaskJSONEncoder

    # Order matters
    init_app(flask_app, testmode=testmode)
    init_clickhouse_db(flask_app)

    # Setup rate limiting
    quotas = {
        "ipaddr_per_month": 60000,
        "token_per_month": 6000,
        "ipaddr_per_week": 20000,
        "token_per_week": 2000,
        "ipaddr_per_day": 4000,
        "token_per_day": 500,
    }
    # Whitelist Prometheus and AMS Explorer
    # TODO: move addrs to an external config file /etc/ooniapi.conf ?
    exempt_ipaddrs = ["37.218.245.43", "37.218.242.149"]
    free_pages = ["/", "/health", "/report*"]
    flask_app.limiter = FlaskLimiter(
        limits=quotas,
        app=flask_app,
        whitelisted_ipaddrs=exempt_ipaddrs,
        unmetered_pages=free_pages,
    )

    Swagger(flask_app, parse=True)

    # FIXME
    views.register(flask_app)

    @flask_app.route("/health")
    def health():
        """Health check
        ---
        responses:
          '200':
            description: Status
        """
        return "UP"
        # TODO: ping database?
        # option httpchk GET /check
        # http-check expect string success

    return flask_app
if __name__ == "__main__":
    # When run as a script: build the app, download its swagger API spec
    # through the test client, write it to a local JSON file and finish.
    app = create_app()
    with app.test_client() as http:
        spec = http.get("/apispec_1.json").json
        out_path = "apispec.json"
        with open(out_path, "w") as out_file:
            json.dump(spec, out_file, indent=4, sort_keys=True)

        print(f"{out_path} written")