This repository has been archived by the owner on Oct 13, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
/
dblib.py
539 lines (418 loc) · 19.6 KB
/
dblib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
from future.utils import as_native_str
import time
import os
import threading
import traceback
from doozerlib import constants
import functools
from .model import Missing
import datetime
try:
import mysql.connector as mysql_connector
except:
# Allow this module to be missing
pass
class DBLibException(Exception):
"""
Exception class to record exceptions raised within the dblib module.
"""
def __init__(self, message):
super(self.__class__, self).__init__(message)
self.message = message
@as_native_str
def __str__(self):
return self.message
def try_connecting(func):
"""
Decorator function, which when wrapped around tries to reconnect to mysql in case the connection
doesn't exist or is broken. Attempts 5 tries to reconnect before giving up.
Tries reconnecting in 25, 75, 150, 250, 375 seconds before giving up.
"""
@functools.wraps(func)
def wrapper(*args, **kwargs):
db = args[0]
if db.mysql_db_env_var_setup:
if not db.connection or not db.connection.is_connected():
count = 1
while count <= 5:
try:
db.connection = mysql_connector.connect(
host=os.getenv(constants.DB_HOST, constants.default_db_params[constants.DB_HOST]),
user=os.getenv(constants.DB_USER),
password=os.getenv(constants.DB_PWD_NAME),
database=os.getenv(constants.DB_NAME, constants.default_db_params[constants.DB_NAME]))
break
except Exception as e:
traceback.print_exc()
db.runtime.logger.error("Unable to connect to mysql database. Exception {}.".format(e))
time.sleep(25 * count)
count += 1
func(*args, **kwargs)
else:
db.connection = None
return wrapper
class DB(object):
def __init__(self, runtime, environment, dry_run=False):
"""
:param runtime: The runtime
:param environment: int / stage / prod for database environment. None will not write any data.
"""
self.runtime = runtime
self.environment = environment
self.dry_run = dry_run
# db configuration parameters
self.host = None
self.port = None
self.db = None
self.pwd = None
self.db_user = None
# mysql config for build records
self.mysql_db_env_var_setup = True
self.mysql_db_env_var_setup = self.check_missing_db_env_var() and self.check_database_exists()
self.lock = threading.Lock()
self.connection = None
self._table_column_cache = {}
def check_missing_db_env_var(self):
"""
The function checks if the expected environment variables are set. If not, returns False.
If all required env variables are present, the object instance's configuration properties are set
and returns True.
"""
if not (constants.DB_USER in os.environ and constants.DB_PWD_NAME in os.environ):
self.runtime.logger.info("Environment variables required for db operation missing. Doozer will be running"
"in no DB use mode.")
return False
import mysql.connector as mysql_connector
# if required configuration parameters are found, set instance attributes to configuration values
self.host = os.getenv(constants.DB_HOST, constants.default_db_params[constants.DB_HOST])
self.port = os.getenv(constants.DB_PORT, constants.default_db_params[constants.DB_PORT])
self.db = os.getenv(constants.DB_NAME, constants.default_db_params[constants.DB_NAME])
self.pwd = os.getenv(constants.DB_PWD_NAME)
self.db_user = os.getenv(constants.DB_USER)
self.runtime.logger.info("Found all environment variables required for db setup.")
return True
def select(self, expr, limit=100):
"""
:param expr [string] the SQL command want to query from DB
:param limit [number] limit the length of the return value
:return [dict] return a list of results from SQL query output, return [] if get nothing or get error
This funtion pass the native SQL query command to DB, return the query result in dict format.
If query failed or get nothing then return empty dict.
By default limit the result length to 100.
"""
if not self.mysql_db_env_var_setup:
self.runtime.logger.error('No queries can be made without DB env vars setup!')
raise RuntimeError
exeresult = []
db_connection = mysql_connector.connect(host=self.host,
user=self.db_user,
password=self.pwd,
database=self.db)
cursor = db_connection.cursor()
try:
cursor.execute("{} LIMIT {}".format(expr, limit))
exeresult = cursor.fetchall()
self.runtime.logger.info(exeresult)
except Exception as e:
self.runtime.logger.error("Error executing command in database. Exception is [{}].".format(e))
finally:
cursor.close()
return exeresult
def check_database_exists(self):
"""
This method checks if the configured database is present.
If present returns True.
If not, it tries to create the database. On successful creation, returns True
otherwise returns false.
When returns false, Doozer will run in no db mode.
"""
db_check_connection = mysql_connector.connect(host=self.host,
user=self.db_user,
password=self.pwd)
cursor = db_check_connection.cursor()
cursor.execute("select count(*) from INFORMATION_SCHEMA.SCHEMATA where SCHEMA_NAME='{}'"
.format(self.db))
exception_raised = False
if cursor.fetchone()[0] != 0:
self.runtime.logger.info("Configured database [{}] present on MySQL host.".
format(self.db))
return True
else:
create_db_cursor = db_check_connection.cursor()
try:
create_db_cursor.execute("create database {}".format(self.db))
self.runtime.logger.info("Successfully created configured database [{}].".format(self.db))
except Exception as e:
self.runtime.logger.error("Error creating database. Exception is [{}].".format(e))
exception_raised = True
finally:
create_db_cursor.close()
return not exception_raised
def check_table_exist(self, table_name):
"""
:param table_name
This method returns True, if the table to which the record is attempted to be created in is true. Otherwise
returns false.
"""
if table_name in self._table_column_cache:
return True
cursor = self.connection.cursor()
cursor.execute("""
SELECT COUNT(*)
FROM information_schema.tables
WHERE table_schema ='{0}' and table_name = '{1}'
""".format(self.db, table_name.replace('\'', '\'\'')))
if cursor.fetchone()[0] != 0:
cursor.close()
self._table_column_cache[table_name] = {}
self.runtime.logger.info("Table {} found in database {}.".format(table_name, self.db))
return True
cursor.close()
return False
@staticmethod
def identify_column_type(value):
"""
:param value
This is a helper method, which returns the MYSQL type to the provided value with corresponding python
type.
"""
if isinstance(value, int):
return "BIGINT", value
elif isinstance(value, float):
return "FLOAT", value
elif isinstance(value, datetime.datetime):
return "DATETIME", value
else:
return "VARCHAR(1000)", value
def get_missing_columns(self, attr_payload, table_name):
"""
This method fetches all the column in the given table, and compares it with
the payload to check if there are any additional keys in the payload.
If there are new columns, it returns just the new non-existing columns.
"""
for attr_column in attr_payload:
if not (attr_column in self._table_column_cache[table_name]):
break
else:
# if the loop runs without breaking all the columns in payload are already present in
# the table hence no new columns are required to be added
self.runtime.logger.info("Found all the columns in cache for table [{}].".format(table_name))
return []
self.runtime.logger.info("Received new columns in the payload or table [{}] column cache empty."
.format(table_name))
payload_columns = set()
existing_columns = set()
return_columns = []
for column in attr_payload:
payload_columns.add(column)
if not self._table_column_cache[table_name]:
# if the table's column cache is not updates, load the cache and return
# the table columns
self.runtime.logger.info("Updating table [{}] column cache.".format(table_name))
cursor = self.connection.cursor()
cursor.execute(f"show columns from {table_name}")
for data in cursor:
existing_columns.add(data[0])
self._table_column_cache[table_name][data[0]] = True
else:
# otherwise if table column cache found, return cache value
self.runtime.logger.info("Found table [{}] column cache.".format(table_name))
existing_columns = set(self._table_column_cache[table_name])
# get the difference column, non-existing table columns which are present in payload
missing_columns = list(payload_columns.difference(existing_columns))
for index, column in enumerate(missing_columns):
self.runtime.logger.info("Missing column {}. {}".format(index + 1, column))
return_columns.append((column, self.identify_column_type(attr_payload[column])))
return return_columns
def handle_missing_table(self, table_name):
"""
:param table_name
This method creates table, if it doesn't exist.
"""
table_exist_status = self.check_table_exist(table_name)
if not table_exist_status:
self.runtime.logger.info("Table [{}] not found in database [{}]."
.format(table_name, self.db))
cursor = self.connection.cursor()
cursor.execute(f'create table {table_name}(log_{table_name}_id bigint auto_increment primary key)')
cursor.close()
self._table_column_cache[table_name] = dict()
self._table_column_cache[table_name]["log_" + table_name + "_id"] = True
self.runtime.logger.info("Successfully create table [{}] in database [{}]."
.format(table_name, self.db))
def handle_missing_columns(self, payload, table_name):
"""
:param payload
:param table_name
This method creates missing columns, which are present in payload but not in table.
"""
missing_columns = self.get_missing_columns(payload, table_name)
if missing_columns:
cursor = self.connection.cursor()
for column in missing_columns:
missing_column = column[0]
column_type = column[1][0]
try:
cursor.execute(f"alter table {table_name} add column `{missing_column}` {column_type}")
self._table_column_cache[table_name][missing_column] = True
self.runtime.logger.info("Added new column [{}] of identified type [{}] to table [{}] "
"of database [{}].".format(missing_column, column_type,
table_name, self.db))
except Exception as e:
self.runtime.logger.warning("Could not add new column [{}] of identified type [{}] to table [{}] "
"of database [{}]. Error: {}".format(missing_column, column_type,
table_name, self.db, e))
del payload[missing_column] # delete the column from the payload
cursor.close()
def insert_payload(self, payload, table_name, dry_run=False):
"""
:param payload
:param table_name
:param dry_run
This method creates table entry for the given payload. If dry_run is true, the insert string is
just printed on output.
The method returns 0, on successful creation of record entry in the table else returns 1.
"""
try:
columns, values = [], []
for column in payload:
columns.append(f'`{column}`')
values.append(f'"{payload[column]}"')
columns = ",".join(columns)
values = ",".join(values)
insert_string = f"insert into {table_name}({columns}) values({values})"
if dry_run:
print(insert_string)
return 0
cursor = self.connection.cursor()
cursor.execute(insert_string)
self.connection.commit()
cursor.close()
return 0
except Exception as e:
self.runtime.logger.error("Something went wrong creating payload entry in the database. Payload is {}. Exception is {}.".format(payload, e), exc_info=True)
return 1
@try_connecting
def create_payload_entry(self, payload, table_name, record_dry_run=False):
"""
:param payload
:param table_name
:param record_dry_run
This method is a wrapper method, which calls the method to check missing table, missing column and then
the method to insert record into table.
"""
# this if condition only allows request to be written to db where dry_run is set
# false both at DB initiation in runtime and creating Record in the context
# manager. Any of the dry_run flags if set to true will not allow
# writing record to the database.
insert_status = None
if self.mysql_db_env_var_setup and not self.dry_run and not record_dry_run:
if self.connection and self.connection.is_connected():
self.handle_missing_table(table_name)
self.handle_missing_columns(payload, table_name)
insert_status = self.insert_payload(payload, table_name)
else:
if not self.mysql_db_env_var_setup:
self.runtime.logger.error("Doozer running with no DB use mode. Can't create build entry.")
insert_status = 1
else:
self.runtime.logger.info("Doozer either running with no DB mode or this was a dry run request.")
insert_status = self.insert_payload(payload, table_name, True)
if insert_status:
self.runtime.logger.error("Something went wrong creating payload entry.")
else:
self.runtime.logger.info("Payload entry successfully created in database.")
@staticmethod
def rename_to_valid_column(column_name):
"""
This is a helper method to transform column name having '.' and '-' and replaces these characters with '_'.
"""
column_name = column_name.replace(".", "_")
column_name = column_name.replace("-", "_")
return column_name
def record(self, operation, metadata=None, dry_run=False):
"""
:param operation: The type of operation being tracked, table name to which the record is sent to
:param metadata: Distgit metadata if any
:param dry_run: If true, this record will not be written to the datastore. e.g. scratch builds.
:return:
"""
extras = {}
if metadata:
extras['dg.name'] = metadata.name
extras['dg.namespace'] = metadata.namespace
extras['dg.qualified_key'] = metadata.qualified_key
extras['dg.qualified_name'] = metadata.qualified_name
return Record(self, "log_" + str(operation), extras, dry_run)
class Record(object):
"""
Context manager to handle records being added to database
Useful reference: https://preshing.com/20110920/the-python-with-statement-by-example/
"""
_tl = threading.local()
def __init__(self, db, table, extras={}, dry_run=False):
self.previous_record = None
self.table = table
self.db = db
self.runtime = db.runtime
self.dry_run = dry_run
# A set of attributes for the record
self.attrs = {
'time.unix': int(round(time.time() * 1000)), # utc milliseconds since epoch
'time.iso': self.runtime.timestamp(), # for humans
'runtime.uuid': self.runtime.uuid,
'runtime.user': self.runtime.user or '',
'group': self.runtime.group_config['name'],
}
for jenkins_var in ['BUILD_NUMBER', 'BUILD_URL', 'JOB_NAME', 'NODE_NAME', 'JOB_URL']:
self.attrs[f'jenkins.{jenkins_var.lower()}'] = os.getenv(jenkins_var, '')
self.attrs.update(extras)
def __enter__(self):
if hasattr(self._tl, 'record'):
self.previous_record = self._tl.record
self._tl.record = self
return self
def __exit__(self, *args):
try:
with self.db.runtime.get_named_semaphore('dblib::mysql'):
attr_payload = {}
for k, v in self.attrs.items():
if v is None or v is Missing or v == '':
continue
else:
attr_payload[self.db.rename_to_valid_column(k)] = v
self.db.create_payload_entry(attr_payload, self.table, self.dry_run)
except Exception as e:
self.runtime.logger.error(f"Payload insert into database failed: {e}")
self._tl.record = self.previous_record
@classmethod
def set(cls, name, value):
"""
Sets an attribute name=value for the most recent Record context
:param name: The name of the attribute to set (limit 1024 chars)
:param value: The value of the attribute to set (limit 1024 chars)
:return:
"""
if not hasattr(cls._tl, "record"):
# Caller is not within runtime.record(...):
raise IOError(f'No database record context has been established. Unable to set {name}={value}.')
cls._tl.record.attrs[name] = value
@classmethod
def update(cls, a_dict):
"""
Sets attributes for the most recent Record context
:param a_dict: key / values to set
:return:
"""
if not hasattr(cls._tl, "record"):
# Caller is not within runtime.record(...):
raise IOError(f'No database record context has been established. Unable to set {a_dict}.')
cls._tl.record.attrs.update(a_dict)
@classmethod
def current(cls):
"""
:return: Returns the current Record object or None
"""
if not hasattr(cls._tl, "record"):
return None
return cls._tl.record