This repository has been archived by the owner on Oct 13, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 26
/
metadata.py
873 lines (739 loc) · 46.4 KB
/
metadata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
import datetime
import io
import pathlib
import re
import sys
import time
import urllib.parse
from collections import OrderedDict
from enum import Enum
from typing import Any, Dict, List, NamedTuple, Optional, Tuple, Union
from defusedxml import ElementTree
import dateutil.parser
import requests
import yaml
from dockerfile_parse import DockerfileParser
from tenacity import (retry, retry_if_exception_type, stop_after_attempt,
wait_fixed)
import doozerlib
from doozerlib import exectools, logutil
from doozerlib.assembly import assembly_basis_event, assembly_metadata_config
from doozerlib.brew import BuildStates
from doozerlib.distgit import DistGitRepo, ImageDistGitRepo, RPMDistGitRepo
from doozerlib.model import Missing, Model
from doozerlib.pushd import Dir
from doozerlib.util import (isolate_el_version_in_brew_tag,
isolate_git_commit_in_release)
class CgitAtomFeedEntry(NamedTuple):
    """One <entry> element parsed from a distgit cgit atom feed (populated by Metadata.cgit_atom_feed)."""
    title: str  # text of the entry's <title> element
    updated: datetime.datetime  # parsed value of the entry's <updated> element
    id: str  # text of the entry's <id> element
    content: str  # text of the entry's <content type="text"> element
#
# These are used as labels to index selection of a subclass.
#
DISTGIT_TYPES = {
    'image': ImageDistGitRepo,
    'rpm': RPMDistGitRepo
}

# Valid values for a metadata config's 'mode' field.
CONFIG_MODES = [
    'enabled',  # business as usual
    'disabled',  # manually disabled from automatically building
    'wip',  # Work in Progress, do not build
]

# Mode assumed when a config does not specify one ('enabled').
CONFIG_MODE_DEFAULT = CONFIG_MODES[0]
class RebuildHintCode(Enum):
    """
    Reasons scan-sources may (or may not) decide a component needs a rebuild.
    Each value is a tuple of (rebuild_needed: bool, unique_code: int); the
    boolean element is surfaced through RebuildHint.rebuild.
    """
    NO_COMPONENT = (True, 0)
    NO_LATEST_BUILD = (True, 1)
    DISTGIT_ONLY_COMMIT_OLDER = (False, 2)
    DISTGIT_ONLY_COMMIT_NEWER = (True, 3)
    DELAYING_NEXT_ATTEMPT = (False, 4)
    LAST_BUILD_FAILED = (True, 5)
    NEW_UPSTREAM_COMMIT = (True, 6)
    UPSTREAM_COMMIT_MISMATCH = (True, 7)
    BUILD_IS_UP_TO_DATE = (False, 8)
    ANCESTOR_CHANGING = (True, 9)
    CONFIG_CHANGE = (True, 10)
    BUILDER_CHANGING = (True, 11)
    BUILD_ROOT_CHANGING = (True, 12)
    PACKAGE_CHANGE = (True, 13)
    ARCHES_CHANGE = (True, 14)
    DEPENDENCY_NEWER = (True, 15)
class RebuildHint(NamedTuple):
    """A scan-sources verdict: whether a rebuild is warranted and a human-readable reason."""
    code: RebuildHintCode
    reason: str  # human-readable explanation of the verdict

    @property
    def rebuild(self):
        # The first element of the code's (bool, int) value indicates whether a rebuild is needed.
        return self.code.value[0]
class Metadata(object):
def __init__(self, meta_type: str, runtime: "doozerlib.Runtime", data_obj: Dict, commitish: Optional[str] = None, prevent_cloning: Optional[bool] = False):
    """
    :param meta_type - a string. Index to the sub-class <'rpm'|'image'>.
    :param runtime - a Runtime object.
    :param data_obj - a dictionary for the metadata configuration
    :param commitish: If not None, build from the specified upstream commit-ish instead of the branch tip.
    :param prevent_cloning: Throw an exception if upstream/downstream cloning operations are attempted.
    """
    self.meta_type = meta_type
    self.runtime = runtime
    self.data_obj = data_obj
    self.config_filename = data_obj.filename
    self.full_config_path = data_obj.path
    self.commitish = commitish

    # For efficiency, we want to prevent some verbs from introducing changes that
    # trigger distgit or upstream cloning. Setting this flag to True will cause
    # an exception if it is attempted.
    self.prevent_cloning = prevent_cloning

    # URL and branch of public upstream source are set later by Runtime.resolve_source()
    self.public_upstream_url = None
    self.public_upstream_branch = None

    # Some config filenames have suffixes to avoid name collisions; strip off the suffix to find the real
    # distgit repo name (which must be combined with the distgit namespace).
    # e.g. openshift-enterprise-mediawiki.apb.yml
    #      distgit_key=openshift-enterprise-mediawiki.apb
    #      name (repo name)=openshift-enterprise-mediawiki
    self.distgit_key = data_obj.key
    self.name = self.distgit_key.split('.')[0]  # Split off any '.apb' style differentiator (if present)

    self.runtime.logger.debug("Loading metadata from {}".format(self.full_config_path))

    self.raw_config = Model(data_obj.data)  # Config straight from ocp-build-data
    assert (self.raw_config.name is not Missing)

    # Layer any assembly-level overrides from releases.yml on top of the raw config.
    self.config = assembly_metadata_config(runtime.get_releases_config(), runtime.assembly, meta_type, self.distgit_key, self.raw_config)
    self.namespace, self._component_name = Metadata.extract_component_info(meta_type, self.name, self.config)

    self.mode = self.config.get('mode', CONFIG_MODE_DEFAULT).lower()
    if self.mode not in CONFIG_MODES:
        raise ValueError('Invalid mode for {}'.format(self.config_filename))
    self.enabled = (self.mode == CONFIG_MODE_DEFAULT)

    self.qualified_name = "%s/%s" % (self.namespace, self.name)
    self.qualified_key = "%s/%s" % (self.namespace, self.distgit_key)

    # Includes information to identify the metadata being used with each log message
    self.logger = logutil.EntityLoggingAdapter(logger=self.runtime.logger, extra={'entity': self.qualified_key})

    self._distgit_repo = None

    # List of Brew targets.
    # The first target is the primary target, against which tito will direct build.
    # Others are secondary targets. We will use Brew API to build against secondary
    # targets with the same distgit commit as the primary target.
    self.targets: List[str] = self.determine_targets()

    if self.runtime.assembly_basis_event and self.config.content.source.git.branch.target and not commitish:
        # Ok, so we are a release assembly like 'art1999'. We inherit from
        # 4.7.22 which was composed primarily out of "assembly.stream" builds
        # but maybe one or two pinned "assembly.4.7.22" builds.
        # An artist has been asked to create art1999 and bump a single RPM in the
        # ose-etcd image. To do that, the artist
        # adds the dependency to releases.yml for the ose-etcd distgit_key in the
        # art1999 assembly and then triggers a "rebuild" job of the image.
        # What upstream git commit do you expect to be built? Why the source from 4.7.22,
        # of course! The customer asked for an RPM bump, not to take on any of the hundreds of
        # code changes that may have taken place since 4.7.22 in the 4.7 branch.
        # So how do we arrive at that? Well, it is in the brew metadata of the latest build from
        # the 4.7.22 assembly.
        # Oh, but what if the customer DOES want a different commit? Well, the artist should
        # update the release.yml for art1999 to include that commit or explicitly specify a branch.
        # How do we determine whether they have done that? Look for any explicit overrides
        # in our assembly's metadata.
        # Let's do it!
        assembly_overrides = assembly_metadata_config(runtime.get_releases_config(), runtime.assembly, meta_type, self.distgit_key, Model({}))
        # Nice! By passing Model({}) instead of the metadata from our image yml file, we should only get fields actually defined in
        # releases.yml.
        if assembly_overrides.content.source.git.branch.target:
            # Yep.. there is an override in releases.yml. The good news is that we are done.
            # The rest of doozer code is equipped to clone that upstream commit
            # and rebase using it.
            pass
        else:
            # Ooof.. it is not defined in the assembly, so we need to find it dynamically.
            self.logger.info("A commitish is not explicitly specified for %s. Determining from the latest build...", self.name)
            build_obj = self.get_latest_build(default=None, el_target=self.determine_rhel_targets()[0])
            if build_obj:
                self.commitish = isolate_git_commit_in_release(build_obj['nvr'])
                self.logger.warning(f'Pinning upstream source to commit of last assembly selected build ({build_obj["id"]}) -> commit {self.commitish} ')
            else:
                # If this is part of a unit test, don't make the caller's life more difficult than it already is; skip the exception.
                if 'unittest' not in sys.modules.keys():
                    raise IOError(f'Expected to find pre-existing build for {self.distgit_key} in order to pin upstream source commit')

        # If you've read this far, you may be wondering, why are we not trying to find the SOURCE_GIT_URL from the last built image?
        # Good question! Because it should be the value found in our assembly-modified image metadata!
        # The git commit starts as a branch in standard ocp-build-data metadata and its
        # commit hash is only discovered at runtime. The source git URL is literal. If it does change somewhere in the assembly
        # definitions, that's fine. This assembly should find it when looking up the content.source.git.url from the metadata.
def determine_targets(self) -> List[str]:
""" Determine Brew targets for building this component
"""
targets = self.config.get("targets")
if not targets:
# If not specified in meta config, load from group config
profile_name = self.runtime.profile or self.runtime.group_config.get(f"default_{self.meta_type}_build_profile")
if profile_name:
targets = self.runtime.group_config.build_profiles.primitive()[self.meta_type][profile_name].get("targets")
if not targets:
# If group config doesn't define the targets either, the target name will be derived from the distgit branch name
targets = [self._default_brew_target()]
return targets
def determine_rhel_targets(self) -> List[int]:
"""
For each build target for the component, return the rhel version it is for. For example,
if an RPM builds for both rhel-7 and rhel-8 targets, return [7,8]
"""
el_targets: List[int] = []
for target in self.determine_targets():
el_ver = isolate_el_version_in_brew_tag(target)
if not el_ver:
raise IOError(f'Unable to determine RHEL version from build target {target} in {self.distgit_key}')
el_targets.append(el_ver)
return el_targets
def save(self):
self.data_obj.data = self.config.primitive()
self.data_obj.save()
def distgit_remote_url(self):
pkgs_host = self.runtime.group_config.urls.get('pkgs_host', 'pkgs.devel.redhat.com')
# rhpkg uses a remote named like this to pull content from distgit
if self.runtime.user:
return f'ssh://{self.runtime.user}@{pkgs_host}/{self.qualified_name}'
return f'ssh://{pkgs_host}/{self.qualified_name}'
def distgit_repo(self, autoclone=True) -> DistGitRepo:
    """Lazily construct (and cache) the DistGitRepo wrapper appropriate for this metadata type."""
    if self._distgit_repo is None:
        # Select the image/rpm distgit implementation based on the metadata type.
        repo_cls = DISTGIT_TYPES[self.meta_type]
        self._distgit_repo = repo_cls(self, autoclone=autoclone)
    return self._distgit_repo
def branch(self) -> str:
    """Return the distgit branch: the per-component override when present, else the group's branch."""
    override = self.config.distgit.branch
    if override is not Missing:
        return override
    return self.runtime.branch
def branch_major_minor(self) -> str:
"""
:return: Extracts and returns '{major}.{minor}' from the distgit branch.
"""
split = self.branch().split('-') # e.g. ['rhaos', '4.8', 'rhel', '8']
return split[1]
def branch_el_target(self) -> int:
"""
:return: Determines what rhel-# version the distgit branch is associated with and returns the RHEL version as an int
"""
target_match = re.match(r'.*-rhel-(\d+)(?:-|$)', str(self.branch()))
if target_match:
return int(target_match.group(1))
else:
raise IOError(f'Unable to determine rhel version from branch: {self.branch()}')
def build_root_tag(self):
return '{}-build'.format(self.branch())
def candidate_brew_tag(self):
return '{}-candidate'.format(self.branch())
def hotfix_brew_tag(self):
return f'{self.branch()}-hotfix'
def _default_brew_target(self):
""" Returns derived brew target name from the distgit branch name
"""
return NotImplementedError()
def candidate_brew_tags(self):
return [self.candidate_brew_tag()]
def hotfix_brew_tags(self):
""" Returns "hotfix" Brew tags for this component.
"Hotfix" tags are used to prevent garbage collection.
"""
return [self.hotfix_brew_tag()]
def get_arches(self):
"""
:return: Returns the list of architecture this image/rpm should build for. This is an intersection
of config specific arches & globally enabled arches in group.yml
"""
if self.config.arches:
ca = self.config.arches
intersection = list(set(self.runtime.get_global_arches()) & set(ca))
if len(intersection) != len(ca):
self.logger.info(f'Arches are being pruned by group.yml. Using computed {intersection} vs config list {ca}')
if not intersection:
raise ValueError(f'No arches remained enabled in {self.qualified_key}')
return intersection
else:
return list(self.runtime.get_global_arches())
def cgit_atom_feed(self, commit_hash: Optional[str] = None, branch: Optional[str] = None) -> List[CgitAtomFeedEntry]:
    """
    Returns a representation of the cgit atom feed for this component's distgit.
    The feed provides timestamp and commit information without having to clone distgits.
    Feed example: https://gist.github.com/jupierce/ab006c0fc83050b714f6de2ec30f1072

    :param commit_hash: Specify to receive an entry for the specific commit (branch ignored if specified).
            Returns a feed with a single entry.
    :param branch: branch name; None implies the branch specified in ocp-build-data (XOR commit_hash).
            Returns a feed with several of the most recent entries.

    Example urls..
    http://pkgs.devel.redhat.com/cgit/containers/cluster-etcd-operator/atom/?h=rhaos-4.8-rhel-8
    or
    http://pkgs.devel.redhat.com/cgit/containers/cluster-etcd-operator/atom/?id=35ecfa4436139442edc19585c1c81ebfaca18550
    """
    cgit_url_base = self.runtime.group_config.urls.cgit
    if not cgit_url_base:
        raise ValueError("urls.cgit is not set in group config")
    url = f"{cgit_url_base}/{urllib.parse.quote(self.qualified_name)}/atom/"
    params = {}
    if commit_hash:
        params["id"] = commit_hash
    else:
        if branch is None:
            branch = self.branch()
        # NOTE: commit_hash is always falsy in this else branch, so the first
        # half of this condition is redundant; only `branch` decides.
        if not commit_hash and branch:
            params["h"] = branch

    def _make_request():
        self.logger.info("Getting cgit atom feed %s ...", url)
        resp = requests.get(url, params=params)
        resp.raise_for_status()
        return resp.text

    content = retry(
        stop=stop_after_attempt(3),  # give up after 3 attempts (up to 2 waits of 10s between them)
        wait=wait_fixed(10),  # wait for 10 seconds between retries
        retry=retry_if_exception_type(),  # no types given: tenacity retries on any Exception
    )(_make_request)()

    # Parse the atom XML with defusedxml to guard against malicious payloads.
    et = ElementTree.fromstring(content)
    entry_list = list()
    for et_entry in et.findall('{http://www.w3.org/2005/Atom}entry'):
        entry = CgitAtomFeedEntry(
            title=et_entry.find('{http://www.w3.org/2005/Atom}title').text,
            updated=dateutil.parser.parse(et_entry.find('{http://www.w3.org/2005/Atom}updated').text),
            id=et_entry.find('{http://www.w3.org/2005/Atom}id').text,
            content=et_entry.find('{http://www.w3.org/2005/Atom}content[@type="text"]').text
        )
        entry_list.append(entry)
    return entry_list
def cgit_file_url(self, filename: str, commit_hash: Optional[str] = None, branch: Optional[str] = None) -> str:
""" Construct a cgit URL to a given file associated with the commit hash pushed to distgit
:param filename: a relative path
:param commit_hash: commit hash; None implies the current HEAD
:param branch: branch name; None implies the branch specified in ocp-build-data
:return: a cgit URL
"""
cgit_url_base = self.runtime.group_config.urls.cgit
if not cgit_url_base:
raise ValueError("urls.cgit is not set in group config")
ret = f"{cgit_url_base}/{urllib.parse.quote(self.qualified_name)}/plain/{urllib.parse.quote(filename)}"
params = {}
if branch is None:
branch = self.branch()
if branch:
params["h"] = branch
if commit_hash:
params["id"] = commit_hash
if params:
ret += "?" + urllib.parse.urlencode(params)
return ret
def fetch_cgit_file(self, filename, commit_hash: Optional[str] = None, branch: Optional[str] = None):
    """ Retrieve the content of a cgit URL to a given file associated with the commit hash pushed to distgit.

    :param filename: a relative path
    :param commit_hash: commit hash; None implies the current HEAD
    :param branch: branch name; None implies the branch specified in ocp-build-data
    :return: the (bytes) content of the file
    """
    # Bug fix: the module only imports `urllib.parse`, so `urllib.request`
    # was previously resolvable only because a third-party import happened
    # to load the submodule as a side effect. Import it explicitly here.
    import urllib.request
    url = self.cgit_file_url(filename, commit_hash=commit_hash, branch=branch)
    # Retry transient failures; only accept an HTTP 200 response.
    req = exectools.retry(
        3, lambda: urllib.request.urlopen(url),
        check_f=lambda req: req.code == 200)
    return req.read()
def get_latest_build(self, default: Optional[Any] = -1, assembly: Optional[str] = None, extra_pattern: str = '*',
                     build_state: BuildStates = BuildStates.COMPLETE, component_name: Optional[str] = None,
                     el_target: Optional[Union[str, int]] = None, honor_is: bool = True, complete_before_event: Optional[int] = None):
    """
    :param default: A value to return if no latest is found (if not specified, an exception will be thrown)
    :param assembly: A non-default assembly name to search relative to. If not specified, runtime.assembly
                     will be used. If runtime.assembly is also None, the search will return true latest.
                     If the assembly parameter is set to '', this search will also return true latest.
    :param extra_pattern: An extra glob pattern that must be matched in the middle of the
                     build's release field. Pattern must match release timestamp and components
                     like p? and git commit (up to, but not including ".assembly.<name>" release
                     component). e.g. "*.g<commit>.*" or '*.p1.*'
    :param build_state: 0=BUILDING, 1=COMPLETE, 2=DELETED, 3=FAILED, 4=CANCELED
    :param component_name: If not specified, looks up builds for self component.
    :param el_target: In the case of an RPM, which can build for multiple targets, you can specify
                     '7' for el7, '8' for el8, etc. You can also pass in a brew target that
                     contains '....-rhel-?..' and the number will be extracted. If you want the true
                     latest, leave as None.
    :param honor_is: If True, and an assembly component specifies 'is', that nvr will be returned.
    :param complete_before_event: If a value is specified >= 0, the search will be constrained to builds which completed before
                     the specified brew_event. If a value is specified < 0, the search will be conducted with no constraint on
                     brew event. If no value is specified, the search will be relative to the current assembly's basis event.
    :return: Returns the most recent build object from koji for the specified component & assembly.
             Example https://gist.github.com/jupierce/57e99b80572336e8652df3c6be7bf664
    """
    if not component_name:
        component_name = self.get_component_name()

    builds = []
    with self.runtime.pooled_koji_client_session(caching=True) as koji_api:
        package_info = koji_api.getPackage(component_name)  # e.g. {'id': 66873, 'name': 'atomic-openshift-descheduler-container'}
        if not package_info:
            raise IOError(f'No brew package is defined for {component_name}')
        # We could just constrain package name using pattern glob, but providing the package ID
        # should be a much more efficient DB query.
        package_id = package_info['id']

        # listBuilds returns all builds for the package; We need to limit the query to the builds
        # relevant for our major/minor.
        rpm_suffix = ''  # By default, find the latest RPM build - regardless of el7, el8, ...
        el_ver = None
        if self.meta_type == 'image':
            ver_prefix = 'v'  # openshift-enterprise-console-container-v4.7.0-202106032231.p0.git.d9f4379
        else:
            # RPMs do not have a 'v' in front of their version; images do.
            ver_prefix = ''  # openshift-clients-4.7.0-202106032231.p0.git.e29b355.el8

        if el_target:
            el_ver = isolate_el_version_in_brew_tag(el_target)
            if el_ver:
                rpm_suffix = f'.el{el_ver}'
            else:
                raise IOError(f'Unable to determine rhel version from specified el_target: {el_target}')

        pattern_prefix = f'{component_name}-{ver_prefix}{self.branch_major_minor()}.'
        if assembly is None:
            assembly = self.runtime.assembly

        list_builds_kwargs = {}  # extra kwargs that will be passed to koji_api.listBuilds invocations
        if complete_before_event is not None:
            if complete_before_event < 0:
                # By setting the parameter to None, it tells the koji wrapper to not bound the brew event.
                list_builds_kwargs['completeBefore'] = None
            else:
                # listBuilds accepts timestamps, not brew events, so convert brew event into seconds since the epoch
                complete_before_ts = koji_api.getEvent(complete_before_event)['ts']
                list_builds_kwargs['completeBefore'] = complete_before_ts

        def default_return():
            # Honor the caller's `default` when one was supplied; otherwise raise.
            msg = f"No builds detected for using prefix: '{pattern_prefix}', extra_pattern: '{extra_pattern}', assembly: '{assembly}', build_state: '{build_state.name}', el_target: '{el_target}'"
            if default != -1:
                self.logger.info(msg)
                return default
            raise IOError(msg)

        def latest_build_list(pattern_suffix):
            # Query koji for the single most recent build whose NVR matches the computed glob.
            # Include * after pattern_suffix to tolerate:
            # 1. Matching an unspecified RPM suffix (e.g. .el7).
            # 2. Other release components that might be introduced later.
            builds = koji_api.listBuilds(packageID=package_id,
                                         state=None if build_state is None else build_state.value,
                                         pattern=f'{pattern_prefix}{extra_pattern}{pattern_suffix}*{rpm_suffix}',
                                         queryOpts={'limit': 1, 'order': '-creation_event_id'},
                                         **list_builds_kwargs)
            # Ensure the suffix ends the string OR is at least terminated by a '.' .
            # This latter check ensures that 'assembly.how' doesn't match a build from
            # 'assembly.howdy'.
            refined = [b for b in builds if b['nvr'].endswith(pattern_suffix) or f'{pattern_suffix}.' in b['nvr']]

            if refined and build_state == BuildStates.COMPLETE:
                # A final sanity check to see if the build is tagged with something we
                # respect. There is a chance that a human may untag a build. There
                # is no standard practice at present in which they should (they should just trigger
                # a rebuild). If we find the latest build is not tagged appropriately, blow up
                # and let a human figure out what happened.
                check_nvr = refined[0]['nvr']
                for i in range(2):
                    tags = {tag['name'] for tag in koji_api.listTags(build=check_nvr)}
                    if tags:
                        refined[0]['_tags'] = tags  # save tag names to dict for future use
                        break
                    # Observed that a complete build needs some time before it gets tagged. Give it some
                    # time if not immediately available.
                    time.sleep(60)

                # RPMS have multiple targets, so our self.branch() isn't perfect.
                # We should permit rhel-8/rhel-7/etc.
                tag_prefix = self.branch().rsplit('-', 1)[0] + '-'  # Strip off the rhel version.
                accepted_tags = [name for name in tags if name.startswith(tag_prefix)]
                if not accepted_tags:
                    self.logger.warning(f'Expected to find at least one tag starting with {self.branch()} on latest build {check_nvr} but found [{tags}]; tagging failed after build or something has changed tags in an unexpected way')

            return refined

        if honor_is and self.config['is']:
            if build_state != BuildStates.COMPLETE:
                # If this component is defined by 'is', history failures, etc, do not matter.
                return default_return()

            # under 'is' for RPMs, we expect 'el7' and/or 'el8', etc. For images, just 'nvr'.
            isd = self.config['is']
            if self.meta_type == 'rpm':
                if el_ver is None:
                    raise ValueError(f'Expected el_target to be set when querying a pinned RPM component {self.distgit_key}')
                is_nvr = isd[f'el{el_ver}']
                if not is_nvr:
                    return default_return()
            else:
                # The image metadata (or, more likely, the current assembly) has the image
                # pinned. Return only the pinned NVR. When a child image is being rebased,
                # it uses get_latest_build to find the parent NVR to use (if it is not
                # included in the "-i" doozer argument). We need it to find the pinned NVR
                # to place in its Dockerfile.
                # Pinning also informs gen-payload when attempting to assemble a release.
                is_nvr = isd.nvr
                if not is_nvr:
                    raise ValueError(f'Did not find nvr field in pinned Image component {self.distgit_key}')

            # strict means raise an exception if not found.
            found_build = koji_api.getBuild(is_nvr, strict=True)
            # Different brew apis return different keys here; normalize to make the rest of doozer not need to change.
            found_build['id'] = found_build['build_id']
            return found_build

        if not assembly:
            # if assembly is '' (by parameter) or still None after runtime.assembly,
            # we are returning true latest.
            builds = latest_build_list('')
        else:
            basis_event = assembly_basis_event(self.runtime.get_releases_config(), assembly=assembly)
            if basis_event:
                # If an assembly has a basis event, its latest images can only be sourced from
                # "is:" or the stream assembly. We've already checked for "is" above.
                assembly = 'stream'

            # Assemblies without a basis will return assembly qualified builds for their
            # latest images. This includes "stream" and "test", but could also include
            # an assembly that is customer specific with its own branch.
            builds = latest_build_list(f'.assembly.{assembly}')
            if not builds:
                if assembly != 'stream':
                    builds = latest_build_list('.assembly.stream')
                if not builds:
                    # Fall back to true latest
                    builds = latest_build_list('')
                    if builds and '.assembly.' in builds[0]['release']:
                        # True latest belongs to another assembly. In this case, just return
                        # that there are no builds for this assembly.
                        builds = []

    if not builds:
        return default_return()

    found_build = builds[0]
    # Different brew apis return different keys here; normalize to make the rest of doozer not need to change.
    found_build['id'] = found_build['build_id']
    return found_build
def get_latest_build_info(self, default=-1, **kwargs):
"""
Queries brew to determine the most recently built release of the component
associated with this image. This method does not rely on the "release"
label needing to be present in the Dockerfile. kwargs will be passed on
to get_latest_build.
:param default: A value to return if no latest is found (if not specified, an exception will be thrown)
:return: A tuple: (component name, version, release); e.g. ("registry-console-docker", "v3.6.173.0.75", "1")
"""
build = self.get_latest_build(default=default, **kwargs)
if default != -1 and build == default:
return default
return build['name'], build['version'], build['release']
@classmethod
def extract_component_info(cls, meta_type: str, meta_name: str, config_model: Model) -> Tuple[str, str]:
    """
    Determine the distgit namespace and brew component name for either RPM
    or Image metadata configs.

    :param meta_type: 'rpm' or 'image'
    :param meta_name: The name of the component's distgit
    :param config_model: The configuration for the metadata.
    :return: Return (namespace, component_name)
    """
    # Default namespace follows the metadata type; config data may override it.
    namespace = "containers" if meta_type == "image" else "rpms"
    if config_model.distgit.namespace is not Missing:
        namespace = config_model.distgit.namespace

    if namespace == "rpms":
        # For RPMS, component names must match the package name declared in metadata config.
        return namespace, config_model.name

    # For images/apbs, the component defaults to the distgit name plus a
    # namespace-specific suffix, unless explicitly overridden in config.
    # apb example: http://dist-git.host.prod.eng.bos.redhat.com/cgit/apbs/openshift-enterprise-mediawiki/tree/Dockerfile?h=rhaos-3.7-rhel-7
    component_name = meta_name
    if namespace == "apbs":
        component_name = "%s-apb" % component_name
    if namespace == "containers":
        component_name = "%s-container" % component_name
    if config_model.distgit.component is not Missing:
        component_name = config_model.distgit.component
    return namespace, component_name
def get_component_name(self) -> str:
"""
:return: Returns the component name of the metadata. This is the name in the nvr
that brew assigns to component build. Component name is synonymous with package name.
For RPMs, spec files declare the package name. For images, it is usually based on
the distgit repo name + '-container'.
"""
return self._component_name
def needs_rebuild(self):
if self.config.targets:
# If this meta has multiple build targets, check currency of each
for target in self.config.targets:
hint = self._target_needs_rebuild(el_target=target)
if hint.rebuild or hint.code == RebuildHintCode.DELAYING_NEXT_ATTEMPT:
# No need to look for more
return hint
return hint
else:
return self._target_needs_rebuild(el_target=None)
def _target_needs_rebuild(self, el_target=None) -> RebuildHint:
    """
    Checks whether the current upstream commit has a corresponding successful downstream build.
    Take care to not unnecessarily trigger a clone of the distgit
    or upstream source as it will dramatically increase the time needed for scan-sources.
    :param el_target: A brew build target or literal '7', '8', or rhel to perform the search for.
    :return: Returns (rebuild:<bool>, message: description of why).
    :raises IOError: If the cgit atom feed has no entries for this component's branch.
    """
    now = datetime.datetime.now(datetime.timezone.utc)

    # If a build fails, how long will we wait before trying again
    rebuild_interval = self.runtime.group_config.scan_freshness.threshold_hours or 6

    component_name = self.get_component_name()

    # Most recent successful build for this assembly/target, if any.
    latest_build = self.get_latest_build(default=None, el_target=el_target)

    if not latest_build:
        # Never built successfully for this assembly -> always rebuild.
        return RebuildHint(code=RebuildHintCode.NO_LATEST_BUILD,
                           reason=f'Component {component_name} has no latest build for assembly: {self.runtime.assembly}')

    latest_build_creation = dateutil.parser.parse(latest_build['creation_time'])
    latest_build_creation = latest_build_creation.replace(tzinfo=datetime.timezone.utc)  # If time lacks timezone info, interpret as UTC

    # Log scan-sources coordinates throughout to simplify setting up scan-sources
    # function tests to reproduce real-life scenarios.
    self.logger.debug(f'scan-sources coordinate: latest_build: {latest_build}')
    self.logger.debug(f'scan-sources coordinate: latest_build_creation_datetime: {latest_build_creation}')

    # If downstream has been locked to a commitish, only check the atom feed at that moment.
    distgit_commitish = self.runtime.downstream_commitish_overrides.get(self.distgit_key, None)

    atom_entries = self.cgit_atom_feed(commit_hash=distgit_commitish, branch=self.branch())
    if not atom_entries:
        raise IOError(f'No atom feed entries exist for {component_name} in {self.branch()}. Does branch exist?')

    dgr = self.distgit_repo(autoclone=False)  # For scan-sources speed, we need to avoid cloning
    if not dgr.has_source():
        # This is a distgit only artifact (no upstream source). Currency is judged
        # purely by comparing the distgit HEAD commit time to the latest build time.
        latest_entry = atom_entries[0]  # Most recent commit's information
        dg_commit = latest_entry.id
        self.logger.debug(f'scan-sources coordinate: dg_commit: {dg_commit}')
        dg_commit_dt = latest_entry.updated
        self.logger.debug(f'scan-sources coordinate: distgit_head_commit_datetime: {dg_commit_dt}')

        if latest_build_creation > dg_commit_dt:
            return RebuildHint(code=RebuildHintCode.DISTGIT_ONLY_COMMIT_OLDER,
                               reason='Distgit only repo commit is older than most recent build')

        # Two possible states here:
        # 1. A user has made a commit to this dist-git only branch and there has been no build attempt
        # 2. We've already tried a build and the build failed.
        # Check whether a build attempt for this assembly has failed.
        last_failed_build = self.get_latest_build(default=None,
                                                  build_state=BuildStates.FAILED,
                                                  el_target=el_target)  # How recent was the last failed build?

        if not last_failed_build:
            return RebuildHint(code=RebuildHintCode.DISTGIT_ONLY_COMMIT_NEWER,
                               reason='Distgit only commit is newer than last successful build')

        last_failed_build_creation = dateutil.parser.parse(last_failed_build['creation_time'])
        last_failed_build_creation = last_failed_build_creation.replace(tzinfo=datetime.timezone.utc)  # If time lacks timezone info, interpret as UTC
        # Back off between failed attempts so scan-sources doesn't hammer a broken build.
        if last_failed_build_creation + datetime.timedelta(hours=rebuild_interval) > now:
            return RebuildHint(code=RebuildHintCode.DELAYING_NEXT_ATTEMPT,
                               reason=f'Waiting at least {rebuild_interval} hours after last failed build')

        return RebuildHint(code=RebuildHintCode.LAST_BUILD_FAILED,
                           reason=f'Last build failed > {rebuild_interval} hours ago; making another attempt')

    # Otherwise, we have source. In the case of git source, check the upstream with ls-remote.
    # In the case of alias (only legacy stuff afaik), check the cloned repo directory.
    if "git" in self.config.content.source:
        remote_branch = self.runtime.detect_remote_source_branch(self.config.content.source.git)[0]
        out, _ = exectools.cmd_assert(["git", "ls-remote", self.config.content.source.git.url, remote_branch], strip=True, retries=5, on_retry='sleep 5')
        # Example output "296ac244f3e7fd2d937316639892f90f158718b0 refs/heads/openshift-4.8"
        upstream_commit_hash = out.split()[0]
    elif self.config.content.source.alias and self.runtime.group_config.sources and self.config.content.source.alias in self.runtime.group_config.sources:
        # This is a new style alias with url information in group config
        source_details = self.runtime.group_config.sources[self.config.content.source.alias]
        remote_branch = self.runtime.detect_remote_source_branch(source_details)[0]
        out, _ = exectools.cmd_assert(["git", "ls-remote", source_details.url, remote_branch], strip=True, retries=5, on_retry='sleep 5')
        # Example output "296ac244f3e7fd2d937316639892f90f158718b0 refs/heads/openshift-4.8"
        upstream_commit_hash = out.split()[0]
    else:
        # If it is not git, we will need to punt to the rest of doozer to get the upstream source for us.
        # NOTE: this path requires a cloned source directory, unlike the ls-remote paths above.
        with Dir(dgr.source_path()):
            upstream_commit_hash, _ = exectools.cmd_assert('git rev-parse HEAD', strip=True)

    self.logger.debug(f'scan-sources coordinate: upstream_commit_hash: {upstream_commit_hash}')
    git_component = f'.g*{upstream_commit_hash[:7]}'  # use .g*<commit> so it matches new form ".g0123456" and old ".git.0123456"

    # Scan for any build in this assembly which also includes the git commit.
    upstream_commit_build = self.get_latest_build(default=None,
                                                  extra_pattern=f'*{git_component}*',
                                                  el_target=el_target)  # Latest build for this commit.

    if not upstream_commit_build:
        # There is no build for this upstream commit. Two options to assess:
        # 1. This is a new commit and needs to be built
        # 2. Previous attempts at building this commit have failed
        # Check whether a build attempt with this commit has failed before.
        failed_commit_build = self.get_latest_build(default=None,
                                                    extra_pattern=f'*{git_component}*',
                                                    build_state=BuildStates.FAILED,
                                                    el_target=el_target)  # Have we tried before and failed?

        # If not, this is a net-new upstream commit. Build it.
        if not failed_commit_build:
            return RebuildHint(code=RebuildHintCode.NEW_UPSTREAM_COMMIT,
                               reason='A new upstream commit exists and needs to be built')

        # Otherwise, there was a failed attempt at this upstream commit on record.
        # Make sure provide at least rebuild_interval hours between such attempts
        last_attempt_time = dateutil.parser.parse(failed_commit_build['creation_time'])
        last_attempt_time = last_attempt_time.replace(tzinfo=datetime.timezone.utc)  # If time lacks timezone info, interpret as UTC

        if last_attempt_time + datetime.timedelta(hours=rebuild_interval) < now:
            return RebuildHint(code=RebuildHintCode.LAST_BUILD_FAILED,
                               reason=f'It has been {rebuild_interval} hours since last failed build attempt')

        return RebuildHint(code=RebuildHintCode.DELAYING_NEXT_ATTEMPT,
                           reason=f'Last build of upstream commit {upstream_commit_hash} failed, but holding off for at least {rebuild_interval} hours before next attempt')

    # A build exists for this commit, but sanity-check that it is also the latest build;
    # if it is not, the upstream commit was likely reverted and we should rebuild.
    if latest_build['nvr'] != upstream_commit_build['nvr']:
        return RebuildHint(code=RebuildHintCode.UPSTREAM_COMMIT_MISMATCH,
                           reason=f'Latest build {latest_build["nvr"]} does not match upstream commit build {upstream_commit_build["nvr"]}; commit reverted?')

    return RebuildHint(code=RebuildHintCode.BUILD_IS_UP_TO_DATE,
                       reason=f'Build already exists for current upstream commit {upstream_commit_hash}: {latest_build}')
def get_jira_info(self) -> Tuple[str, str]:
    """
    Compute the Jira coordinates (project, component) where bugs for this
    component should be filed.
    Sources, in descending precedence:
    1. This metadata's own `jira` config stanza.
    2. Prodsec's component mapping from the product config.
    3. Product-level default project, then hard fallbacks 'OCPBUGS'/'Unknown'.
    :return: (jira_project, jira_component)
    """
    # Metadata-level jira settings win over anything derived below.
    overrides = self.config.jira.copy() or dict()

    bug_mapping = self.runtime.get_product_config().bug_mapping
    entry = bug_mapping.components[self.get_component_name()]

    # Entry-specific project beats the product-wide default; 'OCPBUGS' is the
    # last-resort fallback when neither is defined.
    project = entry.issue_project or bug_mapping.default_issue_project or 'OCPBUGS'
    component = entry.issue_component or 'Unknown'

    if self.distgit_key == 'openshift-enterprise-base':
        # This is a special case image that is represented by upstream but
        # no one release owns. ART should handle merges here.
        component = 'Release'

    return overrides.get('project', project), overrides.get('component', component)
def extract_kube_env_vars(self) -> Dict[str, str]:
    """
    Analyzes the source_base_dir for the hyperkube Dockerfile in which the release's k8s version
    is defined. Side effect is cloning distgit
    and upstream source if it has not already been done.
    :return: A Dict of environment variables that should be added to the Dockerfile / rpm spec.
             Variables like KUBE_GIT_VERSION, KUBE_GIT_COMMIT, KUBE_GIT_MINOR, ...
             May be empty if there is no kube information in the source dir.
    :raises IOError: If a hyperkube Dockerfile is present but the kubernetes version
             cannot be extracted from its labels, or if KUBE vars cannot be determined
             for a component that requires them.
    """
    envs = dict()

    # BUGFIX: resolve_source may indicate "no upstream source" (e.g. None); the old code
    # passed the result straight into pathlib.Path, which raises TypeError on None and
    # yields a truthy Path('.') on '', so the distgit-only guard below could never fire.
    # Test the raw return value before constructing a Path.
    source_location = self.runtime.resolve_source(self)
    if not source_location:
        # distgit only. Return empty.
        return envs
    upstream_source_path: pathlib.Path = pathlib.Path(source_location)

    with Dir(upstream_source_path):
        out, _ = exectools.cmd_assert(["git", "rev-parse", "HEAD"])
        # BUGFIX: strip the trailing newline so the sha is clean if it ends up in
        # KUBE_GIT_COMMIT via the fallback path below.
        source_full_sha = out.strip()

    # Locate the hyperkube Dockerfile; layout differs between release streams.
    use_path = None
    path_4x = upstream_source_path.joinpath('openshift-hack/images/hyperkube/Dockerfile.rhel')  # for >= 4.6: https://github.com/openshift/kubernetes/blob/fcff70a54d3f0bde19e879062e8f1489ba5d0cb0/openshift-hack/images/hyperkube/Dockerfile.rhel#L16
    if path_4x.exists():
        use_path = path_4x

    path_3_11 = upstream_source_path.joinpath('images/hyperkube/Dockerfile')  # for 3.11: https://github.com/openshift/ose/blob/enterprise-3.11/images/hyperkube/Dockerfile
    if not use_path and path_3_11.exists():
        use_path = path_3_11

    kube_version_fields = []
    if use_path:
        dfp = DockerfileParser(cache_content=True, fileobj=io.BytesIO(use_path.read_bytes()))
        build_versions = dfp.labels.get('io.openshift.build.versions', None)
        if not build_versions:
            raise IOError(f'Unable to find io.openshift.build.versions label in {str(use_path)}')

        # Find something like kubernetes=1.22.1 and extract version as group
        m = re.match(r"^.*[^\w]*kubernetes=([\d.]+).*", build_versions)
        if not m:
            raise IOError(f'Unable to find `kubernetes=...` in io.openshift.build.versions label from {str(use_path)}')

        base_kube_version = m.group(1).lstrip('v')
        kube_version_fields = base_kube_version.split('.')  # 1.17.1 => [ '1', '17', '1']

        # upstream kubernetes creates a tag for each version. Go find its sha.
        rc, out, err = exectools.cmd_gather(f'git ls-remote https://github.com/kubernetes/kubernetes v{base_kube_version}')
        out = out.strip()
        if rc == 0 and out:
            # Expecting something like 'a26dc584ac121d68a8684741bce0bcba4e2f2957 refs/tags/v1.19.0-rc.2'
            kube_commit_hash = out.split()[0]
        else:
            # That's strange, but let's not kill the build for it. Poke in our repo's hash.
            self.logger.warning(f'Unable to find upstream git tag v{base_kube_version} in https://github.com/kubernetes/kubernetes')
            kube_commit_hash = source_full_sha

    if kube_version_fields:
        # For historical consistency with tito's flow, we add +OS_GIT_COMMIT[:7] to the kube version
        envs['KUBE_GIT_VERSION'] = f"v{'.'.join(kube_version_fields)}+{source_full_sha[:7]}"
        envs['KUBE_GIT_MAJOR'] = '0' if len(kube_version_fields) < 1 else kube_version_fields[0]
        godep_kube_minor = '0' if len(kube_version_fields) < 2 else kube_version_fields[1]
        envs['KUBE_GIT_MINOR'] = f'{godep_kube_minor}+'  # For historical reasons, append a '+' since OCP patches its vendored kube.
        envs['KUBE_GIT_COMMIT'] = kube_commit_hash
        envs['KUBE_GIT_TREE_STATE'] = 'clean'
    elif self.name in ('openshift-enterprise-hyperkube', 'openshift', 'atomic-openshift'):
        self.logger.critical(f'Unable to acquire KUBE vars for {self.name}. This must be fixed or platform addons can break: https://bugzilla.redhat.com/show_bug.cgi?id=1861097')
        raise IOError(f'Unable to determine KUBE vars for {self.name}')

    return envs