Skip to content

Commit

Permalink
Merge pull request #781 from lsst/tickets/DM-38969
Browse files (browse the repository at this point in the history)
DM-38969: Update to correct pandas usage that does not fragment dataframes.
  • Loading branch information
erykoff committed May 3, 2023
2 parents 80cdd8e + 298162b commit e3f20a9
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions python/lsst/pipe/tasks/postprocess.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -174,7 +174,6 @@ def run(self, catalogs, tract, patch):
catalog : `pandas.DataFrame`
Merged dataframe.
"""

dfs = []
for filt, tableDict in catalogs.items():
for dataset, table in tableDict.items():
Expand All @@ -183,14 +182,15 @@ def run(self, catalogs, tract, patch):

# Sort columns by name, to ensure matching schema among patches
df = df.reindex(sorted(df.columns), axis=1)
df['tractId'] = tract
df['patchId'] = patch
df = df.assign(tractId=tract, patchId=patch)

# Make columns a 3-level MultiIndex
df.columns = pd.MultiIndex.from_tuples([(dataset, filt, c) for c in df.columns],
names=('dataset', 'band', 'column'))
dfs.append(df)

# We do this dance and not `pd.concat(dfs)` because the pandas
# concatenation uses infinite memory.
catalog = functools.reduce(lambda d1, d2: d1.join(d2), dfs)
return catalog

Expand Down

0 comments on commit e3f20a9

Please sign in to comment.