-
Notifications
You must be signed in to change notification settings - Fork 405
/
annotate_metadata_with_index.py
39 lines (33 loc) · 1.16 KB
/
annotate_metadata_with_index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
"""Annotate a metadata file with the given sequence index.
"""
import argparse
from augur.io import read_metadata
import pandas as pd
def read_sequence_index(path):
    """Load a sequence index table and prepare it for joining to metadata.

    The table is read as tab-separated text. Its existing "length" column is
    discarded and replaced by the per-record sum of the "A", "C", "G", and
    "T" columns. Every column except "strain" is then renamed with a leading
    underscore (for example "A" becomes "_A" and "length" becomes "_length").

    Parameters
    ----------
    path
        Path (or file-like object) of the tab-separated sequence index. It
        must contain "strain", "length", "A", "C", "G", and "T" columns.

    Returns
    -------
    pandas.DataFrame
        The index with "strain" unchanged and all other columns prefixed
        with an underscore.

    Raises
    ------
    KeyError
        If the "length", "A", "C", "G", or "T" column is missing.
    """
    index = pd.read_csv(
        path,
        sep="\t",
        # Read the join key as text. With dtype inference, strain names made
        # only of digits are parsed as integers (and zero-padded names lose
        # their padding), so they no longer match the same names stored as
        # text on the metadata side of the merge.
        # NOTE(review): assumes the metadata strain names are strings --
        # confirm against augur.io.read_metadata.
        dtype={"strain": str},
    ).drop(
        columns=["length"],
    )

    # Redefine length as the count of unambiguous bases only.
    index["length"] = index.loc[:, ["A", "C", "G", "T"]].sum(axis=1)

    # Prefix every index-derived column with an underscore; "strain" keeps
    # its name because it is the merge key.
    new_columns = {
        column: f"_{column}"
        for column in index.columns
        if column != "strain"
    }
    return index.rename(columns=new_columns)


def main():
    """Parse command-line arguments and write the annotated metadata table."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("--metadata", required=True, help="metadata to annotate")
    parser.add_argument("--sequence-index", required=True, help="sequence index from augur index")
    parser.add_argument("--output", required=True, help="metadata annotated with sequence index columns including a 'length' column based on the number of A, C, G, and T bases.")
    args = parser.parse_args()

    metadata = read_metadata(args.metadata)
    index = read_sequence_index(args.sequence_index)

    # merge() uses pandas' default inner join, so only strains present in
    # both the metadata and the sequence index are written out.
    metadata.merge(
        index,
        on="strain",
    ).to_csv(
        args.output,
        sep="\t",
        index=False,
    )


if __name__ == '__main__':
    main()