# Copy all PASS vcfs from project

Create a file listing the path patterns (globs) that match the PASS indel VCF files:

```
isabl get-outdirs -fi projects 116 -fi application 32 -fi status SUCCEEDED | xargs -n 1 -I{} echo {}/merged/*.indels.pass.vcf.gz > indels_pass_p116.txt
```

Resolve each pattern to the actual file and copy it into the `P116/` folder (the folder must already exist):
```
cat indels_pass_p116.txt | xargs -I{} bash -c "ls {}" | xargs -I{} bash -c "cp {} P116/"
```

Unzip vcfs:

```
cd P116 && gunzip *.vcf.gz
```

In [None]:
# Install the GRCh37 reference genome for SigProfilerMatrixGenerator.

from SigProfilerMatrixGenerator import install as genInstall

# NOTE(review): `rsync` and `bash` presumably choose how the genome files are
# fetched/unpacked -- confirm against the SigProfilerMatrixGenerator docs.
genInstall.install("GRCh37", bash=True, rsync=False)

In [None]:
from SigProfilerMatrixGenerator.scripts import SigProfilerMatrixGeneratorFunc as matGen

# Run SigProfilerMatrixGenerator for project "P179" against GRCh37, reading
# the input files from the project folder, and keep the result in `matrices`.
# NOTE(review): the setup notes at the top of this notebook copy the VCFs into
# P116/, while this call points at P179 -- confirm the intended project.
matrices = matGen.SigProfilerMatrixGeneratorFunc(
    "P179",
    "GRCh37",
    "/work/isabl/home/arangooj/indels_wgs/P179",
    exome=False,
    bed_file=None,
    chrom_based=False,
    plot=True,
    tsb_stat=False,
    seqInfo=False,
    cushion=100,
)

In [None]:
# Display the value returned by SigProfilerMatrixGeneratorFunc (assigned to
# `matrices` in the previous cell).
matrices

In [None]:
import pysam
import pandas as pd

# Split the pooled any2lcc indel table into one VCF per TARGET_NAME.
# Each output record carries only CHROM, POS, REF, ALT and a PASS filter;
# the VCF header is borrowed from an existing template VCF.

tsv = "/work/isabl/home/isablbot/wgs_indels/any2lcc-pos/any2lcc_raw.tsv.gz"
vcf = "/work/isabl/home/arangooj/indels_wgs"
template = "/work/isabl/home/arangooj/indels_wgs/test/input/all.vcf"

# The table is parsed in 20,000-row chunks ('#' lines are skipped, CHR is read
# as str) and the chunks are concatenated into a single DataFrame.
chunks = pd.read_csv(
    filepath_or_buffer=tsv,
    compression="gzip",
    chunksize=20000,
    sep="\t",
    comment="#",
    low_memory=False,
    dtype={"CHR": str},
)
# pd.concat consumes the chunk reader directly; no intermediate list is needed.
indels = pd.concat(chunks)

# Only these columns are needed to build a record; restricting to them keeps
# the per-row tuples small.
record_columns = ["CHR", "START", "REF", "ALT"]

# The open handle gets its own name so that `template` stays bound to the path
# (rebinding it to the handle breaks a partial re-run of this cell).
with pysam.VariantFile(template, mode="r") as template_vcf:
    for name, df in indels.groupby("TARGET_NAME"):
        print(f"Creating {name}...")
        with pysam.VariantFile(f"{vcf}/{name}.vcf", mode="w", header=template_vcf.header) as any2lcc:
            # itertuples avoids building a full Series per record (iterrows).
            for row in df[record_columns].itertuples(index=False):
                rec = any2lcc.new_record()
                rec.chrom = str(row.CHR)
                # NOTE(review): pysam's `pos` is 1-based -- this assumes START
                # is 1-based as well; confirm against the TSV producer.
                # int() hands pysam a plain Python int rather than a numpy scalar.
                rec.pos = int(row.START)
                rec.ref = row.REF
                rec.alts = [row.ALT]
                rec.filter.add("PASS")
                any2lcc.write(rec)
