# Mi3-GPU Setup and Sequence Analysis Pipeline

In [None]:
!apt-get update && apt-get install -y build-essential libssl-dev libffi-dev python3-dev ocl-icd-libopencl1 opencl-headers clinfo libopenmpi-dev ocl-icd-opencl-dev
!pip install numpy scipy pyopencl biopython matplotlib configargparse mpi4py siphash24 cython setuptools wheel setuptools-git-ver setuptools-git-versioning seqtools

In [None]:
!git clone https://github.com/ahaldane/Mi3-GPU.git
%cd Mi3-GPU
!python3 setup.py build_ext --inplace

In [None]:
%cd extras
!git clone https://github.com/DEShawResearch/Random123.git
!cp -r Random123/include/Random123/ ./

In [None]:
# Work from the extras directory of the cloned repository (absolute Colab path).
%cd /content/Mi3-GPU/extras/
# Patch the C source in place: every occurrence of "%d)" is rewritten to "%zu)".
# NOTE(review): presumably this corrects printf-style format specifiers for size_t
# arguments -- confirm against mcmcCPUgenThreaded.c.
!sed -i 's/%d)/%zu)/g' mcmcCPUgenThreaded.c
# Compile the patched source at -O3, linking libm, into an executable named `cpu`.
!gcc -O3 mcmcCPUgenThreaded.c -lm -o cpu

## Load Input Sequence File

In [None]:
from google.colab import files

# files.upload() opens the browser upload dialog and returns a dict that maps
# each uploaded file name to its contents as bytes.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; upload the alignment file to continue.")

# Only the first uploaded file is used.
fasta_file = next(iter(uploaded))

# Write the uploaded bytes straight to the fixed name the next cell reads.
# This avoids passing the file name through the shell, which breaks on names
# containing spaces or parentheses (e.g. Colab's "name (1).txt" for duplicates).
with open("PF00018_full.txt", "wb") as dest:
    dest.write(uploaded[fasta_file])

In [None]:
import re

# Allowed symbols: the gap character followed by the 20 amino-acid letters.
alpha = '-ACDEFGHIKLMNPQRSTVWY'

# Read the alignment file line by line:
#   * blank / whitespace-only lines are skipped (they have no last token, so
#     indexing the split result would raise IndexError),
#   * lines starting with '#' or '/' are skipped,
#   * the last whitespace-separated token of each remaining line is taken as the
#     aligned sequence, and lowercase letters and '.' are stripped from it.
with open("PF00018_full.txt", "r") as fin:
    seqs = [
        re.sub('[a-z.]', '', r.split()[-1])
        for r in fin
        if r.strip() and not r.startswith(('#', '/'))
    ]

# Keep only sequences made up entirely of symbols from `alpha`.
allowed = set(alpha)
seqs = [s for s in seqs if allowed.issuperset(s)]

# One sequence per line, no trailing newline.
with open("seqs21_raw", "wt") as fout:
    fout.write("\n".join(seqs))

In [None]:
import sys

import numpy as np

# The setup cell only builds Mi3-GPU in place (no install step is visible), so
# make the checkout importable if it is not already on the path. It is appended,
# so an installed copy of the package would still take precedence.
MI3_REPO = '/content/Mi3-GPU'
if MI3_REPO not in sys.path:
    sys.path.append(MI3_REPO)

import mi3gpu.utils.seqload as seqload

# loadSeqs returns a sequence whose first element is the 2-D sequence array.
seqs = seqload.loadSeqs('seqs21_raw')[0]
nseq, L = seqs.shape  # dimensions of the unfiltered alignment

# Code 0 is treated as the gap symbol.
# NOTE(review): assumes loadSeqs uses an alphabet with '-' first -- confirm.

# Keep only columns whose gap fraction is below 10%.
col_gap_pct = np.mean(seqs == 0, axis=0)
seqs = seqs[:, col_gap_pct < 0.1]

# Keep only sequences whose gap fraction is below 10%. The fraction is taken over
# the columns that remain after the column filter; dividing by the original
# width `L` would understate it and let overly gapped sequences through.
seq_gap_pct = np.mean(seqs == 0, axis=1)
seqs = seqs[seq_gap_pct < 0.1, :]

print("N: {}   L: {}".format(*seqs.shape))

seqload.writeSeqs('seqs21', seqs)

In [None]:
# Value embedded in the output file names of this and the following cell.
phy = 0.4

# Uniform weighting: one weight of 1.0 per sequence.
n_weights = len(seqs)
weights = np.ones(n_weights)

# Saved as 'weights<phy>.npy'.
weights_path = 'weights{}.npy'.format(phy)
np.save(weights_path, weights)

In [None]:
# Neff is taken to be the number of sequences.
Neff = len(seqs)

# Store it as plain text (no trailing newline) in 'Neff<phy>.txt'.
neff_path = 'Neff{}.txt'.format(phy)
with open(neff_path, 'w') as neff_file:
    neff_file.write('{}'.format(Neff))

In [None]:
# Values interpolated into the shell command below via {name}.
Mi3_path = '/content/Mi3-GPU/mi3gpu/Mi3.py'
# NOTE(review): no visible cell creates this file; it must already exist in the
# current working directory when this cell runs -- confirm where it comes from.
bim = 'bim21Jeff.npy'
outdir = 'inference_output'
# Run the `infer` subcommand of Mi3.py with the arguments given below.
# `-u` runs Python with unbuffered stdout/stderr so output appears as it is produced.
!python3 -u {Mi3_path} infer --bimarg {bim} --init_model independent --nwalkers 262144 --mcsteps 64 --reg l1z:0.0001 --outdir {outdir}