In [1]:
import pandas as pd
import os
import subprocess
import random

In [None]:
# Reference GEMMA invocations (shell, not Python; this cell has no execution
# count). The job-script cells later in the notebook embed the same two
# commands, one per bioclim variable.
#
# 1) -lmm run on the 1001g_grenet_climate PLINK fileset, with the matrix passed
#    via -k and the covariate file passed via -c.
# NOTE(review): the output prefix says "maf0.01" while the filter is -maf 0.05;
# confirm which one is intended.
gemma \
-bfile 1001g_grenet_climate \
-maf 0.05 \
-lmm -k output/1001g_grenet_climateLDpruned_05maf.cXX.txt \
-c pc_1000.txt \
-o 1001g_grenet_climate_maf0.01


# 2) -bslmm 1 run on the same fileset (no -k / -c arguments).
gemma \
-bfile 1001g_grenet_climate \
-maf 0.05 \
-bslmm 1 \
-o bslmm_nogrenenet_1001g_grenet_climate

In [4]:
# Per-ecotype metadata table. Later cells use its 'ecotypeid' column as the
# join key and every column whose name contains 'bio' as a phenotype.
# All columns are loaded; the usecols=['ecotypeid', 'bio1'] restriction is
# intentionally left disabled.
climate_file = '/carnegie/nobackup/scratch/xwu/grenet/metadata/1001g_regmap_grenet_ecotype_info_corrected_bioclim_2024May16.csv'
climate = pd.read_csv(filepath_or_buffer=climate_file)

In [5]:
# Original PLINK .fam sample table. It is read without a header row, so its
# columns are addressed by integer position (0, 1, ...) in later cells.
# The relative path is resolved against the kernel's working directory.
og_fam = pd.read_csv('1001g_grenet_climate.fam', header=None, sep=' ')

In [6]:
# Single-variable trial of the phenotype swap (bio1 only): attach bio1 to each
# .fam row by ecotype id, then drop the original column 5 and the redundant
# join key so bio1 ends up as the last column.
biovar1 = climate[['ecotypeid', 'bio1']]

fam = (
    og_fam
    .merge(biovar1, how='left', left_on=0, right_on='ecotypeid')
    .drop(columns=[5, 'ecotypeid'])
)

In [7]:
# Print the physical (symlink-resolved) working directory of the kernel.
# NOTE(review): the recorded output ends in gwas/allele_assoc_runs while `path`
# in the next cell uses idea_fromind_to_pop/allele_assoc_runs; confirm both
# refer to the same directory.
!pwd -P

/carnegie/nobackup/scratch/tbellagio/gea_grene-net/gwas/allele_assoc_runs


In [8]:
# Root directory of all association runs: the per-method output folders and the
# shared .bed/.bim, kinship and PC file paths are all built from it.
path = '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs'

In [35]:
# Output roots for the two GEMMA analyses. The trailing slash lets later cells
# append the bioclim variable name directly.
gemma_path = f'{path}/lmm_gemma/'
bslmm_path = f'{path}/bslmm/'

In [36]:
# Names of all climate-table columns that contain the substring 'bio'
# (substring match, so any column with 'bio' anywhere in its name is included).
biovars = [column for column in climate.columns if 'bio' in column]

In [37]:
# Shared genotype files; every per-variable run folder is given symlinks to
# these instead of its own copy.
bed_file = f'{path}/1001g_grenet_climate.bed'
bim_file = f'{path}/1001g_grenet_climate.bim'

In [40]:
# Build one self-contained PLINK fileset per bioclim variable and per method
# (LMM and BSLMM): a .fam whose last column holds that variable, plus symlinks
# to the shared .bed/.bim so genotypes are not copied. The cell is safe to
# re-run: folders, .fam files and symlinks are all (re)created idempotently.
for biovar in biovars:

    biovar_gemma_path = gemma_path + biovar
    biovar_bslmm_path = bslmm_path + biovar

    # Attach this variable to every .fam row by ecotype id, then drop the
    # original column 5 and the redundant join key so the variable becomes the
    # last column of the written .fam.
    biovar1 = climate[['ecotypeid', biovar]]
    fam = og_fam.merge(biovar1, left_on=0, right_on='ecotypeid', how='left')
    fam = fam.drop([5, 'ecotypeid'], axis=1)

    # A left merge only keeps the row count when ecotype ids are unique in the
    # climate table. Extra rows would desynchronise the .fam from the sample
    # order stored in the shared .bed, so fail loudly instead.
    if len(fam) != len(og_fam):
        raise ValueError(
            f"merging '{biovar}' changed the number of .fam rows "
            f"({len(og_fam)} -> {len(fam)}); check for duplicate ecotype ids "
            "in the climate table"
        )

    for run_dir in (biovar_gemma_path, biovar_bslmm_path):
        ## create folders for all the biovars
        os.makedirs(run_dir, exist_ok=True)

        ## create fam file
        # na_rep='NA': samples without a climate record come out of the left
        # merge as NaN. The default empty string would leave a row with a
        # missing last field in this space-delimited file.
        fam.to_csv(run_dir + '/1001g_grenet_climate.fam',
                   index=None, header=None, sep=' ', na_rep='NA')

        ## create symlinks for bed bim
        for source in (bed_file, bim_file):
            link = os.path.join(run_dir, os.path.basename(source))
            # Replace a link left behind by a previous run. A regular file at
            # this location is not removed and still makes os.symlink raise.
            if os.path.islink(link):
                os.unlink(link)
            os.symlink(source, link)

In [None]:
# Same literal as the earlier `path` assignment; redundant when the notebook is
# run top to bottom.
path = '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs'


In [65]:
# Write one SLURM batch script per bioclim variable that runs the GEMMA -lmm
# analysis from inside that variable's run folder. This cell only writes the
# scripts and collects their paths in `shfiles`; submission is done separately.
shfiles = []

# Kinship matrix and covariate file shared by every LMM job.
kinship_path = path + '/1001g_grenet_climateLDpruned_05maf.cXX.txt'
pcs_path = path + '/pc_1000.txt'

for biovar in biovars:
    biovar_gemma_path = gemma_path + biovar + '/'
    file = biovar_gemma_path + f'gemma_{biovar}.sh'
    # The template is a regular (non-raw) string, so Python consumes every
    # backslash-newline pair: the gemma call reaches the script as one line.
    # `${{PATH}}` is the escaped form that renders as a literal ${PATH}.
    text = f'''#!/bin/bash
#SBATCH --job-name=gemma_{biovar}
#SBATCH --time=2-00:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=120gb
#SBATCH --cpus-per-task=2
#SBATCH --output=gemma_{biovar}_%j.out
#SBATCH --mail-user=tbellagio@carnegiescience.edu
#SBATCH --mail-type=FAIL

source /home/tbellagio/miniforge3/etc/profile.d/conda.sh

export PATH="${{PATH}}:/home/username/bin"

cd {biovar_gemma_path}

conda activate /home/tbellagio/miniforge3/envs/gwas

gemma \
-bfile 1001g_grenet_climate \
-maf 0.05 \
-lmm -k {kinship_path} \
-c {pcs_path} \
-o {biovar}

'''

    with open(file, 'w') as o:
        o.write(text)
    shfiles.append(file)
    

In [55]:
# Show the path of the first generated script as a quick sanity check.
shfiles[0]

'/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/lmm_gemma/bio1/gemma_bio1.sh'

In [56]:
# Trial submission of the first script only. check=True makes a rejected
# submission raise CalledProcessError instead of returning a non-zero code that
# nobody inspects.
# NOTE(review): the submit-all loop iterates over every entry of `shfiles`,
# including this one, so running both cells queues this script twice.
subprocess.run(['sbatch', shfiles[0]], check=True)

Submitted batch job 5825


CompletedProcess(args=['sbatch', '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/lmm_gemma/bio1/gemma_bio1.sh'], returncode=0)

In [66]:
# Queue every generated script. check=True stops at the first submission that
# sbatch rejects (CalledProcessError) rather than silently skipping it.
for file in shfiles:
    subprocess.run(['sbatch', file], check=True)

Submitted batch job 5829
Submitted batch job 5830
Submitted batch job 5831
Submitted batch job 5832
Submitted batch job 5833
Submitted batch job 5834
Submitted batch job 5835
Submitted batch job 5836
Submitted batch job 5837
Submitted batch job 5838
Submitted batch job 5839
Submitted batch job 5840
Submitted batch job 5841
Submitted batch job 5842
Submitted batch job 5843
Submitted batch job 5844
Submitted batch job 5845
Submitted batch job 5846
Submitted batch job 5847


In [67]:
# Write one SLURM batch script per bioclim variable that runs GEMMA -bslmm 1
# from inside that variable's run folder. This cell only writes the scripts and
# collects their paths in `shfiles`; submission is done separately.
shfiles = []
for biovar in biovars:
    biovar_bslmm_path = bslmm_path + biovar + '/'
    # Draw a per-job seed and record it in the script via -seed, so a given
    # script always reproduces the same run. Re-running this cell rewrites the
    # scripts with freshly drawn seeds.
    seed = random.randint(1, 100000000)
    file = biovar_bslmm_path + f'bslmm_{biovar}.sh'
    # The template is a regular (non-raw) string, so Python consumes every
    # backslash-newline pair: the gemma call reaches the script as one line.
    # `${{PATH}}` is the escaped form that renders as a literal ${PATH}.
    text = f'''#!/bin/bash
#SBATCH --job-name=bslmm_{biovar}
#SBATCH --time=2-00:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=1
#SBATCH --mem-per-cpu=120gb
#SBATCH --cpus-per-task=2
#SBATCH --output=bslmm_{biovar}_%j.out
#SBATCH --mail-user=tbellagio@carnegiescience.edu
#SBATCH --mail-type=FAIL

source /home/tbellagio/miniforge3/etc/profile.d/conda.sh

export PATH="${{PATH}}:/home/username/bin"

cd {biovar_bslmm_path}

conda activate /home/tbellagio/miniforge3/envs/gwas

gemma \
-bfile 1001g_grenet_climate \
-maf 0.05 \
-bslmm 1 \
-seed {seed} \
-o {biovar}

'''

    with open(file, 'w') as o:
        o.write(text)
    shfiles.append(file)

In [68]:
# Display the full list of generated script paths.
shfiles

['/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio1/bslmm_bio1.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio2/bslmm_bio2.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio3/bslmm_bio3.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio4/bslmm_bio4.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio5/bslmm_bio5.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio6/bslmm_bio6.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio7/bslmm_bio7.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio8/bslmm_bio8.sh',
 '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/ide

In [64]:
# Trial submission of the first script only. check=True makes a rejected
# submission raise CalledProcessError instead of returning a non-zero code that
# nobody inspects.
# NOTE(review): the submit-all loop iterates over every entry of `shfiles`,
# including this one, so running both cells queues this script twice.
subprocess.run(['sbatch', shfiles[0]], check=True)

Submitted batch job 5827


CompletedProcess(args=['sbatch', '/carnegie/nobackup/scratch/tbellagio/gea_grene-net/idea_fromind_to_pop/allele_assoc_runs/bslmm/bio1/bslmm_bio1.sh'], returncode=0)

In [69]:
# Queue every generated script. check=True stops at the first submission that
# sbatch rejects (CalledProcessError) rather than silently skipping it.
for file in shfiles:
    subprocess.run(['sbatch', file], check=True)

Submitted batch job 5848
Submitted batch job 5849
Submitted batch job 5850
Submitted batch job 5851
Submitted batch job 5852
Submitted batch job 5853
Submitted batch job 5854
Submitted batch job 5855
Submitted batch job 5856
Submitted batch job 5857
Submitted batch job 5858
Submitted batch job 5859
Submitted batch job 5860
Submitted batch job 5861
Submitted batch job 5862
Submitted batch job 5863
Submitted batch job 5864
Submitted batch job 5865
Submitted batch job 5866
