In [None]:
import intake
import allel
import numpy as np
import matplotlib.pyplot as plt
from dask_kubernetes import KubeCluster
from dask.distributed import Client
import pandas as pd
%matplotlib inline

import dask.array as da

import seaborn as sns
sns.set_context('paper')

import scipy.stats as ss

import functools
import zarr

import fsspec
import random

import yaml

import dask

import gcsfs
from pathlib import Path

## Setup

In [None]:
# Start a Dask cluster on Kubernetes with 20 workers and attach a client to it.
cluster = KubeCluster(n_workers=20)
client = Client(cluster)

# Show the client summary as the cell output.
client

In [None]:
# Use `cache_timeout=0` to prevent object list cache, to avoid recreating map for Zarr consolidated metadata
gcs_orig = gcsfs.GCSFileSystem(project='malariagen-jupyterhub', token='cache', cache_timeout=0)
# Second filesystem object that reuses the credentials held by the first one's session;
# `gcs` is the one used for the store mappers created in later cells.
gcs =  gcsfs.GCSFileSystem(project='malariagen-jupyterhub', token=gcs_orig.session.credentials, cache_timeout=0)

In [None]:
# Open the intake catalog; every `cat.ag3.*` data source used below comes from it.
cat = intake.open_catalog('https://malariagen.github.io/intake/gcs.yml')
cat

In [None]:
# Shorthand for the `ag3` sub-catalog (later cells mostly spell out `cat.ag3` instead).
ag3 = cat.ag3
ag3

In [None]:
# Table of sample sets; its `sample_set` column drives the per-set loops below.
df_sets = ag3.sample_sets.read()
df_sets

In [None]:
# Open the reference genome zarr store (consolidated metadata) from GCS.
genome_path_gcs = 'gs://vo_agam_release/reference/genome/agamp4/Anopheles-gambiae-PEST_CHROMOSOMES_AgamP4.zarr'
genome_store = fsspec.get_mapper(genome_path_gcs)
genome = zarr.open_consolidated(genome_store)
genome

In [None]:
# Build one table of species calls across sample sets.
# The frames are collected in a list and concatenated once: growing a DataFrame
# with `DataFrame.append` inside a loop copies the data on every iteration, and
# `append` was removed in pandas 2.0.
species_frames = []
# NOTE(review): `[:-1]` skips the last sample set; the reason is not visible here.
for s in df_sets['sample_set'][:-1]:
    print(s)
    df_species = cat.ag3.species_calls_20200422_aim(sample_set=s).read()
    # Start from the gambcolu-vs-arabiensis call, then refine the 'gamb_colu'
    # rows with the gambiae-vs-coluzzii call.
    df_species['species'] = df_species.species_gambcolu_arabiensis.copy()
    loc_gamb_colu = df_species.species_gambcolu_arabiensis == 'gamb_colu'
    df_species.loc[loc_gamb_colu, 'species'] = df_species.species_gambiae_coluzzii[loc_gamb_colu]
    species_frames.append(df_species)
    print(df_species.groupby('species').size())

# Row labels are kept as read (no index reset), matching what repeated `append` produced.
df_all_species = pd.concat(species_frames) if species_frames else pd.DataFrame()

In [None]:
# Display the combined species-call table.
df_all_species

In [None]:
# Chromosome arms for which annotation arrays and site selections are prepared below.
chromosomes = ['3R','3L']

In [None]:
# Determine the output cloud directory path
output_cloud_bucket_path = Path("jon_brenas_bucket")
output_cloud_zarr_path = output_cloud_bucket_path / 'Watt-Theta.zarr'
output_zarr_map = gcs.get_mapper(output_cloud_zarr_path.as_posix())
# `results` is the zarr group that compute_wat / compute_wat_arab read from and
# write to as a persistent cache of computed theta arrays.
results = zarr.group(output_zarr_map) # Sometimes errors with `overwrite=True`, sometimes errors without, when dir not exist

In [None]:
# Determine the input cloud directory path for non_n-ness
nonn_bucket_path = Path("vo_agam_production")
nonn_cloud_zarr_path = nonn_bucket_path / 'resources' / 'observatory' / 'ag.allsites.nonN.zarr'
nonn_zarr_map = gcs.get_mapper(nonn_cloud_zarr_path.as_posix())
# NOTE(review): this is an input store but it is opened with the same
# group-creating call as the output store; a read-only open may be safer — confirm.
nonn = zarr.group(nonn_zarr_map) # Sometimes errors with `overwrite=True`, sometimes errors without, when dir not exist

In [None]:
# Determine the input cloud directory path for accessibility
accessibility_bucket_path = Path("vo_agam_production")
accessibility_cloud_zarr_path = accessibility_bucket_path / 'resources' / 'observatory' / 'non_n_accessibility' / 'non_n_accessibility.zarr'
accessibility_zarr_map = gcs.get_mapper(accessibility_cloud_zarr_path.as_posix())
# `accessibility[chrom]['is_accessible']` is read by compute_selections.
# NOTE(review): input store opened with a group-creating call; a read-only open may be safer — confirm.
accessibility = zarr.group(accessibility_zarr_map) # Sometimes errors with `overwrite=True`, sometimes errors without, when dir not exist

In [None]:
# Determine the input cloud directory path for species masks
gc_pass_bucket_path = Path("vo_agam_release")
gc_pass_cloud_zarr_path = gc_pass_bucket_path / 'v3' / 'site_filters' / 'dt_20200416' / 'gamb_colu'
gc_pass_zarr_map = gcs.get_mapper(gc_pass_cloud_zarr_path.as_posix())
# NOTE(review): `gc_pass` is not referenced in the cells visible here; the mask
# loaders read the site filters through the intake catalog instead.
gc_pass = zarr.group(gc_pass_zarr_map) # Sometimes errors with `overwrite=True`, sometimes errors without, when dir not exist

In [None]:
# Determine the input cloud directory path for annotations
annotations_bucket_path = Path("vo_agam_release")
annotations_cloud_zarr_path = annotations_bucket_path / 'reference' / 'genome' / 'agamp4' / 'Anopheles-gambiae-PEST_SEQANNOTATION_AgamP4.12.zarr'
annotations_zarr_map = gcs.get_mapper(annotations_cloud_zarr_path.as_posix())
# NOTE(review): input store opened with a group-creating call; a read-only open may be safer — confirm.
annotations = zarr.group(annotations_zarr_map) # Sometimes errors with `overwrite=True`, sometimes errors without, when dir not exist

In [None]:
# Pull the annotation members used below out of the store; each is indexed by
# chromosome name in the following cells.
codon_position_lg = annotations['codon_position']
codon_degeneracy_lg = annotations['codon_degeneracy']
seq_cls_lg = annotations['seq_cls']
seq_flen_lg = annotations['seq_flen']
seq_relpos_start_lg = annotations['seq_relpos_start']
seq_relpos_stop_lg = annotations['seq_relpos_stop']

In [None]:
# Site positions per chromosome, taken from the nonN store.
pos = {ch: nonn[ch]['variants']['POS'] for ch in chromosomes}

In [None]:
# Wrap every per-chromosome annotation array in a dask array so that the
# position lookups in the next cell are lazy.
codon_position_da = {ch: da.from_array(codon_position_lg[ch], chunks='auto') for ch in chromosomes}
codon_degeneracy_da = {ch: da.from_array(codon_degeneracy_lg[ch], chunks='auto') for ch in chromosomes}
seq_cls_da = {ch: da.from_array(seq_cls_lg[ch], chunks='auto') for ch in chromosomes}
seq_flen_da = {ch: da.from_array(seq_flen_lg[ch], chunks='auto') for ch in chromosomes}
seq_relpos_start_da = {ch: da.from_array(seq_relpos_start_lg[ch], chunks='auto') for ch in chromosomes}
seq_relpos_stop_da = {ch: da.from_array(seq_relpos_stop_lg[ch], chunks='auto') for ch in chromosomes}

In [None]:
# Restrict every annotation array to the sites listed in `pos`.
codon_position = {}
codon_degeneracy = {}
seq_cls = {}
seq_flen = {}
seq_relpos_start = {}
seq_relpos_stop = {}
for ch in chromosomes:
    # Load the positions once and shift them by one in a single vectorised step.
    # Previously a Python list was rebuilt six times per chromosome by iterating
    # the zarr array element by element, which is very slow for large arrays.
    site_idx = pos[ch][:] - 1
    codon_position[ch] = codon_position_da[ch][site_idx]
    codon_degeneracy[ch] = codon_degeneracy_da[ch][site_idx]
    seq_cls[ch] = seq_cls_da[ch][site_idx]
    seq_flen[ch] = seq_flen_da[ch][site_idx]
    seq_relpos_start[ch] = seq_relpos_start_da[ch][site_idx]
    seq_relpos_stop[ch] = seq_relpos_stop_da[ch][site_idx]

In [None]:
# Integer codes compared against `seq_cls` in compute_selections, numbered 0..10.
(
    CLS_UNKNOWN,
    CLS_UPSTREAM,
    CLS_DOWNSTREAM,
    CLS_5UTR,
    CLS_3UTR,
    CLS_CDS_FIRST,
    CLS_CDS_MID,
    CLS_CDS_LAST,
    CLS_INTRON_FIRST,
    CLS_INTRON_MID,
    CLS_INTRON_LAST,
) = range(11)

# Display names, listed in the same order as the codes above.
feature_cls_names = [
    'Unknown', 'Upstream', 'Downstream',
    "5' UTR", "3' UTR",
    "CDS (first)", "CDS (mid)", "CDS (last)",
    "Intron (first)", "Intron (mid)", "Intron (last)",
]

In [None]:
# Integer codes compared against `codon_degeneracy` in compute_selections.
DEG_UNKNOWN = 0
DEG_0 = 1
DEG_2_SIMPLE = 2
DEG_2_COMPLEX = 3
DEG_4 = 4

# Display names, listed in the same order as the codes above.
# The last entry previously read 'f-fold', a typo for '4-fold' (it labels DEG_4).
degeneracy_names = [
    'unknown', '0-fold', '2-fold simple', '2-fold complex', '4-fold'
]

In [None]:
@functools.lru_cache(maxsize=None)
def compute_selections(chrom):
    """Build the named boolean site selections for one chromosome.

    Every selection is the element-wise AND of the accessibility flags with
    conditions on the codon / sequence-class annotations of `chrom`. Results
    are memoised per `chrom` by `lru_cache`.

    Parameters
    ----------
    chrom : key into `accessibility` and the per-chromosome annotation dicts
        (e.g. '3R').

    Returns
    -------
    dict
        Maps each selection label to its boolean array, in the order the
        selections are defined below.
    """
    accessible = accessibility[chrom]['is_accessible'][:]
    cpos = codon_position[chrom][:]
    cdeg = codon_degeneracy[chrom][:]
    cls_code = seq_cls[chrom][:]
    flen = seq_flen[chrom][:]
    rel_start = seq_relpos_start[chrom][:]
    rel_stop = seq_relpos_stop[chrom][:]

    def codon_sites(position, degeneracy):
        # Accessible sites at a given codon position with a given degeneracy code.
        return accessible & (cpos == position) & (cdeg == degeneracy)

    # Any of the three intron classes.
    in_intron = (
        (cls_code == CLS_INTRON_FIRST) |
        (cls_code == CLS_INTRON_MID) |
        (cls_code == CLS_INTRON_LAST)
    )

    def intron_interior(length_ok):
        # Accessible intron sites of the requested length class, excluding the
        # bases nearest the intron start (> 6) and stop (> 3).
        return accessible & in_intron & length_ok & (rel_start > 6) & (rel_stop > 3)

    # NOTE(review): introns with length exactly 200 fall in neither the
    # "100-200bp" (< 200) nor the "> 200bp" (> 200) class — confirm intended.
    return {
        'First codon position, non-degenerate': codon_sites(0, DEG_0),
        'Second codon position, non-degenerate': codon_sites(1, DEG_0),
        'Third codon position, 2-fold simple degenerate': codon_sites(2, DEG_2_SIMPLE),
        'Third codon position, 4-fold degenerate': codon_sites(2, DEG_4),
        "Intron (\\textless 100bp)": intron_interior(flen < 100),
        "Intron (100-200bp)": intron_interior((flen >= 100) & (flen < 200)),
        "Intron (\\textgreater 200bp)": intron_interior(flen > 200),
        "Intron 5' splice site (2bp)": accessible & in_intron & (rel_start < 2),
        "Intron 3' splice site (2bp)": accessible & in_intron & (rel_stop < 2),
        "5' UTR": accessible & (cls_code == CLS_5UTR),
        "3' UTR": accessible & (cls_code == CLS_3UTR),
        'Upstream (\\textless 1kb from gene)': (
            accessible & (cls_code == CLS_UPSTREAM) & (rel_stop < 1000)
        ),
        'Downstream (\\textless 1kb from gene)': (
            accessible & (cls_code == CLS_DOWNSTREAM) & (rel_start < 1000)
        ),
        'Intergenic (\\textgreater 10kb from gene)': (
            accessible &
            (((cls_code == CLS_UPSTREAM) & (rel_stop > 10000)) |
             ((cls_code == CLS_DOWNSTREAM) & (rel_start > 10000)))
        ),
    }

In [None]:
# Site selection applied by the mask loaders: accessible 4-fold degenerate
# third codon positions, per chromosome arm.
sel = {
    chrom: compute_selections(chrom)['Third codon position, 4-fold degenerate']
    for chrom in ('3R', '3L')
}

In [None]:
# Genotypes for 3R across all sample sets except the last, concatenated along
# the sample axis (axis=1).
seq_id = '3R'
gt = allel.GenotypeDaskArray(
    da.concatenate(
        [
            cat.ag3.snp_genotypes(sample_set=sample_set).to_zarr()[seq_id]['calldata']['GT']
            for sample_set in list(df_sets['sample_set'][:-1])
        ],
        axis=1,
    )
)

## Functions

In [None]:
#@functools.lru_cache(maxsize=None)
def load_mask(seq_id, datasets, max_miss=.1):
    """Build the site mask for one chromosome arm using the gamb_colu site filter.

    A site is kept when it passes the `gamb_colu` site filter, its fraction of
    missing genotype calls across `datasets` is below `max_miss`, and it is set
    in the module-level `sel[seq_id]` selection.

    Parameters
    ----------
    seq_id : key into the zarr hierarchies and `sel` (e.g. '3L').
    datasets : iterable of sample set ids passed to `cat.ag3.snp_genotypes`.
    max_miss : exclusive upper bound on the per-site missing fraction.

    Returns
    -------
    The element-wise AND of the three boolean arrays.
    """
    site_filters = cat.ag3.site_filters_dt_20200416_gamb_colu.to_zarr()
    passes_filter = site_filters[seq_id]['variants']['filter_pass'][:]

    # Genotype calls of all requested sample sets, joined along the sample axis.
    calls_per_set = [
        cat.ag3.snp_genotypes(sample_set=ds).to_zarr()[seq_id]['calldata']['GT']
        for ds in datasets
    ]
    genotypes = allel.GenotypeDaskArray(da.concatenate(calls_per_set, axis=1))

    # Per-site fraction of samples with a missing call (computed eagerly).
    missing_fraction = genotypes.count_missing(axis=1).compute() / genotypes.shape[1]

    return passes_filter & (missing_fraction < max_miss) & sel[seq_id]

#@functools.lru_cache(maxsize=None)
def load_mask_arab(seq_id, datasets, max_miss=.1):
    """Build the site mask for one chromosome arm using the arab site filter.

    Identical to `load_mask` except that the pass/fail flags are read from the
    `site_filters_dt_20200416_arab` catalog entry.

    Parameters
    ----------
    seq_id : key into the zarr hierarchies and `sel` (e.g. '3L').
    datasets : iterable of sample set ids passed to `cat.ag3.snp_genotypes`.
    max_miss : exclusive upper bound on the per-site missing fraction.

    Returns
    -------
    The element-wise AND of filter-pass, low-missingness and `sel[seq_id]`.
    """
    site_filters = cat.ag3.site_filters_dt_20200416_arab.to_zarr()
    passes_filter = site_filters[seq_id]['variants']['filter_pass'][:]

    # Genotype calls of all requested sample sets, joined along the sample axis.
    calls_per_set = [
        cat.ag3.snp_genotypes(sample_set=ds).to_zarr()[seq_id]['calldata']['GT']
        for ds in datasets
    ]
    genotypes = allel.GenotypeDaskArray(da.concatenate(calls_per_set, axis=1))

    # Per-site fraction of samples with a missing call (computed eagerly).
    missing_fraction = genotypes.count_missing(axis=1).compute() / genotypes.shape[1]

    return passes_filter & (missing_fraction < max_miss) & sel[seq_id]

@functools.lru_cache(maxsize=None)
def load_pos(seq_id):
    """Return the SNP site positions of `seq_id` as an `allel.SortedIndex`.

    Memoised per `seq_id` by `lru_cache`.
    """
    snp_sites = cat.ag3.snp_sites.to_zarr()
    return allel.SortedIndex(snp_sites[seq_id]['variants']['POS'])

#@functools.lru_cache(maxsize=None)
def get_region(region, datasets, max_miss=.1):
    """Return mask, positions and genotypes for a genome region (gamb_colu filter).

    Parameters
    ----------
    region : tuple `(seq_id, region_start, region_stop)`.
    datasets : iterable of sample set ids passed to `cat.ag3.snp_genotypes`.
    max_miss : forwarded to `load_mask`.

    Returns
    -------
    mask_region : the part of the site mask covering the region, one entry per
        site in the region (unfiltered site coordinates).
    pos_region : positions of the sites in the region that pass the mask.
    gt_region : genotypes at those sites.
    """
    # unpack region
    seq_id, region_start, region_stop = region

    # setup variables
    gt = allel.GenotypeDaskArray(da.concatenate(
        [cat.ag3.snp_genotypes(sample_set=ds).to_zarr()[seq_id]['calldata']['GT'] for ds in datasets],
        axis=1,
    ))
    pos = load_pos(seq_id)

    # apply mask
    mask = load_mask(seq_id, datasets, max_miss)
    pos_mask = allel.SortedIndex(pos[mask])
    gt_mask = gt[mask]
    # Presumably needed because boolean indexing leaves the dask chunk sizes
    # unknown; they are resolved before slicing.
    gt_mask.compute_chunk_sizes()

    # restrict to genome region (filtered coordinates)
    loc_region = pos_mask.locate_range(region_start, region_stop)
    pos_region = pos_mask[loc_region]
    gt_region = gt_mask[loc_region]

    # `mask` is indexed by unfiltered sites, so it needs its own range lookup.
    # Applying `loc_region` (a slice into the filtered arrays) to it, as was
    # done before, returned a misaligned stretch of the mask.
    mask_region = mask[pos.locate_range(region_start, region_stop)]

    return mask_region, pos_region, gt_region

#@functools.lru_cache(maxsize=None)
def get_region_arab(region, datasets, max_miss=.1):
    """Return mask, positions and genotypes for a genome region (arab filter).

    Parameters
    ----------
    region : tuple `(seq_id, region_start, region_stop)`.
    datasets : iterable of sample set ids passed to `cat.ag3.snp_genotypes`.
    max_miss : forwarded to `load_mask_arab`.

    Returns
    -------
    mask_region : the part of the site mask covering the region, one entry per
        site in the region (unfiltered site coordinates).
    pos_region : positions of the sites in the region that pass the mask.
    gt_region : genotypes at those sites.
    """
    # unpack region
    seq_id, region_start, region_stop = region

    # setup variables
    gt = allel.GenotypeDaskArray(da.concatenate(
        [cat.ag3.snp_genotypes(sample_set=ds).to_zarr()[seq_id]['calldata']['GT'] for ds in datasets],
        axis=1,
    ))
    pos = load_pos(seq_id)

    # apply mask
    mask = load_mask_arab(seq_id, datasets, max_miss)
    pos_mask = allel.SortedIndex(pos[mask])
    gt_mask = gt[mask]
    # Presumably needed because boolean indexing leaves the dask chunk sizes
    # unknown; they are resolved before slicing.
    gt_mask.compute_chunk_sizes()

    # restrict to genome region (filtered coordinates)
    loc_region = pos_mask.locate_range(region_start, region_stop)
    pos_region = pos_mask[loc_region]
    gt_region = gt_mask[loc_region]

    # `mask` is indexed by unfiltered sites, so it needs its own range lookup.
    # Applying `loc_region` (a slice into the filtered arrays) to it, as was
    # done before, returned a misaligned stretch of the mask.
    mask_region = mask[pos.locate_range(region_start, region_stop)]

    return mask_region, pos_region, gt_region

In [None]:
#@functools.lru_cache(maxsize=None)
def count_alleles_rdm(gt_region, pop_ids, downsample_size = 0):
    """Count alleles for a set of samples, optionally on a random subsample.

    Parameters
    ----------
    gt_region : genotype array supporting `.take(..., axis=1)` and
        `.count_alleles(...)`.
    pop_ids : column indices of the samples to include.
    downsample_size : when > 0, that many indices are drawn without
        replacement from `pop_ids` (via `random.sample`) and sorted before use;
        otherwise all of `pop_ids` are used as given.

    Returns
    -------
    The computed allele counts (`max_allele=3`).
    """
    selected = pop_ids
    if downsample_size > 0:
        drawn = random.sample(list(pop_ids), downsample_size)
        selected = np.sort(drawn)

    return gt_region.take(selected, axis=1).count_alleles(max_allele=3).compute()

In [None]:
#@functools.lru_cache(maxsize=None)
def get_masks_and_windows(datasets):
    """Prepare masks, genotypes and windows for 3L and 3R (gamb_colu filter).

    For each arm this loads the region data (`get_region`), the arm-wide site
    mask (`load_mask`), a per-base accessibility array and equally accessible
    windows of 50,000 accessible bases.

    The window start/stop are taken from the same region tuple
    (`region_3L_free` / `region_3R_free`) that restricts the genotypes.
    Previously the bounds were hard-coded and swapped between the arms (3L
    windows spanned 1-37 Mb while 3L genotypes came from 15-41 Mb, and the
    reverse for 3R), so many windows contained no sites from the region.

    NOTE: arrays already cached in `results` by `compute_wat` were produced
    with the old windows and need recomputing to benefit from this fix.

    Parameters
    ----------
    datasets : iterable of sample set ids.

    Returns
    -------
    dict
        Keys 'mask', 'pos_masked', 'gt', 'all_mask', 'is_accessible' and
        'windows_euchromatin', each mapping '3L' and '3R' to the value for
        that arm.
    """
    regions = {'3L': region_3L_free, '3R': region_3R_free}
    seq_lens = {'3L': seq_len_3L, '3R': seq_len_3R}
    site_pos = {'3L': pos_3L, '3R': pos_3R}

    out = {
        'mask': {},
        'pos_masked': {},
        'gt': {},
        'all_mask': {},
        'is_accessible': {},
        'windows_euchromatin': {},
    }
    for arm in ('3L', '3R'):
        _, region_start, region_stop = regions[arm]
        mask, pos_masked, gt = get_region(regions[arm], datasets)

        # Arm-wide mask, scattered onto a per-base boolean array.
        all_mask = load_mask(arm, datasets, .1)
        is_accessible = np.zeros(seq_lens[arm], dtype=bool)
        is_accessible[site_pos[arm] - 1] = all_mask

        windows = allel.equally_accessible_windows(
            is_accessible,
            size=50_000,
            start=region_start,
            stop=region_stop
        )

        out['mask'][arm] = mask
        out['pos_masked'][arm] = pos_masked
        out['gt'][arm] = gt
        out['all_mask'][arm] = all_mask
        out['is_accessible'][arm] = is_accessible
        out['windows_euchromatin'][arm] = windows
    return out

In [None]:
#@functools.lru_cache(maxsize=None)
def get_masks_and_windows_arab(datasets):
    """Prepare masks, genotypes and windows for 3L only (arab filter).

    The window start/stop are taken from `region_3L_free`, the same tuple that
    restricts the genotypes. Previously the windows were hard-coded to
    1-37 Mb while the 3L region is defined elsewhere as 15-41 Mb, so many
    windows contained no sites from the region.

    NOTE: arrays already cached in `results` by `compute_wat_arab` were
    produced with the old windows and need recomputing to benefit from this fix.

    Parameters
    ----------
    datasets : iterable of sample set ids.

    Returns
    -------
    dict
        Keys 'mask', 'pos_masked', 'gt', 'all_mask', 'is_accessible' and
        'windows_euchromatin', each mapping '3L' to the value for that arm.
    """
    _, region_start, region_stop = region_3L_free
    mask_3L, pos_masked_3L, gt_3L = get_region_arab(region_3L_free, datasets)

    # Arm-wide mask, scattered onto a per-base boolean array.
    all_mask_3L = load_mask_arab('3L', datasets, .1)
    is_accessible_3L = np.zeros(seq_len_3L, dtype=bool)
    is_accessible_3L[pos_3L - 1] = all_mask_3L

    windows_euchromatin_3L = allel.equally_accessible_windows(
        is_accessible_3L,
        size=50_000,
        start=region_start,
        stop=region_stop
    )
    return {
        'mask': {'3L': mask_3L},
        'pos_masked': {'3L': pos_masked_3L},
        'gt': {'3L': gt_3L},
        'all_mask': {'3L': all_mask_3L},
        'is_accessible': {'3L': is_accessible_3L},
        'windows_euchromatin': {'3L': windows_euchromatin_3L},
    }

In [None]:
def get_idxs_query(dataset, species = None, location = None, year = None):
    """Select sample row indices of one sample set by species, location and year.

    Parameters
    ----------
    dataset : sample set id passed to the `samples` and species-call sources.
    species, location, year : optional filters; a falsy value disables the
        filter. `year` may be an int or a numeric string.

    Returns
    -------
    idxs : index values of the matching rows of the merged metadata.
    query : `dataset` followed by `_<value>` for every filter that was applied.
    """
    samples = cat.ag3.samples(sample_set=dataset).read()
    calls = cat.ag3.species_calls_20200422_aim(sample_set=dataset).read()

    # call species - just use AIMs, should be fine
    # The relabelling is applied to `calls`, the frame loaded above. It used to
    # target the notebook-global `df_species` left over from an earlier loop,
    # so it never affected the calls used here. The TypeError guard is kept
    # as a best-effort, as before.
    try:
        calls['species_gambcolu_arabiensis'] = calls['species_gambcolu_arabiensis'].replace(
            {'intermediate': 'intermediate_gambcolu_arabiensis'})
        calls['species_gambiae_coluzzii'] = calls['species_gambiae_coluzzii'].replace(
            {'intermediate': 'intermediate_gambiae_coluzzii'})
    except TypeError:
        pass
    # Start from the gambcolu-vs-arabiensis call, then refine 'gamb_colu' rows.
    calls['species'] = calls.species_gambcolu_arabiensis.copy()
    loc_gc = calls.species_gambcolu_arabiensis == 'gamb_colu'
    calls.loc[loc_gc, 'species'] = calls.species_gambiae_coluzzii[loc_gc]

    meta = samples.merge(calls, on='sample_id', how='left', sort=False)

    query = dataset
    loc = (meta.species != '')

    if species:
        loc = loc & (meta.species == species)
        query = query + "_" + species

    if location:
        loc = loc & (meta.location == location)
        query = query + "_" + location

    if year:
        loc = loc & (meta.year == int(year))
        # str() so that an int year no longer raises on concatenation.
        query = query + "_" + str(year)

    idxs = meta[loc].index.values

    return idxs, query

In [None]:
def compute_wat(query, sample_size, it, m_w, idxs):
    """Return windowed Watterson's theta for 3L and 3R, cached in `results`.

    Parameters
    ----------
    query, sample_size, it : combined into the cache key
        `'{query}_{sample_size}_{it}'`.
    m_w : dict as returned by `get_masks_and_windows`.
    idxs : sample column indices passed to `count_alleles_rdm`.

    Returns
    -------
    The 3L window values followed by the 3R window values. The array is read
    from `results` when the key exists; otherwise it is computed and stored.
    """
    result_path = f'{query}_{sample_size}_{it}'

    # Cache hit: load the stored array into memory.
    if result_path in results:
        return results[result_path][:]

    # NOTE: each arm draws its own random subsample inside count_alleles_rdm.
    theta_per_arm = []
    for arm in ('3L', '3R'):
        ac = count_alleles_rdm(m_w['gt'][arm], idxs, downsample_size = sample_size)
        theta, _, _, _ = allel.windowed_watterson_theta(
            pos=m_w['pos_masked'][arm],
            ac=ac,
            windows=m_w['windows_euchromatin'][arm],
            is_accessible=m_w['is_accessible'][arm]
        )
        theta_per_arm.append(theta)
    wat = np.concatenate(theta_per_arm)

    # Persist for later runs.
    results.create_dataset(result_path, data=wat)
    return wat

In [None]:
def compute_wat_arab(query, sample_size, it, m_w, idxs):
    """Return windowed Watterson's theta for 3L (arab masks), cached in `results`.

    Parameters
    ----------
    query, sample_size, it : combined into the cache key
        `'{query}_mask_arab_{sample_size}_{it}'`.
    m_w : dict as returned by `get_masks_and_windows_arab`.
    idxs : sample column indices passed to `count_alleles_rdm`.

    Returns
    -------
    The 3L window values. The array is read from `results` when the key
    exists; otherwise it is computed and stored.
    """
    result_path = f'{query}_mask_arab_{sample_size}_{it}'

    # Cache hit: load the stored array into memory.
    if result_path in results:
        return results[result_path][:]

    allele_counts = count_alleles_rdm(m_w['gt']['3L'], idxs, downsample_size = sample_size)
    wat, _, _, _ = allel.windowed_watterson_theta(
        pos=m_w['pos_masked']['3L'],
        ac=allele_counts,
        windows=m_w['windows_euchromatin']['3L'],
        is_accessible=m_w['is_accessible']['3L']
    )

    # Persist for later runs.
    results.create_dataset(result_path, data=wat)
    return wat

In [None]:
def plot_wat_pop(population, start_value = 10, inc = 5, n_it = 5, end_value = None):
    """Compute windowed Watterson's theta for a population at several sample sizes.

    Despite its name, this function does not plot; callers plot the result.

    Parameters
    ----------
    population : name of the form `'<country>_<pop_n>_<species>_<year>'`; it is
        a key into both `sources_df` and `pops`.
    start_value, inc, end_value : sample sizes are
        `range(start_value, end_value, inc)`; a falsy `end_value` means the
        number of samples in the population.
    n_it : number of random subsamples drawn per sample size.

    Returns
    -------
    wat_list : one theta array per (sample size, iteration).
    dps_list : the sample size matching each entry of `wat_list`.
    """
    country, pop_n, species, year = population.split('_')

    # A population can map to one sample set (scalar) or several (Series).
    sample_sets = sources_df.loc[population]['sample sets']
    if isinstance(sample_sets, pd.Series):
        l_sources = list(sample_sets)
    else:
        l_sources = [sample_sets]

    use_arab = species == 'arabiensis'
    if use_arab:
        m_w = get_masks_and_windows_arab(l_sources)
    else:
        m_w = get_masks_and_windows(l_sources)

    # `ignore_index=True` makes the row labels run 0..n-1 across all sample sets,
    # matching the column order of the concatenated genotypes. Without it the
    # labels restart for each sample set, so indices for populations spanning
    # several sets pointed at the wrong genotype columns.
    samples = pd.concat(
        [cat.ag3.samples(sample_set=ds).read() for ds in l_sources],
        ignore_index=True,
    )
    idxs = list(samples[samples['sample_id'].isin(pops[population])].index)

    if not end_value:
        end_value = len(idxs)

    wat_list = []
    dps_list = []
    for sample_size in range(start_value, end_value, inc):
        for iteration in range(n_it):
            if use_arab:
                wat = compute_wat_arab(population, sample_size, iteration, m_w, idxs)
            else:
                wat = compute_wat(population, sample_size, iteration, m_w, idxs)
            wat_list.append(wat)
            dps_list.append(sample_size)
    return wat_list, dps_list

## Regions

In [None]:
# reference genome
# NOTE: this re-opens the same store path that is already opened into `genome`
# near the top of the notebook.
genome = zarr.open_consolidated(
    store=fsspec.get_mapper(
        'gs://vo_agam_release/reference/genome/agamp4/Anopheles-gambiae-PEST_CHROMOSOMES_AgamP4.zarr'
    )
)
# List the top-level keys of the store.
list(genome)

In [None]:
# Lengths of the reference sequences; they size the per-base accessibility
# arrays built in get_masks_and_windows / get_masks_and_windows_arab.
seq_len_3R = len(genome['3R'])
seq_len_3L = len(genome['3L'])

In [None]:
# Regions as (seq_id, start, stop) tuples, unpacked in that order by
# get_region / get_region_arab.
region_3L_free = '3L', 15_000_000, 41_000_000
region_3R_free = '3R', 1_000_000, 37_000_000

In [None]:
# Full SNP position indexes per arm; used (as `pos - 1`) to scatter the site
# masks onto per-base arrays.
pos_3R = load_pos('3R')
pos_3L = load_pos('3L')

## Populations

In [None]:
#Population definitions

with open(r'../content/population_definitions.yml') as file:
    # `FullLoader` parses the YAML document into plain Python objects.
    # `pops` is later indexed by population name to obtain the sample ids of
    # that population (see plot_wat_pop).
    pops = yaml.load(file, Loader=yaml.FullLoader)

In [None]:
# Population name (index) -> sample set(s); column names are supplied explicitly via `names`.
sources_df = pd.read_csv('../content/population_source.csv', names=['population','sample sets'], index_col=0)
sources_df

## Dataframes

In [None]:
#df_gambiae = pd.read_csv('Watt-Theta-gambiae.csv')
#df_coluzzii = pd.read_csv('Watt-Theta-coluzzii.csv')
# Load previously saved results.
# NOTE(review): the first population cell below assigns a fresh frame to
# `df_all`, so on a top-to-bottom run this loaded frame appears to be
# discarded — confirm intent.
df_all = pd.read_csv('Watt-Theta.csv')

# Watterson's theta computation
## Gamb-colu filters

In [None]:
population = 'ANG_1_coluzzii_2009'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
# This cell starts `df_all` afresh; the following cells add to it.
theta_means = [np.mean(wat) for wat in wat_list]
df_all = pd.DataFrame({'dps': dps_list, 'mean': theta_means}).assign(
    population=population,
    country=country,
    pop_n=pop_n,
    species=species,
    year=year,
)
small_samples = df_all[df_all.dps < 70]
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=small_samples)
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'BUF_1_coluzzii_2012'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[df_all.dps < 70])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'BUF_1_coluzzii_2014'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[df_all.dps < 70])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'BUF_1_gambiae_2012'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[df_all.dps < 70])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'BUF_1_gambiae_2014'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[df_all.dps < 70])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CAR_1_coluzzii_1994'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value = 5, inc = 1)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[df_all.dps < 70])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CAR_1_gambiae_1994'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[df_all.dps < 70])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CIV_1_coluzzii_2012'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
# Plot coluzzii rows with fewer than 70 samples (single combined mask).
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CMN_1_gambiae_2005'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=7, inc=1)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
# Plot gambiae rows with fewer than 70 samples (single combined mask).
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[(df_all.species == 'gambiae') & (df_all.dps < 70)])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CMN_2_coluzzii_2013'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=7, inc=1)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
# Plot coluzzii rows with fewer than 70 samples (single combined mask).
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CMN_3_gambiae_2005'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=7, inc=1)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
# Plot gambiae rows with fewer than 70 samples (single combined mask).
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[(df_all.species == 'gambiae') & (df_all.dps < 70)])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CMN_4_gambiae_2013'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=7, inc=1)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
# Plot gambiae rows with fewer than 70 samples (single combined mask).
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[(df_all.species == 'gambiae') & (df_all.dps < 70)])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CMN_5_gambiae_2009'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
# Plot gambiae rows with fewer than 70 samples (single combined mask).
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[(df_all.species == 'gambiae') & (df_all.dps < 70)])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
population = 'CMN_6_gambiae_2009'
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# One row per computed theta array: its sample size and the mean of its values.
df_pop = pd.DataFrame({
    'dps': dps_list,
    'mean': [np.mean(wat) for wat in wat_list],
}).assign(population=population, country=country, pop_n=pop_n, species=species, year=year)
# `DataFrame.append` was removed in pandas 2.0, so `pd.concat` is used. Rows already
# present for this population are dropped first so that re-running the cell
# does not duplicate them.
df_all = pd.concat([df_all[df_all.population != population], df_pop])
# Plot gambiae rows with fewer than 70 samples (single combined mask).
sns.scatterplot(x = 'dps', y = 'mean', hue = 'population', data=df_all[(df_all.species == 'gambiae') & (df_all.dps < 70)])
df_all.to_csv('Watt-Theta.csv',index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'CMN_7_gambiae_2009'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every CMN population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.country == 'CMN') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'DRC_1_gambiae_2015'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every non-CMN population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.country != 'CMN') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GAB_1_gambiae_2000'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every non-CMN population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.country != 'CMN') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GAM_1_coluzzii_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, inc=2)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every coluzzii population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GAM_1_intermediate_2011'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every GAM population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.country == 'GAM') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GAM_2_coluzzii_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every GAM population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.country == 'GAM') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GBU_1_intermediate_2010'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every intermediate population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'intermediate') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GHA_1_coluzzii_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=7, inc=1)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every coluzzii population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GHA_1_gambiae_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every gambiae population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'gambiae') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GHA_2_coluzzii_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every coluzzii population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GHA_3_coluzzii_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every coluzzii population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GUI_1_coluzzii_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=1, inc=1)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every coluzzii population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GUI_1_gambiae_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every gambiae population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'gambiae') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'GUI_2_gambiae_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every GUI population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.country == 'GUI') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'KEN_1_gambiae_2000'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=7, inc=2)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Leave out the CMN populations, then plot the remaining gambiae rows with dps < 70.
df_nCMN = df_all[df_all.country != 'CMN']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_nCMN[(df_nCMN.species == 'gambiae') & (df_nCMN.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'KEN_1_intermediate_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every intermediate population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'intermediate') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAL_1_coluzzii_2004'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, inc=2)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Plot mean against dps for every coluzzii population collected so far, keeping dps < 70.
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_all[(df_all.species == 'coluzzii') & (df_all.dps < 70)])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Narrow df_all to the listed countries, keep only coluzzii rows, and plot
# mean against dps for the rows with dps < 70.
df_small = df_all.loc[df_all['country'].isin(['MAL', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_coluzzii = df_small.loc[df_small['species'] == 'coluzzii']
sns.scatterplot(
    data=df_small_coluzzii.loc[df_small_coluzzii['dps'] < 70],
    x='dps',
    y='mean',
    hue='population',
)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAL_2_gambiae_2004'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAL', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAL_3_coluzzii_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, inc=2)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to coluzzii, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAL', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_coluzzii = df_small[df_small.species == 'coluzzii']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_coluzzii[df_small_coluzzii.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAL_4_gambiae_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAL', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAL_5_coluzzii_2004'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population, start_value=7, inc=1)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to coluzzii, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAL', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_coluzzii = df_small[df_small.species == 'coluzzii']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_coluzzii[df_small_coluzzii.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAL_5_coluzzii_2014'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to coluzzii, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAL', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_coluzzii = df_small[df_small.species == 'coluzzii']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_coluzzii[df_small_coluzzii.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAL_5_gambiae_2014'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAL', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MAY_1_gambiae_2011'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAY', 'BUF', 'CIV', 'GUI', 'GAB', 'KEN', 'ANG'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MLW_1_arabiensis_2015'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries (all species) and plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAY', 'BUF', 'CIV', 'GAB', 'KEN', 'ANG', 'MLW'])]
sns.scatterplot(x='dps', y='mean', hue='population', data=df_small[df_small.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'MOZ_1_gambiae_2004'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAY', 'BUF', 'CIV', 'MOZ', 'GAB', 'KEN', 'ANG'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'TZA_1_arabiensis_2015'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to arabiensis, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MLW', 'TZA'])]
df_small_arab = df_small[df_small.species == 'arabiensis']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_arab[df_small_arab.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'TZA_1_gambiae_2015'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAY', 'BUF', 'CIV', 'MOZ', 'GAB', 'KEN', 'ANG', 'TZA'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'TZA_2_arabiensis_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to arabiensis, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MLW', 'TZA'])]
df_small_arab = df_small[df_small.species == 'arabiensis']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_arab[df_small_arab.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'TZA_3_arabiensis_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to arabiensis, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MLW', 'TZA'])]
df_small_arab = df_small[df_small.species == 'arabiensis']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_arab[df_small_arab.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'TZA_4_gambiae_2013'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAY', 'BUF', 'CIV', 'MOZ', 'GAB', 'KEN', 'ANG', 'TZA'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'UGA_1_gambiae_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAY', 'BUF', 'CIV', 'MOZ', 'GAB', 'KEN', 'ANG', 'TZA', 'UGA'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'UGA_2_arabiensis_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to arabiensis, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MLW', 'TZA', 'UGA'])]
df_small_arab = df_small[df_small.species == 'arabiensis']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_arab[df_small_arab.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_all.
population = 'UGA_2_gambiae_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_all = pd.concat([
    df_all,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
# Narrow to the listed countries and to gambiae, then plot the rows with dps < 70.
df_small = df_all[df_all.country.isin(['MAY', 'BUF', 'CIV', 'MOZ', 'GAB', 'KEN', 'ANG', 'TZA', 'UGA'])]
df_small_gambiae = df_small[df_small.species == 'gambiae']
sns.scatterplot(x='dps', y='mean', hue='population',
                data=df_small_gambiae[df_small_gambiae.dps < 70])
# Re-write the accumulated table to disk after each population.
df_all.to_csv('Watt-Theta.csv', index=False)

# Arabiensis filters

In [None]:
# Start a fresh table (df_arab) from the first population of this section.
# NOTE(review): this cell unpacks three values from plot_wat_pop, while the
# cells that build df_all unpack two — confirm which definition of
# plot_wat_pop is expected to be active here.
population = 'UGA_2_arabiensis_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list, m_w = plot_wat_pop(population)
# One row per dps value; the scalar label columns are broadcast by pandas.
df_arab = pd.DataFrame({
    'dps': dps_list,
    'mean': list(map(np.mean, wat_list)),
    'population': population,
    'country': country,
    'pop_n': pop_n,
    'species': species,
    'year': year,
})
sns.scatterplot(data=df_arab, x='dps', y='mean', hue='population')

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_arab.
# NOTE(review): this cell unpacks three values from plot_wat_pop, while the
# cells that build df_all unpack two — confirm which definition is active here.
population = 'TZA_3_arabiensis_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list, m_w = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_arab = pd.concat([
    df_arab,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
sns.scatterplot(x='dps', y='mean', hue='population', data=df_arab)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_arab.
# NOTE(review): this cell unpacks three values from plot_wat_pop, while the
# cells that build df_all unpack two — confirm which definition is active here.
population = 'TZA_2_arabiensis_2012'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list, m_w = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_arab = pd.concat([
    df_arab,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
sns.scatterplot(x='dps', y='mean', hue='population', data=df_arab)

In [None]:
# Run plot_wat_pop for one population and add its per-dps means to df_arab.
# NOTE(review): this cell unpacks three values from plot_wat_pop, while the
# cells that build df_all unpack two — confirm which definition is active here.
population = 'MLW_1_arabiensis_2015'
# Population labels follow <country>_<pop_n>_<species>_<year>.
country, pop_n, species, year = population.split('_')
wat_list, dps_list, m_w = plot_wat_pop(population)
# DataFrame.append was removed in pandas 2.0, so rows are added with pd.concat;
# the scalar label columns are broadcast to one row per dps value.
df_arab = pd.concat([
    df_arab,
    pd.DataFrame({
        'dps': dps_list,
        'mean': [np.mean(wat) for wat in wat_list],
        'population': population,
        'country': country,
        'pop_n': pop_n,
        'species': species,
        'year': year,
    }),
])
sns.scatterplot(x='dps', y='mean', hue='population', data=df_arab)

In [None]:
# Write the accumulated df_arab table to its own CSV file, omitting the row index.
df_arab.to_csv('Watt-Theta-arab.csv',index=False)