# FEEMS - Ag

An attempt to adapt the [FEEMS example notebook](https://nbviewer.org/github/NovembreLab/feems/blob/main/docsrc/notebooks/getting-started.ipynb) to work with data from Ag1000G.

In [None]:
# install feems - N.B., do this first, because it causes a numpy downgrade
# (installed directly from the GitHub repository; -q keeps pip's output quiet)
!pip install -q git+https://github.com/NovembreLab/feems

In [None]:
# install cartopy (special faff for colab)
!apt-get -q install libgdal-dev libproj-dev libgeos-dev proj-data proj-bin
!pip uninstall -q -y shapely  # make sure we have a fresh shapely install
!pip install -q shapely==1.7.1 --no-binary shapely
!pip install -q cartopy==0.18.0

In [None]:
# install scikit-sparse
!apt-get -q install libsuitesparse-dev
!pip install -q scikit-sparse

In [None]:
import warnings

# suppress every warning category for the rest of the session
warnings.filterwarnings(action="ignore")

In [None]:
# smoke-test the cartopy install by drawing a simple world map
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

fig = plt.figure(figsize=(10, 5))
robinson = ccrs.Robinson()
ax = fig.add_subplot(1, 1, 1, projection=robinson)

# show the whole globe instead of zooming to the extent of the plotted data
ax.set_global()
ax.stock_img()
ax.coastlines()

# one marker, then the same two-point line drawn under two different transforms
lons = [-0.08, 132]
lats = [51.53, 43.17]
plate_carree = ccrs.PlateCarree()
ax.plot(lons[0], lats[0], 'o', transform=plate_carree)
ax.plot(lons, lats, transform=plate_carree)
ax.plot(lons, lats, transform=ccrs.Geodetic());

In [None]:
# base
import numpy as np
import pkg_resources
from sklearn.impute import SimpleImputer
from pandas_plink import read_plink

# viz
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

# feems
from feems.utils import prepare_graph_inputs
from feems import SpatialGraph, Viz

**Make sure the numpy version is 1.18.5 (installing feems above should have downgraded it).**

In [None]:
# show the version of numpy that is actually imported in this session
np.__version__

In [None]:
# cross-check the installed numpy and scipy packages as reported by pip
!pip list | grep numpy
!pip list | grep scipy

In [None]:
# filesystem path of the "data/" resources bundled with the installed feems package
data_path = pkg_resources.resource_filename("feems", "data/")

In [None]:
# setup graph
# the wolvesadmix .coord/.outer files from the feems example are not loaded;
# sample coordinates (ag_coord) and an outline (ag_outer) are defined further down
grid_path = f"{data_path}/grid_100.shp"  # path to discrete global grid

In [None]:
import malariagen_data

In [None]:
# create the Ag3 data-access object used for the metadata and SNP queries below
ag3 = malariagen_data.Ag3()
ag3  # show its notebook representation

In [None]:
# sample sets to pull samples from
sample_sets = [
    "AG1000G-BF-A",
    "AG1000G-CM-B",
    "AG1000G-CD",
    "AG1000G-UG",
    "AG1000G-TZ",
    "AG1000G-MZ",
]

# expression evaluated against the sample metadata to pick the cohort
sample_query = "taxon == 'gambiae'"

In [None]:
# load the sample metadata for the chosen sample sets
df_samples = ag3.sample_metadata(sample_sets=sample_sets)
df_samples  # display the table

In [None]:
# evaluate the cohort query against the metadata: one boolean per row
cohort_mask = df_samples.eval(sample_query)

# keep the mask as a plain array (it is reused to subset the SNP calls)
loc_cohort = cohort_mask.values

# metadata rows for the cohort only
df_samples_cohort = df_samples.loc[loc_cohort]

In [None]:
# sample coordinates as an array with columns (longitude, latitude)
coord_columns = ["longitude", "latitude"]
ag_coord = np.array(df_samples_cohort[coord_columns])
ag_coord.shape

In [None]:
# hand-specified outline vertices, given as (longitude, latitude) pairs
ag_outer = np.array(
    [[-10, 20], [-10, 0], [10, 0], [10, -25], [40, -25], [40, 20]]
)

In [None]:
# map the cohort's sampling locations (red) together with the ag_outer outline (green)
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature

fig = plt.figure(figsize=(15, 15))
projection = ccrs.EquidistantConic(central_longitude=20, central_latitude=0)
ax = fig.add_subplot(1, 1, 1, projection=projection)

ax.stock_img()
ax.coastlines(resolution='50m', linewidth=1)
ax.add_feature(cfeature.BORDERS, linewidth=1)
ax.gridlines(
    crs=ccrs.PlateCarree(),
    xlocs=np.arange(-180, 180, 10),
    # latitude is only defined on [-90, 90], so do not reuse the longitude range
    ylocs=np.arange(-90, 91, 10),
    draw_labels=True)

# view = bounding box of the outline padded by 5 degrees on every side
pad = 5
ax.set_extent(
    [
        ag_outer[:, 0].min() - pad,
        ag_outer[:, 0].max() + pad,
        ag_outer[:, 1].min() - pad,
        ag_outer[:, 1].max() + pad,
    ],
    crs=ccrs.PlateCarree(),
)
ax.plot(ag_coord[:, 0], ag_coord[:, 1], 'o', color="red", transform=ccrs.PlateCarree());
ax.plot(ag_outer[:, 0], ag_outer[:, 1], 'o-', color="green", transform=ccrs.PlateCarree());


In [None]:
# genomic window to analyse (contig 3L, positions 10,000,000-11,000,000)
region = "3L:10_000_000-11_000_000"

# SNP calls for that window across all chosen sample sets
ds_snps = ag3.snp_calls(region=region, sample_sets=sample_sets)
ds_snps

In [None]:
# loc_cohort is a boolean mask over samples, i.e. a positional selection,
# so use isel (position-based) rather than sel (label-based)
ds_snps_cohort = ds_snps.isel(samples=loc_cohort)
ds_snps_cohort

In [None]:
# pull out the underlying array of genotype calls for the cohort
gt = ds_snps_cohort["call_genotype"].data
gt  # display the array summary

In [None]:
import allel

In [None]:
%%time
ac = allel.GenotypeDaskArray(gt).count_alleles(max_allele=3).compute()

In [None]:
# sum the allele counts along axis 1 (presumably the total number of called
# alleles at each variant - confirm against the layout of ac)
an = ac.sum(axis=1)

In [None]:
# allele number expected when no call is missing (two alleles per cohort sample)
n_alleles_complete = len(df_samples_cohort) * 2

# keep variants that are biallelic (ref/first alt), segregating and fully called
loc_seg = (
    ac.is_biallelic_01()
    & ac.is_segregating()
    & (an == n_alleles_complete)
)
np.sum(loc_seg)

In [None]:
# restrict the genotype calls to the variants selected by loc_seg
gt_seg = gt[loc_seg]
gt_seg  # display the array summary

In [None]:
# convert the selected genotype calls to per-sample alternate-allele counts
gt_seg_wrapped = allel.GenotypeDaskArray(gt_seg)
gn = gt_seg_wrapped.to_n_alt().compute()
gn.shape

In [None]:
# sanity check: dimensions of the sample-coordinate array
ag_coord.shape

In [None]:
# sanity check: dimensions of the outline array
ag_outer.shape

In [None]:
# graph input files
# NOTE(review): outer=None means the ag_outer outline defined earlier is not
# used here; presumably feems then derives the boundary from coord and buffer
# - confirm against the feems documentation
new_outer, edges, grid, _ = prepare_graph_inputs(
    coord=ag_coord, 
    ggrid=grid_path,
    translated=False, 
    buffer=2,
    outer=None,
)

In [None]:
%%time
# build the spatial graph from the genotypes, sample coordinates, grid and edges;
# gn is transposed before being passed (presumably to samples x SNPs - confirm)
sp_graph = SpatialGraph(gn.T, ag_coord, grid, edges, scale_snps=True)

In [None]:
# draw the graph before fitting: base map, samples, unweighted edges, observed nodes
projection = ccrs.EquidistantConic(central_longitude=20, central_latitude=0)
fig = plt.figure(dpi=300)
ax = fig.add_subplot(1, 1, 1, projection=projection)

viz_options = dict(
    edge_width=.5,
    edge_alpha=1,
    edge_zorder=100,
    sample_pt_size=10,
    obs_node_size=7.5,
    sample_pt_color="black",
    cbar_font_size=10,
)
v = Viz(ax, sp_graph, projection=projection, **viz_options)

v.draw_map()
v.draw_samples()
v.draw_edges(use_weights=False)
v.draw_obs_nodes(use_ids=False)

In [None]:
%%time
# fit the graph with lamb=20.0; the next cell plots the resulting edge weights
sp_graph.fit(lamb=20.0)

In [None]:
# draw the fitted graph: base map, weighted edges, observed nodes and a colorbar
# (reuses the `projection` created in an earlier cell)
fig = plt.figure(dpi=300)
ax = fig.add_subplot(1, 1, 1, projection=projection)

viz_options = dict(
    edge_width=.5,
    edge_alpha=1,
    edge_zorder=100,
    sample_pt_size=20,
    obs_node_size=7.5,
    sample_pt_color="black",
    cbar_font_size=10,
)
v = Viz(ax, sp_graph, projection=projection, **viz_options)

v.draw_map()
v.draw_edges(use_weights=True)
v.draw_obs_nodes(use_ids=False)
v.draw_edge_colorbar()

In [None]:
%%time
# refit the same graph with lamb=2.0; the next cell plots the resulting edge weights
sp_graph.fit(lamb=2.0)

In [None]:
# draw the refitted graph: base map, weighted edges, observed nodes and a colorbar
# (reuses the `projection` created in an earlier cell)
fig = plt.figure(dpi=300)
ax = fig.add_subplot(1, 1, 1, projection=projection)

viz_options = dict(
    edge_width=.5,
    edge_alpha=1,
    edge_zorder=100,
    sample_pt_size=20,
    obs_node_size=7.5,
    sample_pt_color="black",
    cbar_font_size=10,
)
v = Viz(ax, sp_graph, projection=projection, **viz_options)

v.draw_map()
v.draw_edges(use_weights=True)
v.draw_obs_nodes(use_ids=False)
v.draw_edge_colorbar()