# Produce the IR maps for GEM 2021

In [1]:
# Imports
import ag3
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import numpy as np
from matplotlib.lines import Line2D
import matplotlib.patches as patches
import matplotlib.patheffects as PathEffects

  import pandas.util.testing as tm


## Get the sample metadata for all of the wild sample sets

In [2]:
# Get the ag3 data
ag3_data = ag3.release_data()

In [3]:
# Get the list of all the wild sample sets
sample_sets = ag3_data.all_wild_sample_sets

In [4]:
# Eyeball the list of sample sets
sample_sets

['AG1000G-AO',
 'AG1000G-BF-A',
 'AG1000G-BF-B',
 'AG1000G-BF-C',
 'AG1000G-CD',
 'AG1000G-CF',
 'AG1000G-CI',
 'AG1000G-CM-A',
 'AG1000G-CM-B',
 'AG1000G-CM-C',
 'AG1000G-FR',
 'AG1000G-GA-A',
 'AG1000G-GH',
 'AG1000G-GM-A',
 'AG1000G-GM-B',
 'AG1000G-GM-C',
 'AG1000G-GN-A',
 'AG1000G-GN-B',
 'AG1000G-GQ',
 'AG1000G-GW',
 'AG1000G-KE',
 'AG1000G-ML-A',
 'AG1000G-ML-B',
 'AG1000G-MW',
 'AG1000G-MZ',
 'AG1000G-TZ',
 'AG1000G-UG']

In [5]:
# # Get the metadata for all samples in these sample sets
# sample_meta = ag3_data.load_sample_set_metadata(sample_sets)

In [6]:
sample_meta = pd.read_csv('../data/insecticide_resistance_genotype_table.csv')

In [7]:
# Eyeball some of the sample metadata
sample_meta

Unnamed: 0,sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call,...,Gste2,Gste3,Gste4,Gste5,Gste6,Gste7,Gstu4,Max_Gstue,Cyp9k1,any_cyp
0,AR0047-C,LUA047,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F,...,0,0,0,0,0,0,0,0,0,False
1,AR0049-C,LUA049,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F,...,1,1,1,1,1,1,1,1,1,True
2,AR0051-C,LUA051,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F,...,0,0,0,0,0,0,0,0,0,False
3,AR0061-C,LUA061,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F,...,0,0,0,0,0,0,0,0,0,False
4,AR0078-C,LUA078,Joao Pinto,Angola,Luanda,2009,4,-8.884,13.302,F,...,0,0,0,0,0,0,0,0,0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2778,AC0295-C,K92,Martin Donnelly,Uganda,Kihihi,2012,11,-0.751,29.701,F,...,0,0,0,0,0,0,0,0,1,True
2779,AC0296-C,K93,Martin Donnelly,Uganda,Kihihi,2012,11,-0.751,29.701,F,...,0,0,0,0,0,0,0,0,2,True
2780,AC0297-C,K94,Martin Donnelly,Uganda,Kihihi,2012,11,-0.751,29.701,F,...,0,0,0,0,0,0,0,0,2,True
2781,AC0298-C,K95,Martin Donnelly,Uganda,Kihihi,2012,11,-0.751,29.701,F,...,0,0,0,0,0,0,0,0,1,True


In [8]:
sample_meta.columns

Index(['sample_id', 'partner_sample_id', 'contributor', 'country', 'location',
       'year', 'month', 'latitude', 'longitude', 'sex_call', 'sample_set',
       'release', 'aim_fraction_colu', 'aim_fraction_arab',
       'species_gambcolu_arabiensis', 'species_gambiae_coluzzii', 'species',
       'kdr_S', 'kdr_F', 'any_kdr', 'kdr_1570', 'ace_119s', 'gst_114t', 'sex',
       'high.var', 'Ace1', 'Cyp6aa1', 'Cyp6aa2', 'Cyp6p1', 'Cyp6p2', 'Cyp6p3',
       'Cyp6p4', 'Cyp6p5', 'Max_Cyp6aap', 'Cyp6m2', 'Cyp6m3', 'Cyp6m4',
       'Cyp6z1', 'Cyp6z2', 'Cyp6z3', 'Max_Cyp6mz', 'Gste1', 'Gste2', 'Gste3',
       'Gste4', 'Gste5', 'Gste6', 'Gste7', 'Gstu4', 'Max_Gstue', 'Cyp9k1',
       'any_cyp'],
      dtype='object')

## we are going make three maps, one for each species to compare/contrast 
- split the meta into three, we'll leave out the intermediates for now

In [9]:
sample_meta.species.unique()

array(['coluzzii', 'gambiae', 'intermediate_gambiae_coluzzii',
       'arabiensis'], dtype=object)

In [10]:
sample_meta_gamb = sample_meta[sample_meta.species == "gambiae"].copy()
sample_meta_colu = sample_meta[sample_meta.species == "coluzzii"].copy()
sample_meta_arab = sample_meta[sample_meta.species == "arabiensis"].copy()

In [11]:
# Eyeball the number of metadata records (i.e. samples) for each sample set
sample_meta_gamb.sample_set.value_counts()

AG1000G-CM-A    303
AG1000G-UG      207
AG1000G-GN-B    148
AG1000G-BF-A     98
AG1000G-CM-B     90
AG1000G-CD       76
AG1000G-MZ       74
AG1000G-GA-A     69
AG1000G-TZ       68
AG1000G-GM-A     58
AG1000G-CF       55
AG1000G-BF-B     46
AG1000G-GN-A     40
AG1000G-GH       36
AG1000G-ML-B     33
AG1000G-ML-A     33
AG1000G-GW       29
AG1000G-KE       28
AG1000G-CM-C     23
AG1000G-FR       23
AG1000G-BF-C     13
AG1000G-GQ       10
AG1000G-GM-B      9
AG1000G-GM-C      2
Name: sample_set, dtype: int64

In [12]:
sample_meta_colu.sample_set.value_counts()

AG1000G-GM-C    148
AG1000G-BF-A     82
AG1000G-AO       81
AG1000G-CI       80
AG1000G-GH       64
AG1000G-BF-B     53
AG1000G-ML-B     36
AG1000G-GN-B     35
AG1000G-ML-A     27
AG1000G-CM-C     19
AG1000G-CF       18
AG1000G-GM-B     16
AG1000G-CM-B      7
AG1000G-GM-A      5
AG1000G-GN-A      4
Name: sample_set, dtype: int64

In [13]:
sample_meta_arab.sample_set.value_counts()

AG1000G-TZ      225
AG1000G-UG       82
AG1000G-MW       41
AG1000G-KE       13
AG1000G-BF-B      3
AG1000G-CM-C      2
AG1000G-ML-B      2
Name: sample_set, dtype: int64

### See which sample sets contain samples from more than 1 country

In [14]:
# # Crosstab the sample_set ids with their countries, to count the number of samples in each country for each sample set
# sample_set_by_country = pd.crosstab(sample_meta.sample_set, sample_meta.country)
# sample_set_by_country.head()

In [15]:
# # Eyeball sample sets that have samples in multiple countries
# # `sample_set_by_country > 0` is true where a sample set has samples in that country,
# # so this sums the number of trues per row, and returns those that have > 1, i.e. samples for more than 1 country 
# sample_set_by_country.loc[(sample_set_by_country > 0).sum(axis=1) > 1]

### Convert the lat, long of all of the Mayotte sample metadata into the mean value, so that they are treated as one site, and represented as one pie on the map

In [16]:
sample_meta_gamb.loc[(sample_meta_gamb.country == 'Mayotte'),'latitude'] = sample_meta_gamb[sample_meta_gamb.country == 'Mayotte'].latitude.mean()
sample_meta_gamb.loc[(sample_meta_gamb.country == 'Mayotte'),'longitude'] = sample_meta_gamb[sample_meta_gamb.country == 'Mayotte'].longitude.mean()
sample_meta_gamb[sample_meta_gamb.country == 'Mayotte'].head()

Unnamed: 0,sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call,...,Gste2,Gste3,Gste4,Gste5,Gste6,Gste7,Gstu4,Max_Gstue,Cyp9k1,any_cyp
1050,AP0007-C,60,Igor Sharakhov,Mayotte,Mtsamboro Forest Reserve,2011,-1,-12.857087,45.137435,F,...,0,0,0,0,0,0,0,0,0,False
1051,AP0021-C,92,Igor Sharakhov,Mayotte,Karihani Lake,2011,-1,-12.857087,45.137435,F,...,0,0,0,0,0,0,0,0,0,False
1052,AP0019-C,88,Igor Sharakhov,Mayotte,Mtsanga Charifou,2011,-1,-12.857087,45.137435,M,...,0,0,0,0,0,0,0,0,0,True
1053,AP0020-C,78,Igor Sharakhov,Mayotte,Mtsanga Charifou,2011,-1,-12.857087,45.137435,M,...,0,0,0,0,0,0,0,0,0,False
1054,AP0009-C,62,Igor Sharakhov,Mayotte,Combani,2011,-1,-12.857087,45.137435,M,...,0,0,0,0,0,0,0,0,0,True


### Amend country names for map presentation and data-alignment

In [17]:
sample_meta_gamb.country = sample_meta_gamb.country.str.replace("Gambia, The", "The Gambia").str.replace("Equatorial Guinea", "Bioko")
sample_meta_colu.country = sample_meta_colu.country.str.replace("Gambia, The", "The Gambia").str.replace("Equatorial Guinea", "Bioko")

In [18]:
# Eyeball the amended country names
sample_meta_gamb.country.unique()

array(['Burkina Faso', 'Democratic Republic of Congo',
       'Central African Republic', 'Cameroon', 'Mayotte', 'Gabon',
       'Ghana', 'The Gambia', 'Guinea', 'Mali', 'Bioko', 'Guinea-Bissau',
       'Kenya', 'Mozambique', 'Tanzania', 'Uganda'], dtype=object)

In [19]:
sample_meta_colu.country.unique()

array(['Angola', 'Burkina Faso', 'Central African Republic',
       "Cote d'Ivoire", 'Cameroon', 'Ghana', 'The Gambia', 'Guinea',
       'Mali'], dtype=object)

In [20]:
sample_meta_arab.country.unique()

array(['Burkina Faso', 'Cameroon', 'Kenya', 'Mali', 'Malawi', 'Tanzania',
       'Uganda'], dtype=object)

## Define the sites as samples with the same latitude and longitude

In [21]:
# Get the sample counts by site, where a site is a unique lat, long. Include country for reference.
sites_gamb = sample_meta_gamb.groupby(['latitude', 'longitude', 'country']).size().reset_index(name='nsamples')
sites_colu = sample_meta_colu.groupby(['latitude', 'longitude', 'country']).size().reset_index(name='nsamples')
sites_arab = sample_meta_arab.groupby(['latitude', 'longitude', 'country']).size().reset_index(name='nsamples')

In [22]:
# Eyeball the number of samples in each site
sites_gamb

Unnamed: 0,latitude,longitude,country,nsamples
0,-23.716000,35.299000,Mozambique,74
1,-12.857087,45.137435,Mayotte,23
2,-4.940000,38.948000,Tanzania,36
3,-3.511000,39.909000,Kenya,28
4,-1.962000,31.651000,Tanzania,32
...,...,...,...,...
91,12.900000,-8.160000,Mali,4
92,13.200000,-6.130000,Mali,1
93,13.550000,-15.900000,The Gambia,58
94,13.567000,-14.917000,The Gambia,2


In [23]:
sites_colu

Unnamed: 0,latitude,longitude,country,nsamples
0,-8.884,13.302,Angola,81
1,2.367,9.817,Cameroon,2
2,3.88,11.506,Cameroon,7
3,3.972,11.516,Cameroon,5
4,4.055,9.721,Cameroon,2
5,4.367,18.583,Central African Republic,18
6,4.912,-1.774,Ghana,24
7,5.609,-1.549,Ghana,25
8,5.668,-0.219,Ghana,14
9,5.898,-4.823,Cote d'Ivoire,80


In [24]:
sites_arab

Unnamed: 0,latitude,longitude,country,nsamples
0,-15.933,34.755,Malawi,41
1,-4.94,38.948,Tanzania,1
2,-3.511,39.909,Kenya,13
3,-3.482,37.308,Tanzania,40
4,-1.962,31.651,Tanzania,137
5,-1.431,34.199,Tanzania,47
6,-0.751,29.701,Uganda,1
7,0.77,34.026,Uganda,81
8,9.049,13.656,Cameroon,2
9,11.15,-4.235,Burkina Faso,2


### Assign each site a unique identifier (short-hand for the unique map coordinates)

In [25]:
# Name the index 'site_id', effectively assigning a unique identifier to each site
sites_gamb.index.name = 'site_id'
sites_colu.index.name = 'site_id'
sites_arab.index.name = 'site_id'

## Separate out the sites that do not have many samples

In [26]:
# Get the subset of sites with nsamples >= cutoff
site_nsamples_cutoff = 10
sites_with_many_samples_gamb = sites_gamb[sites_gamb.nsamples >= site_nsamples_cutoff].copy()
sites_with_many_samples_colu = sites_colu[sites_colu.nsamples >= site_nsamples_cutoff].copy()
sites_with_many_samples_arab = sites_arab[sites_arab.nsamples >= site_nsamples_cutoff].copy()

In [27]:
# Eyeball the subset df
sites_with_many_samples_arab

Unnamed: 0_level_0,latitude,longitude,country,nsamples
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,-15.933,34.755,Malawi,41
2,-3.511,39.909,Kenya,13
3,-3.482,37.308,Tanzania,40
4,-1.962,31.651,Tanzania,137
5,-1.431,34.199,Tanzania,47
7,0.77,34.026,Uganda,81


In [28]:
# Eyeball the number of unique lats and longs, for the sites with nsamples >= cutoff
print('Num. sites with nsamples >= cutoff:', len(sites_with_many_samples_gamb))
print('Num. unique latitudes:', len(sites_with_many_samples_gamb.latitude.unique()))
print('Num. unique longitudes:', len(sites_with_many_samples_gamb.longitude.unique()))

Num. sites with nsamples >= cutoff: 28
Num. unique latitudes: 28
Num. unique longitudes: 28


## Get the metadata for sites with many samples 

In [29]:
# Get the sample metadata for the subset of sites with nsamples >= cutoff, preserving species call columns
sample_meta_for_sites_with_many_samples_gamb = sample_meta_gamb.merge(sites_with_many_samples_gamb.reset_index()[['longitude','latitude','site_id']])

In [30]:
sample_meta_for_sites_with_many_samples_colu = sample_meta_colu.merge(sites_with_many_samples_colu.reset_index()[['longitude','latitude','site_id']])
sample_meta_for_sites_with_many_samples_arab = sample_meta_arab.merge(sites_with_many_samples_arab.reset_index()[['longitude','latitude','site_id']])

In [31]:
# Eyeball some of the data
sample_meta_for_sites_with_many_samples_arab

Unnamed: 0,sample_id,partner_sample_id,contributor,country,location,year,month,latitude,longitude,sex_call,...,Gste3,Gste4,Gste5,Gste6,Gste7,Gstu4,Max_Gstue,Cyp9k1,any_cyp,site_id
0,AK0041-C,KIL52,Janet Midega,Kenya,Kilifi,2012,-1,-3.511,39.909,F,...,0,0,0,0,0,0,0,0,False,2
1,AK0042-C,KIL53,Janet Midega,Kenya,Kilifi,2012,-1,-3.511,39.909,F,...,0,0,0,0,0,0,0,0,False,2
2,AK0043-C,KIL54,Janet Midega,Kenya,Kilifi,2012,-1,-3.511,39.909,F,...,0,0,0,0,0,0,0,0,False,2
3,AK0044-C,KIL55,Janet Midega,Kenya,Kilifi,2012,-1,-3.511,39.909,F,...,0,0,0,0,0,0,0,0,False,2
4,AK0046-C,KIL35,Janet Midega,Kenya,Kilifi,2012,-1,-3.511,39.909,F,...,1,1,1,1,1,1,1,0,False,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354,AC0084-C,3_C2,Martin Donnelly,Uganda,Nagongera,2012,10,0.770,34.026,F,...,0,0,0,0,0,0,0,0,True,7
355,AC0085-C,3_E2,Martin Donnelly,Uganda,Nagongera,2012,10,0.770,34.026,F,...,0,0,0,0,0,0,0,0,False,7
356,AC0086-C,3_F1,Martin Donnelly,Uganda,Nagongera,2012,10,0.770,34.026,F,...,0,0,0,0,0,0,0,0,False,7
357,AC0087-Cx,3_F2,Martin Donnelly,Uganda,Nagongera,2012,10,0.770,34.026,F,...,0,0,0,0,0,0,0,0,False,7


In [32]:
# Eyeball the number of unique lats and longs, for the sites with nsamples >= cutoff
print('Num. unique latitudes:', len(sample_meta_for_sites_with_many_samples_gamb.latitude.unique()))
print('Num. unique longitudes:', len(sample_meta_for_sites_with_many_samples_gamb.longitude.unique()))

Num. unique latitudes: 28
Num. unique longitudes: 28


In [33]:
sample_meta_for_sites_with_many_samples_gamb.columns

Index(['sample_id', 'partner_sample_id', 'contributor', 'country', 'location',
       'year', 'month', 'latitude', 'longitude', 'sex_call', 'sample_set',
       'release', 'aim_fraction_colu', 'aim_fraction_arab',
       'species_gambcolu_arabiensis', 'species_gambiae_coluzzii', 'species',
       'kdr_S', 'kdr_F', 'any_kdr', 'kdr_1570', 'ace_119s', 'gst_114t', 'sex',
       'high.var', 'Ace1', 'Cyp6aa1', 'Cyp6aa2', 'Cyp6p1', 'Cyp6p2', 'Cyp6p3',
       'Cyp6p4', 'Cyp6p5', 'Max_Cyp6aap', 'Cyp6m2', 'Cyp6m3', 'Cyp6m4',
       'Cyp6z1', 'Cyp6z2', 'Cyp6z3', 'Max_Cyp6mz', 'Gste1', 'Gste2', 'Gste3',
       'Gste4', 'Gste5', 'Gste6', 'Gste7', 'Gstu4', 'Max_Gstue', 'Cyp9k1',
       'any_cyp', 'site_id'],
      dtype='object')

## get the resistance 'status' for sites with many samples

In [34]:
def status(row):
    if row.any_kdr == False:
        if row.any_cyp == False:
            return "none"
        if row.any_cyp == True:
            return "cyp"
    if row.any_kdr == True:
        if row.any_cyp == False:
            return "kdr"
        if row.any_cyp == True:
            return "both"

In [35]:
sample_meta_for_sites_with_many_samples_gamb['pyrethroid_ir'] = sample_meta_for_sites_with_many_samples_gamb.apply (lambda row: status(row), axis=1)
sample_meta_for_sites_with_many_samples_colu['pyrethroid_ir'] = sample_meta_for_sites_with_many_samples_colu.apply (lambda row: status(row), axis=1)
sample_meta_for_sites_with_many_samples_arab['pyrethroid_ir'] = sample_meta_for_sites_with_many_samples_arab.apply (lambda row: status(row), axis=1)

In [36]:
kdr_call_gamb = pd.crosstab(sample_meta_for_sites_with_many_samples_gamb.site_id, sample_meta_for_sites_with_many_samples_gamb.pyrethroid_ir)
kdr_call_colu = pd.crosstab(sample_meta_for_sites_with_many_samples_colu.site_id, sample_meta_for_sites_with_many_samples_colu.pyrethroid_ir)
kdr_call_arab = pd.crosstab(sample_meta_for_sites_with_many_samples_arab.site_id, sample_meta_for_sites_with_many_samples_arab.pyrethroid_ir)

In [37]:
kdr_call_gamb

pyrethroid_ir,both,cyp,kdr,none
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,0,13,0,61
1,0,5,0,18
2,3,4,18,11
3,1,0,6,21
4,31,0,1,0
5,87,0,8,0
6,2,0,67,0
7,91,0,21,0
14,0,0,0,10
19,64,0,12,0


In [38]:
kdr_call_colu

pyrethroid_ir,both,cyp,kdr,none
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,16,1,62,2
5,0,2,0,16
6,4,0,19,1
7,8,0,17,0
8,3,0,10,1
9,78,0,2,0
13,8,0,3,0
22,10,1,0,0
23,79,0,10,0
24,31,0,4,0


In [39]:
kdr_call_arab

pyrethroid_ir,cyp,none
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1
0,16,25
2,0,13
3,2,38
4,19,118
5,6,41
7,11,70


## Prepare the list of country names for the map(s)

In [40]:
# Get the countries as a list
countries = list(sample_meta.country.unique())
# TODO - can we get rid of these?
countries_gamb = list(sample_meta_gamb.country.unique())
countries_colu = list(sample_meta_colu.country.unique())
countries_arab = list(sample_meta_arab.country.unique())

In [41]:
# Eyeball the list of countries
countries_gamb

['Burkina Faso',
 'Democratic Republic of Congo',
 'Central African Republic',
 'Cameroon',
 'Mayotte',
 'Gabon',
 'Ghana',
 'The Gambia',
 'Guinea',
 'Mali',
 'Bioko',
 'Guinea-Bissau',
 'Kenya',
 'Mozambique',
 'Tanzania',
 'Uganda']

In [42]:
country_names_as_multiline = {
    "Bioko": "Bioko island".replace(" ", "\n"),
    "Central African Republic": "Central African Republic".replace(" ", "\n"),
    "Cote d'Ivoire": "Cote d'Ivoire".replace(" ", "\n"),
    "Democratic Republic of Congo": "Democratic\nRepublic\nof Congo",
    "Guinea-Bissau": "Guinea- Bissau".replace(" ", "\n"),
    "Mayotte":"Mayotte island".replace(" ", "\n")
}

## Get the country centres (lat, lon) for the map labels

In [43]:
df_centroids = pd.read_csv("../data/Africa_centroids.csv")
df_centroids.head()

Unnamed: 0,name_long,pop_est,gdp_md_est,lastcensus,Longitude,Latitude
0,Angola,12799293,110300.0,1970,17.537368,-12.293361
1,Burundi,8988091,3102.0,2008,29.875122,-3.359397
2,Benin,8791832,12830.0,2002,2.327853,9.64176
3,Burkina Faso,15746232,17820.0,2006,-1.754566,12.269538
4,Botswana,1990876,27060.0,2011,23.798534,-22.184032


In [44]:
# Lowercase the columns (e.g. Longitude => longitude)
df_centroids.columns = map(str.lower, df_centroids.columns)
df_centroids.columns

Index(['name_long', 'pop_est', 'gdp_md_est', 'lastcensus', 'longitude',
       'latitude'],
      dtype='object')

## Specify offsets for the country labels, using a copy of the country centres

In [45]:
# Check that the country identifiers are aligned
print('Number of countries:', len(countries))
print('Number of matching countries:', df_centroids['name_long'].isin(countries).sum())

Number of countries: 19
Number of matching countries: 19


In [46]:
# Get a copy of the country centres, only for the list of countries
# Set the index to name_long, so we can use df.loc[label]
country_metadata = df_centroids[df_centroids['name_long'].isin(countries)].copy().set_index('name_long')
country_metadata

Unnamed: 0_level_0,pop_est,gdp_md_est,lastcensus,longitude,latitude
name_long,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Angola,12799293,110300.0,1970,17.537368,-12.293361
Burkina Faso,15746232,17820.0,2006,-1.754566,12.269538
Central African Republic,4511488,3198.0,2003,20.468268,6.568233
Cote d'Ivoire,20617068,33850.0,1998,-5.569216,7.628426
Cameroon,18879301,42750.0,2005,12.739642,5.691098
Democratic Republic of Congo,68692542,20640.0,1984,23.643961,-2.877463
Gabon,1514993,21110.0,2003,11.788629,-0.5866
Ghana,23832495,34200.0,2010,-1.216766,6.853456
Guinea,10057975,10600.0,1996,-10.940666,10.436216
The Gambia,1782893,2272.0,2003,-15.396013,13.449652


In [47]:
# Set the default offsets to 0
country_metadata['offset_lat'] = 0.0
country_metadata['offset_lon'] = 0.0

In [48]:
# Specify lat, lon offsets for the country labels (relative to their centre)
# lat: north, lon: east
country_metadata.loc['Angola', 'offset_lat'] = -1.2
country_metadata.loc['Angola', 'offset_lon'] = -5.9

country_metadata.loc['Bioko', 'offset_lat'] = -3.8
country_metadata.loc['Bioko', 'offset_lon'] = -2.8

country_metadata.loc['Burkina Faso', 'offset_lat'] = 0.5
country_metadata.loc['Burkina Faso', 'offset_lon'] = 1.3

country_metadata.loc['Cameroon', 'offset_lat'] = 3.2
country_metadata.loc['Cameroon', 'offset_lon'] = -3.1

country_metadata.loc['Central African Republic', 'offset_lat'] = 1.4
country_metadata.loc['Central African Republic', 'offset_lon'] = -0.6

country_metadata.loc["Cote d'Ivoire", 'offset_lat'] = -7.7
country_metadata.loc["Cote d'Ivoire", 'offset_lon'] = -0.9

country_metadata.loc['Democratic Republic of Congo', 'offset_lat'] = -0.4
country_metadata.loc['Democratic Republic of Congo', 'offset_lon'] = -0.4

country_metadata.loc['Gabon', 'offset_lat'] = -4.8
country_metadata.loc['Gabon', 'offset_lon'] = -3.8

country_metadata.loc['Ghana', 'offset_lat'] = -5
country_metadata.loc['Ghana', 'offset_lon'] = 2.1

country_metadata.loc['Guinea', 'offset_lat'] = -6.5
country_metadata.loc['Guinea', 'offset_lon'] = -1.7

country_metadata.loc['Guinea-Bissau', 'offset_lat'] = -5.9
country_metadata.loc['Guinea-Bissau', 'offset_lon'] = -2.6

country_metadata.loc['Kenya', 'offset_lat'] = -1.9
country_metadata.loc['Kenya', 'offset_lon'] = 4.6

country_metadata.loc['Malawi', 'offset_lat'] = 0
country_metadata.loc['Malawi', 'offset_lon'] = -1.3

country_metadata.loc['Mali', 'offset_lat'] = -1.2
country_metadata.loc['Mali', 'offset_lon'] = -4.3

country_metadata.loc['Mayotte', 'offset_lat'] = 2.6
country_metadata.loc['Mayotte', 'offset_lon'] = -1.7

country_metadata.loc['Mozambique', 'offset_lat'] = -3.5
country_metadata.loc['Mozambique', 'offset_lon'] = 1.8

country_metadata.loc['Tanzania', 'offset_lat'] = -0.5
country_metadata.loc['Tanzania', 'offset_lon'] = -1.7

country_metadata.loc['The Gambia', 'offset_lat'] = 3.9
country_metadata.loc['The Gambia', 'offset_lon'] = 0.3

country_metadata.loc['Uganda', 'offset_lat'] = 1
country_metadata.loc['Uganda', 'offset_lon'] = -0.9

In [49]:
# Eyeball the country label offsets
country_metadata[['offset_lat', 'offset_lon']]

Unnamed: 0_level_0,offset_lat,offset_lon
name_long,Unnamed: 1_level_1,Unnamed: 2_level_1
Angola,-1.2,-5.9
Burkina Faso,0.5,1.3
Central African Republic,1.4,-0.6
Cote d'Ivoire,-7.7,-0.9
Cameroon,3.2,-3.1
Democratic Republic of Congo,-0.4,-0.4
Gabon,-4.8,-3.8
Ghana,-5.0,2.1
Guinea,-6.5,-1.7
The Gambia,3.9,0.3


## Specify offsets for the site markers & pie charts

In [None]:
# # need to change index to country to use loc
# # TODO - we lose the sites index here, is that an issue?
# sites_with_many_samples_gamb.set_index('country', inplace=True)

In [53]:
sites_with_many_samples_gamb

Unnamed: 0_level_0,latitude,longitude,country,nsamples,offset_lat,offset_lon
site_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,-23.716,35.299,Mozambique,74,0.0,0.0
1,-12.857087,45.137435,Mayotte,23,0.0,0.0
2,-4.94,38.948,Tanzania,36,0.0,0.0
3,-3.511,39.909,Kenya,28,0.0,0.0
4,-1.962,31.651,Tanzania,32,0.0,0.0
5,-0.751,29.701,Uganda,95,0.0,0.0
6,0.384,9.455,Gabon,69,0.0,0.0
7,0.77,34.026,Uganda,112,0.0,0.0
14,3.7,8.7,Bioko,10,0.0,0.0
19,4.283,21.017,Democratic Republic of Congo,76,0.0,0.0


In [51]:
# Default the site offsets to 0
sites_with_many_samples_gamb['offset_lat'] = 0.0
sites_with_many_samples_gamb['offset_lon'] = 0.0
sites_with_many_samples_colu['offset_lat'] = 0.0
sites_with_many_samples_colu['offset_lon'] = 0.0
sites_with_many_samples_arab['offset_lat'] = 0.0
sites_with_many_samples_arab['offset_lon'] = 0.0

In [None]:
# [0] = lat, [1] lon:

pie_offset_dict = {
    "Angola" :
sites_with_many_samples.loc[3, 'offset_lat'] = -2
sites_with_many_samples.loc[3, 'offset_lon'] = -1.5

#Bioko
sites_with_many_samples.loc[19, 'offset_lat'] = -1
sites_with_many_samples.loc[19, 'offset_lon'] = -1

#Burkina Faso
sites_with_many_samples.loc[98, 'offset_lat'] = -1.5
sites_with_many_samples.loc[98, 'offset_lon'] = -1.75
sites_with_many_samples.loc[97, 'offset_lat'] = -2.0
sites_with_many_samples.loc[97, 'offset_lon'] = 0.7
sites_with_many_samples.loc[96, 'offset_lat'] = 0.05
sites_with_many_samples.loc[96, 'offset_lon'] = 2.0
sites_with_many_samples.loc[103, 'offset_lat'] = -0.5
sites_with_many_samples.loc[103, 'offset_lon'] = 1.5

#CAR
sites_with_many_samples.loc[29, 'offset_lat'] = 2.0
sites_with_many_samples.loc[29, 'offset_lon'] = 1.0

#Cameroon
sites_with_many_samples.loc[23, 'offset_lat'] = -2.0 # bottom small
sites_with_many_samples.loc[23, 'offset_lon'] = 0.28
sites_with_many_samples.loc[28, 'offset_lat'] = -2.0 # bottom big
sites_with_many_samples.loc[28, 'offset_lon'] = 0.0
sites_with_many_samples.loc[30, 'offset_lat'] = -1.65 # mid big
sites_with_many_samples.loc[30, 'offset_lon'] = 2.0
sites_with_many_samples.loc[37, 'offset_lat'] = -0.4 # top big
sites_with_many_samples.loc[37, 'offset_lon'] = 2.0
sites_with_many_samples.loc[36, 'offset_lat'] = 2.0
sites_with_many_samples.loc[36, 'offset_lon'] = -1.0

#Cote d'Ivoire
sites_with_many_samples.loc[39, 'offset_lat'] = -2
sites_with_many_samples.loc[39, 'offset_lon'] = -1.5

#DRC
sites_with_many_samples.loc[26, 'offset_lat'] = -2.0
sites_with_many_samples.loc[26, 'offset_lon'] = 2.0

#Gabon
sites_with_many_samples.loc[10, 'offset_lat'] = -3.2
sites_with_many_samples.loc[10, 'offset_lon'] = -1.2

#Ghana
sites_with_many_samples.loc[31, 'offset_lat'] = -1.5
sites_with_many_samples.loc[31, 'offset_lon'] = -0.3
sites_with_many_samples.loc[34, 'offset_lat'] = -1.5
sites_with_many_samples.loc[34, 'offset_lon'] = 1.5
sites_with_many_samples.loc[35, 'offset_lat'] = -0.5
sites_with_many_samples.loc[35, 'offset_lon'] = 2.0
sites_with_many_samples.loc[40, 'offset_lat'] = 1.0
sites_with_many_samples.loc[40, 'offset_lon'] = 1.5

#Guinea
sites_with_many_samples.loc[72, 'offset_lat'] = -2
sites_with_many_samples.loc[72, 'offset_lon'] = -2.5
sites_with_many_samples.loc[60, 'offset_lat'] = -2.2
sites_with_many_samples.loc[60, 'offset_lon'] = -0.7


#Guinea Bissau
sites_with_many_samples.loc[101, 'offset_lat'] = -2.0
sites_with_many_samples.loc[101, 'offset_lon'] = 0.05
sites_with_many_samples.loc[102, 'offset_lat'] = -1.0
sites_with_many_samples.loc[102, 'offset_lon'] = -1.5

#Kenya
sites_with_many_samples.loc[5, 'offset_lat'] = 0.5
sites_with_many_samples.loc[5, 'offset_lon'] = 2.5

#Malawi
sites_with_many_samples.loc[1, 'offset_lat'] = 1.5
sites_with_many_samples.loc[1, 'offset_lon'] = -1.5

#Mali
sites_with_many_samples.loc[95, 'offset_lat'] = 0
sites_with_many_samples.loc[95, 'offset_lon'] = -2.0

sites_with_many_samples.loc[99, 'offset_lat'] = 1
sites_with_many_samples.loc[99, 'offset_lon'] = -2

sites_with_many_samples.loc[100, 'offset_lat'] = 2.0 # one before double branch
sites_with_many_samples.loc[100, 'offset_lon'] = -1.4

sites_with_many_samples.loc[109, 'offset_lat'] = 2.0 # double branch LH
sites_with_many_samples.loc[109, 'offset_lon'] = -0.5

sites_with_many_samples.loc[110, 'offset_lat'] = 2.2 # double branch RH
sites_with_many_samples.loc[110, 'offset_lon'] = 1.0

sites_with_many_samples.loc[112, 'offset_lat'] = 1.3 # top right
sites_with_many_samples.loc[112, 'offset_lon'] = -0.1

#Mayotte
sites_with_many_samples.loc[2, 'offset_lat'] = 1.5
sites_with_many_samples.loc[2, 'offset_lon'] = -1.5

#Mozambique
sites_with_many_samples.loc[0, 'offset_lat'] = 1.5
sites_with_many_samples.loc[0, 'offset_lon'] = 2

#Tanzanoa
sites_with_many_samples.loc[4, 'offset_lat'] = -1.6 #tan-east
sites_with_many_samples.loc[4, 'offset_lon'] = -1.6
sites_with_many_samples.loc[6, 'offset_lat'] = -1.5 #tan-mideast
sites_with_many_samples.loc[6, 'offset_lon'] = -0.9
sites_with_many_samples.loc[8, 'offset_lat'] = -2 #tan-midwest
sites_with_many_samples.loc[8, 'offset_lon'] = 1
sites_with_many_samples.loc[7, 'offset_lat'] = -2 #tan-west
sites_with_many_samples.loc[7, 'offset_lon'] = 1

#The Gambia
sites_with_many_samples.loc[113, 'offset_lat'] = 0.8
sites_with_many_samples.loc[113, 'offset_lon'] = -2.0
sites_with_many_samples.loc[114, 'offset_lat'] = 2
sites_with_many_samples.loc[114, 'offset_lon'] = -1.0
sites_with_many_samples.loc[117, 'offset_lat'] = 2.0
sites_with_many_samples.loc[117, 'offset_lon'] = 0.75
sites_with_many_samples.loc[116, 'offset_lat'] = 1.8
sites_with_many_samples.loc[116, 'offset_lon'] = 2

#Uganda 
sites_with_many_samples.loc[11, 'offset_lat'] = 2 # east
sites_with_many_samples.loc[11, 'offset_lon'] = 2
sites_with_many_samples.loc[9, 'offset_lat'] = 1.3 # west
sites_with_many_samples.loc[9, 'offset_lon'] = -1


## Specify the species names, and associate a different colour with each species, for pie charts on the map

In [None]:
# Set the list of long species names, with italics
pyr_nm = ['$kdr$', '$Cyp$ amplification', '$kdr$ and $Cyp$ amplification', 'neither']

In [None]:
# palette
flat = ["#af3bf7",  "#ffe27c", "#ffa042","#dddddd"]
palette = sns.color_palette(flat, n_colors=len(pyr_nm))
sns.palplot(palette)
_ = plt.gca().set_xticklabels(pyr_nm, rotation='vertical', ha='left')

In [None]:
#merge all the useful info
plotting_df = pd.merge(sites_with_many_samples, kdr_call, on='site_id')

In [None]:
plotting_df

In [None]:
for site, row in plotting_df.iterrows():
    print(site, row.longitude)

## Define a function to plot the sites

In [None]:
def plot_map(label_countries=True, pie_scale_factor=0.01):
    
    subplot_kw = dict(projection=ccrs.PlateCarree())
    # figsize here is the entire figure size in inches, reduced later by bbox_inches
    # This dpi setting affects the display size of the figure in the notebook
    fig, ax = plt.subplots(figsize=(8, 4), subplot_kw=subplot_kw, dpi=150)
    ax.add_feature(cfeature.LAND, linewidth=1, zorder=1, edgecolor='gray')
    ax.add_feature(cfeature.BORDERS, linewidth=0.5, zorder=2, edgecolor='gray')
    
    # For each site, plot the site marker or pie
    for site, metadata in plotting_df.iterrows():
        lon = metadata.longitude
        lat = metadata.latitude
        nsamples = metadata.nsamples
          
        # Get the centre offsets for this site
        offset_lon = metadata.offset_lon
        offset_lat = metadata.offset_lat

        # Calculate the pie's centre (offset) and radius (scaled)
        centre = (lon + offset_lon, lat + offset_lat)
        radius = np.sqrt(nsamples * pie_scale_factor)

        # Get the species calls for this site
        both = metadata.both
        cyp = metadata.cyp
        kdr = metadata.kdr
        none = metadata.none

        # Add a circle around the pie
        ax.add_patch(plt.Circle(xy=centre, radius=radius, edgecolor='k', zorder=5, lw=2))

        # Add the pie
        ax.pie([kdr, cyp, both, none], wedgeprops=dict(zorder=7) , colors=palette, center=centre, radius=radius)

        # If either of the offsets aren't 0, then plot a line from the site center to the pie centre
        if offset_lat != 0 or offset_lon != 0:
            ln2, = ax.plot([lon, lon + offset_lon],[lat, lat + offset_lat], 'k', lw=1, zorder=5)
            ln2.set_solid_capstyle('round')


        # If option to label the countries
        if label_countries:

            for country, metadata in country_metadata.iterrows():

                # Calculate the offset centre for the country label
                lon = metadata['longitude'] + metadata['offset_lon']
                lat = metadata['latitude'] + metadata['offset_lat']

                # If there is no corresponding multiline country then default
                long_label = country_names_as_multiline.get(country, country)

                # Add the text
                country_label = plt.text(lon, lat, long_label, fontsize=6, ha='center', zorder=10)
                country_label.set_path_effects([PathEffects.withStroke(linewidth=1.5, foreground='w')])


    # Legends
    
    # Cover up islands under the legend, e.g. Ascension Island, using a white rectangle 
    ax.add_patch(patches.Rectangle((-14.5,-8.5), 1, 1 , linewidth=1, edgecolor='w', facecolor='w', zorder=3))
            
    ## Species colour legend
    plt.text(-21.7, -3, 'Pyrethroid resistance genotype', fontsize=7, fontweight='bold')
    swatch_y = -5.4
    swatch_text_y = -6
    for i in range(0, 4):
        plt.plot([-18.5, -21], [swatch_y, swatch_y], color=flat[i], lw=7, zorder=11, solid_capstyle='round')
        plt.text(-17, swatch_text_y, s=pyr_nm[i], fontsize=7 )
        swatch_y -= 2.5
        swatch_text_y -= 2.5

    # Sample size legend
    plt.text(-21.7, -19, 'Sample size', fontsize=7, fontweight='bold')
    # Markers of same size as circles (below) used for spacing, hidden by alpha
    l1 = Line2D([], [], marker = 'o', markersize=4.9, markerfacecolor='k', color='w', alpha=0) 
    l2 = Line2D([], [], marker = 'o', markersize=8.5, markerfacecolor='k', color='w', alpha=0) 
    l3 = Line2D([], [], marker = 'o', markersize=11.3, markerfacecolor='k', color='w', alpha=0)
    labelz = [f"n=10", " n=50", "  n=100"]
    leg = ax.legend([l1, l2, l3], labelz, ncol=3, frameon=False, fontsize=6.5, handlelength=1, loc=3,
                    borderpad=1.6, handletextpad=0.6, bbox_to_anchor=(-0.05, 0.0))
    # lw=2 to match the circles added around the pies
    ax.add_patch(plt.Circle(xy=(-16.6,-21.3), radius=np.sqrt(10 * pie_scale_factor), facecolor='k', zorder=12, lw=2))
    ax.add_patch(plt.Circle(xy=(-7.3,-21.3), radius=np.sqrt(50 * pie_scale_factor), facecolor='k', zorder=12, lw=2))
    ax.add_patch(plt.Circle(xy=(3.4,-21.3), radius=np.sqrt(100 * pie_scale_factor), facecolor='k', zorder=12, lw=2))

    # This needs to be near the end
    ax.set_extent([-23, 47, -21, 16])
    
    # `bbox_inches='tight'` only saves a tight bounding box around the figure, otherwise surrounding white-space as per figsize
    # DPI set as a precaution, in case bitmaps are included in the SVG
    fig.savefig('GEM_pyr_ir_map.svg', bbox_inches='tight', dpi=300)
    fig.savefig('GEM_pyr_ir_map.jpg', bbox_inches='tight', dpi=300)

    
    return ax

## Produce the map using the plot function

In [None]:
ax = plot_map()