
Kaggle link: https://www.kaggle.com/c/predict-west-nile-virus/


Our 4th project is a group project: we were given the West Nile Virus Prediction Challenge from Kaggle (https://www.kaggle.com/c/predict-west-nile-virus/).

- Given weather, location, testing, and spraying data, predict when and where different species of mosquitoes will test positive for West Nile virus.

- Analyse effectiveness of spraying

- Present our findings in class.

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.dates import DateFormatter
import datetime

from sklearn.cluster import DBSCAN


In [None]:
df_weather = pd.read_csv('../datasets/weather.csv')
df_spray = pd.read_csv('../datasets/spray.csv')
df_train = pd.read_csv('../datasets/train.csv')

df_test = pd.read_csv('../datasets/test.csv')

## Weather Dataset


In [None]:
#checking the size of the data for weather
df_weather.shape 

In [None]:
#check whether there is null values
df_weather.isnull().sum() 

In [None]:
df_weather
#duplicate dates(2 rows), 1472 rows missing data (50% of the data)

We identified that two stations took readings on the same dates, so we will group the data from both stations into a single row per date and aggregate each column accordingly.

In [None]:
# Normalise the placeholder codes found in the raw weather file:
#   "M" and "-" mark missing readings            -> NaN
#   "  T" marks an insignificant (trace) amount  -> 0
df_weather = (
    df_weather
    .replace("M", np.nan)
    .replace("-", np.nan)
    .replace("  T", 0)
)
# Forward-fill the gaps so that every row has a value before the per-date
# averaging done later. This assumes the rows of one date are adjacent, so a
# missing reading is filled from the row directly above it.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
df_weather = df_weather.ffill()
df_weather

In [None]:
# Cast the measurement columns to float so they can be averaged per date
# (presumably they were read as text because of the "M"/"T" placeholders).
float_cols = ['Tavg', 'Depart', 'WetBulb', 'Heat', 'Cool', 'SnowFall',
              'PrecipTotal', 'StnPressure', 'SeaLevel', 'AvgSpeed']
df_weather = df_weather.astype({col: 'float64' for col in float_cols})
# Parse "Date" from the column itself with pd.to_datetime, the same way the
# spray/train/test dates are parsed (the previous code assigned a one-column
# DataFrame to the column).
df_weather['Date'] = pd.to_datetime(df_weather['Date'])
df_weather


In [None]:
# Collapse the rows of each date into a single row: numeric readings are
# averaged, Sunrise/Sunset keep the first value of the date, and the CodeSum
# strings are gathered into a list.
mean_cols = ['Tmax', 'Tmin', 'Tavg', 'Depart', 'DewPoint', 'WetBulb', 'Heat', 'Cool',
             'SnowFall', 'PrecipTotal', 'StnPressure', 'SeaLevel', 'ResultSpeed',
             'ResultDir', 'AvgSpeed']
agg_spec = dict.fromkeys(mean_cols, 'mean')
agg_spec['Sunrise'] = 'first'
agg_spec['Sunset'] = 'first'
agg_spec['CodeSum'] = list
df_weather = df_weather.groupby("Date", as_index=False).agg(agg_spec)
df_weather

In [None]:
#creating function to split list into words
def apply_set(segment):
    """Return the unique weather codes found in *segment* as one string.

    segment: iterable of strings, each holding whitespace-separated codes.
    Returns the distinct codes, each wrapped in single quotes and joined by
    commas (e.g. "'BR','HZ','RA'"), or an empty string when there are no codes.

    The codes are sorted so that the same set of codes always produces the
    same string; set iteration order is not stable, so an unsorted result
    could differ for identical inputs.
    """
    unique_codes = set()
    for words in segment:
        unique_codes.update(words.split())
    x = str(sorted(unique_codes))

    # strip the list brackets and the spaces after the commas
    return x.replace('[', '').replace(']', '').replace(' ', '')

In [None]:
#apply function to split tuple into list
df_weather["CodeSum_set"] = df_weather["CodeSum"].apply(apply_set)
df_weather[["CodeSum", "CodeSum_set"]]

In [None]:
#drop unnecessary columns
df_weather.drop(columns="CodeSum",inplace=True)

df_weather

In [None]:
#convert datetime to Month name
df_weather['Month'] = df_weather['Date'].dt.month_name()
df_weather

In [None]:
# Combo chart: max/min temperature by month (top panel, twin y-axes) and
# total precipitation by month (bottom panel).
fig = plt.figure(figsize=(16, 10))
color = 'tab:red'
color2 = 'tab:blue'

# Top panel, left axis: max temperature.
# Every plot is given its target axes explicitly instead of relying on
# whichever axes happens to be "current".
ax1 = fig.add_subplot(211)
sns.lineplot(x='Month', y='Tmax', data=df_weather, sort=False, color=color, ax=ax1)
ax1.set_xlabel('Month', fontsize=16)
ax1.set_ylabel('Avg Max Temp in Fahrenheit', fontsize=16, color=color)
ax1.tick_params(axis='y', color=color)

# Top panel, right axis (shares the x-axis): min temperature.
ax2 = ax1.twinx()
sns.lineplot(x='Month', y='Tmin', data=df_weather, sort=False, color=color2, ax=ax2)
ax2.set_ylabel('Avg Min Temp in Fahrenheit', fontsize=16, color=color2)
# ticks of the right axis follow the blue series (previously coloured red)
ax2.tick_params(axis='y', color=color2)

# Bottom panel: precipitation.
ax3 = fig.add_subplot(212)
sns.lineplot(data=df_weather, x='Month', y='PrecipTotal', ax=ax3)
ax3.set_xlabel('Month')
ax3.set_ylabel('PrecipTotal', color='green', fontsize=16);

##### It is believed that hot and dry conditions are more favorable for West Nile virus than cold and wet ones. Thus, from the line graphs we expect a higher infection rate between June and August, since the temperature during this period is high and there is a drop in the precipitation level (total rainfall).

In [None]:
df_weather.drop(columns=['Tmax','Tmin','Heat','Cool','SnowFall'], inplace=True)

In [None]:
df_weather["Sunrise"].isnull().sum().sum()

In [None]:
df_weather.to_csv("../datasets/cleaned_weather.csv", index=False)

Dropping unnecessary columns to fine-tune relevancy for analysis: <br>
Tmax & Tmin --> the average temperature (Tavg) will be used instead for feature engineering <br>
Heat & Cool --> both factors are irrelevant, as they are used to calculate energy usage <br>
SnowFall & Sunrise & Sunset --> irrelevant, not indicators that will impact West Nile virus (note: only SnowFall is dropped in the cell above; Sunrise and Sunset are still present in the saved file)

## Spray Dataset

In [None]:

#show sample  spray data
df_spray.head()

In [None]:
pd.DatetimeIndex(df_spray['Date']).year.value_counts()

In [None]:
#checking data types
df_spray.dtypes

In [None]:
#how many data poitns
df_spray.shape

In [None]:
#chek how many are NULLs
df_spray.isnull().sum()

In [None]:
df_spray.dtypes

In [None]:
# Change date to date time 
df_spray['Date'] = pd.to_datetime(df_spray['Date'])

In [None]:
#Time has null values
df_spray.loc[df_spray["Time"].isnull()]

 #2011-09-07 has null time

In [None]:
#are there any duplicates?
df_spray.duplicated().sum()

In [None]:
# lets check for the details on the duplicates

df_spray[df_spray.duplicated() == True]

#looks like almost  all are  "2011-09-07 @7:44:32 PM"

In [None]:
#lets drop the duplicates 
df_spray.drop_duplicates(inplace=True)

In [None]:
#final data count
df_spray.shape

In [None]:
#after removing duplicates on the data
#any duplicate Spray on the same Date and Location (multiple Times per Day)
df_tmp = df_spray.groupby(by=["Date","Latitude", "Longitude"]).count()
df_tmp[df_tmp['Time'] > 1]

#none!

In [None]:
#just drop the time 
#looks like there is only one spray per Day per Location, we can drop the Time
df_spray.drop(columns="Time", inplace=True)

In [None]:
df_spray.head()

In [None]:
plt.figure(figsize=(20,5))
sns.histplot(df_spray["Date"])
plt.xlabel('Date')
plt.ylabel('No of Spray')
plt.title("Spray by Date")
plt.show();

***only 2 days of Spray in 2011 and 8 days of spray in 2013***

In [None]:
def dbs_cluster(df, eps=.03, min_samples=5):
    '''
    Cluster the rows of a dataframe by location using DBSCAN.

    parameter:
        df: dataframe with 'Latitude' and 'Longitude' columns
        eps: DBSCAN neighbourhood radius, in the same units as the
             coordinate columns (default .03, the value used previously)
        min_samples: minimum number of neighbours for a core point
             (default 5, the value used previously)
    return the cluster label of every row, in row order
    (DBSCAN labels noise points as -1)
    '''
    latlong = df[['Latitude', 'Longitude']]
    dbs = DBSCAN(eps=eps, min_samples=min_samples).fit(latlong)
    return dbs.labels_

In [None]:
# Split the spray records by year. Each year gets its own independent copy so
# that adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
spray_year = pd.DatetimeIndex(df_spray['Date']).year
df_spray_2011 = df_spray[spray_year == 2011].copy()
df_spray_2013 = df_spray[spray_year == 2013].copy()

In [None]:
#create cluster per year
df_spray_2011["label"] = dbs_cluster(df_spray_2011).copy()
df_spray_2013["label"] = dbs_cluster(df_spray_2013).copy()

In [None]:
#visualize spray data (and cluster using DBSCAN)

#use map data provided by kaggle
mapdata = np.loadtxt('../datasets/mapdata_copyright_openstreetmap_contributors.txt')

# Geographic extent covered by the map image (xmin, xmax, ymin, ymax).
# This is the same extent the density heat-map cell at the end of this
# notebook uses for the same image. Deriving the extent from the spray
# bounding box stretches the map so the points no longer line up with it.
lon_lat_box = (-88, -87.5, 41.6, 42.1)

#create two charts, one for 2011 and one for 2013
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10,8))

spray_by_year = ((2011, df_spray_2011), (2013, df_spray_2013))
for axis, (year, df_year) in zip(ax, spray_by_year):
    #map background
    axis.imshow(mapdata, cmap=plt.get_cmap('gray'), aspect='auto', extent=lon_lat_box)

    #one scatter series per DBSCAN cluster label, in order of first appearance
    for cluster_label in df_year['label'].unique():
        cluster = df_year[df_year['label'] == cluster_label]
        axis.scatter(cluster['Longitude'], cluster['Latitude'],
                     label=cluster_label, marker='x')

    axis.set_title(f'Spray - {year}')
    axis.legend()

In [None]:

df_spray.to_csv("../datasets/cleaned_spray.csv", index=False)

In [None]:
#train Data Cleaning/EDA
#assigned to andrea

## Training Dataset

In [None]:
df_train.head()

In [None]:
df_train.shape

In [None]:
df_train.isnull().sum()

In [None]:
df_train

In [None]:
df_train.dtypes

#### Changing the data type for Date column

In [None]:
df_train['Date'] = pd.to_datetime(df_train['Date'])

In [None]:
df_train.dtypes

#### Removing unrequired columns

In [None]:
#unrequired columns will be dropped
#('Block' is not in this list, so it stays in the training data)
drop_col = ['Address', 'Street', 'AddressNumberAndStreet', 'AddressAccuracy']

# a single drop call instead of a list comprehension used only for its side effect
df_train.drop(columns=drop_col, inplace=True)

#### Check for duplicates in the data

In [None]:
df_train[df_train.duplicated(keep=False)]
#df_train.duplicated()

There are quite a number of duplicates in the data. This is due to a limit in data collection: the number of mosquitoes recorded in each row seems to be capped at 50. However, some of these duplicates only appear because the address columns were dropped previously; in other words, different addresses happened to record 50 mosquitoes each. Thus, the counts need to be summed up at a later stage.

#### Presence of West Nile Virus

In [None]:
df_train['WnvPresent'].value_counts(normalize=True)

The training data collected in 2007, 2009, 2011 and 2013 showed no West Nile Virus present in 94.75% of the records and West Nile Virus present in 5.24%.

### Presence of West Nile Virus (2007 - 2013)

In [None]:
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in newer Matplotlib
# releases; use the new name when it exists and fall back to the old one.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)

fig, ax = plt.subplots(figsize=(20, 8))

# one bar per record date; the bar height is the 0/1 WnvPresent flag
ax.bar(df_train['Date'], df_train['WnvPresent'])

ax.set(xlabel="Date",
       ylabel="Virus Present",
       title="Presence of West Nile Virus ")

date_form = DateFormatter("%m-%y")
ax.set_yticks([0, 1])
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=4))
# apply the month-year tick format (it was created but never attached before)
ax.xaxis.set_major_formatter(date_form)
plt.tight_layout()
plt.show()

The training data reflected the seasonality of the virus, with the virus being present between July and October. The apparent gaps in 2008, 2010 and 2012 do not mean the virus was absent in those years: the training data set only consists of data from 2007, 2009, 2011 and 2013.

### Presence of West Nile Virus in traps - Month

In [None]:
df_train['year'] = df_train['Date'].dt.year
df_train['month'] = df_train['Date'].dt.month

In [None]:
plt.figure(figsize=(20, 5))

# number of records per month, split by WnvPresent
splot = sns.countplot(x='month', hue='WnvPresent', data=df_train)

# write each bar's count just above the bar
for bar in splot.patches:
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    splot.annotate(f"{bar_top:.0f}", (bar_centre, bar_top),
                   ha='center', va='center',
                   xytext=(0, 10), textcoords='offset points')

Delving deeper into the specific months, it was noticed that West Nile Virus is present in mosquitoes during July, August and September, with higher numbers of mosquitoes carrying West Nile Virus in August than in July and September.

The data within the training set also reaffirmed the collection process in which mosquito traps being scattered across the city from May to October. 

### Presence of West Nile Virus in traps - Year

In [None]:
plt.figure(figsize=(20, 5))

# number of records per year, split by WnvPresent
splot = sns.countplot(x='year', hue='WnvPresent', data=df_train)

# write each bar's count just above the bar
for bar in splot.patches:
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    splot.annotate(f"{bar_top:.0f}", (bar_centre, bar_top),
                   ha='center', va='center',
                   xytext=(0, 10), textcoords='offset points')

Breaking it down by year, we noticed that 2007 and 2013 saw the most cases of mosquitoes with West Nile Virus present, with more than 200 cases each, while 2009 and 2011 saw fewer than 60 cases.

Comparing the training dataset with the spray dataset, we noticed that while the frequency of sprays was much higher in 2013 than in 2011, the number of traps with mosquitoes carrying West Nile Virus in 2013 was 4 times higher than in 2011.

### Types of Mosquitoes captured in traps

In [None]:
df_train['Species'].value_counts(normalize=True)

In [None]:
plt.figure(figsize=(20, 5))

# number of records per species
splot = sns.countplot(x='Species', data=df_train)

# write each bar's count just above the bar
for bar in splot.patches:
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    splot.annotate(f"{bar_top:.0f}", (bar_centre, bar_top),
                   ha='center', va='center',
                   xytext=(0, 10), textcoords='offset points')

Out of the 6 different mosquito species caught in the traps in 2007, 2009, 2011 and 2013, 45.23% of cases were Culex Pipiens/Restuans (a combination of both Culex Restuans & Culex Pipiens), followed by Culex Restuans (26.08%) and Culex Pipiens (25.69%).

Next, we will explore whether certain species are more likely to carry the West Nile Virus.

### Counts of traps with mosquitoes carrying West Nile Virus

In [None]:
train_present = df_train.loc[df_train['WnvPresent']==1]
train_present

In [None]:
plt.figure(figsize=(20, 5))

# number of WNV-positive records per species
splot = sns.countplot(x='Species', hue='WnvPresent', data=train_present)

# write each bar's count just above the bar
for bar in splot.patches:
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    splot.annotate(f"{bar_top:.0f}", (bar_centre, bar_top),
                   ha='center', va='center',
                   xytext=(0, 10), textcoords='offset points')

Species tested carrying the West Nile Virus are primarily three main species - Culex Pipiens/Restuans, Culex Pipiens and Culex Restuans

### Counts of species captured in traps over the years

In [None]:
plt.figure(figsize=(20, 5))

# number of records per year, split by species
splot = sns.countplot(x='year', hue='Species', data=df_train)

# write each bar's count just above the bar
for bar in splot.patches:
    bar_top = bar.get_height()
    bar_centre = bar.get_x() + bar.get_width() / 2.
    splot.annotate(f"{bar_top:.0f}", (bar_centre, bar_top),
                   ha='center', va='center',
                   xytext=(0, 10), textcoords='offset points')

Culex Pipiens/Restuans, Culex Restuans and Culex Pipiens were the three main species seen in the training dataset. These three species made up the bulk of the cases of mosquitoes captured in traps in 2007, 2009, 2011 and 2013.


2007 saw Culex Pipiens taking up 42% of traps sampled with mosquitoes caught. However, in 2009, 2011 and 2013, Culex Pipiens/Restuans represented the bulk of the traps sampled (49.93% in 2009, 46.83% in 2011 and 46.59% in 2013). Furthermore, the number of traps has decreased: 2009, 2011 and 2013 reported fewer traps than 2007.

### Sum of Mosquitoes (Year, Month)

In [None]:
mos_yearmth = df_train.groupby(['year', 'month'])['NumMosquitos'].agg(['sum'])
mos_yearmth

In [None]:
mos_year = df_train.groupby(['year'])['NumMosquitos'].agg(['sum'])
mos_year

In [None]:
plt.figure(figsize=(18, 8))
mos_yearmth['sum'].plot(kind="barh")
plt.xlabel('Total Mosquito count')
plt.title('Total sum of mosquitoes by Year, Month')

August 2007 saw the highest number of mosquitoes caught, with more than 40,000 mosquitoes, followed by July and August 2013 with more than 12,000 mosquitoes caught.

In [None]:
sss = df_train.groupby(['year', 'month', 'Species','WnvPresent'], as_index=False).agg({'NumMosquitos': 'sum'})
sss

In [None]:
# Build a "year_month" key (e.g. "2007_8") with vectorised string
# concatenation instead of a Python-level apply over every row.
cols = ['year', 'month']
sss['year_month'] = sss['year'].astype(str) + '_' + sss['month'].astype(str)
sss.head()

### Sum of Mosquitoes (Year, Month) by Species

In [None]:
plt.figure(figsize=(20, 20))
sns.barplot(x="year_month", y="NumMosquitos", data=sss, hue = 'Species', estimator=sum)

In August 2007, more than 25,000 Culex Pipiens were caught, which was 10,000 more than the next most common species (Culex Pipiens/Restuans) caught in the same month.

Taking a closer look at the specific years, it was noticed that in 2007 the majority of mosquitoes caught were Culex Pipiens and Culex Pipiens/Restuans. In 2009, 2011 and 2013 (June, July), the majority of mosquitoes caught belonged to Culex Pipiens/Restuans and Culex Restuans, while in August and September 2013 the majority caught were Culex Pipiens and Culex Pipiens/Restuans.

### Traps

In [None]:
trap_count = df_train['Trap'].nunique()

print( f"There are {trap_count} unique number of traps in the training dataset")

In [None]:
trap_present = train_present.groupby('Trap')['NumMosquitos'].agg(['sum','count'])
trap_present.index.name = 'Trap'

In [None]:

top_10 = trap_present.sort_values(by="sum", ascending=False).head(10)
#top_10

#### Visualizing the count of traps with presence of West Nile Virus in mosquitoes

In [None]:
top_10['sum'].plot(kind="barh")

The top 2 Traps with the highest count of mosquitoes with west nile virus are T900 and T115, with more than 1750 counts.

### Addressing 50 mosquitoes per row limit

Pulling up duplicate rows with the same date, species and location. 


In [None]:
#Filtering rows that are duplicated based on these columns - Date, Species, Latitude, Longitude and WnvPresent
df_train.loc[df_train.duplicated(subset=['Date', 'Species', 'Latitude', 'Longitude','WnvPresent'], keep=False)]

In [None]:
# Collapse rows that are identical in every column except NumMosquitos into
# one row, summing their mosquito counts.
group_cols = [col for col in df_train.columns if col != 'NumMosquitos']
df_train = df_train.groupby(group_cols, as_index=False).sum()


In [None]:
df_train.duplicated().sum()

In [None]:
df_train.shape

In [None]:
df_train

In [None]:
#Visualize if Sprayed area has less WNV cases

# WNV-positive training rows for the two years that have spray data
year_of_row = pd.DatetimeIndex(df_train['Date']).year
wnv_positive = df_train['WnvPresent'] > 0
df_train_2011 = df_train[(year_of_row == 2011) & wnv_positive]
df_train_2013 = df_train[(year_of_row == 2013) & wnv_positive]

#use map data provided by kaggle
mapdata = np.loadtxt('../datasets/mapdata_copyright_openstreetmap_contributors.txt')

# Geographic extent covered by the map image (xmin, xmax, ymin, ymax).
# This is the same extent the density heat-map cell at the end of this
# notebook uses for the same image. Deriving the extent from the spray
# bounding box stretches the map so the points no longer line up with it.
lon_lat_box = (-88, -87.5, 41.6, 42.1)

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(16,10))

panels = ((2011, df_spray_2011, df_train_2011),
          (2013, df_spray_2013, df_train_2013))
for axis, (year, sprays, positives) in zip(ax, panels):
    #map background
    axis.imshow(mapdata, cmap=plt.get_cmap('gray'), aspect='auto', extent=lon_lat_box)

    #spray locations, one scatter series per DBSCAN cluster label
    for cluster_label in sprays['label'].unique():
        cluster = sprays[sprays['label'] == cluster_label]
        axis.scatter(cluster['Longitude'], cluster['Latitude'],
                     label=cluster_label, marker='x')

    #WNV-positive locations (label/title acronym corrected from "WVN")
    axis.scatter(positives['Longitude'], positives['Latitude'],
                 marker='o', label='WNV', color='orange')

    axis.set_title(f'Spray/WNV - {year}')
    axis.legend()

*Above are the spray locations and the locations where WNV was present. Further analysis is needed to check whether spraying an area had any impact on the number of mosquitoes or the presence of WNV.*

In [None]:
df_train.info()

In [None]:
df_merge = pd.merge(left=df_train, right=df_weather, how="inner", on="Date")
df_merge.shape

In [None]:
df_merge.info()

In [None]:
# Looking for correlation against WnvPresent
features = ['Species','Trap','NumMosquitos','Tavg','Depart','DewPoint','WetBulb','PrecipTotal','StnPressure','SeaLevel','ResultSpeed','ResultDir','CodeSum_set','WnvPresent']
# Correlation is only computed on numeric columns. The text columns in
# `features` are filtered out explicitly, because recent pandas versions
# raise on non-numeric columns instead of silently skipping them.
numeric_features = df_merge[features].select_dtypes(include='number')
sns.heatmap(numeric_features.corr(), annot=True, cmap='coolwarm')

#### Decision to drop columns which have a weak correlation with WnvPresent.

We are dropping features whose correlation with WnvPresent is between -0.05 and 0.05. <br>
After which, we have identified ['StnPressure','SeaLevel','Depart','WetBulb','ResultDir','PrecipTotal']  to be dropped. <br>
However, we noticed that 'PrecipTotal' is an indicator of whether a day had heavy rain or no rain, which is a critical influence on mosquito activity (mosquitoes are weak flying insects, and heavy rain can damage their wings, so heavy rain keeps mosquitoes indoors). Therefore, we will keep 'PrecipTotal' as one of the features.


In [None]:
# Drop the weakly correlated weather features from the merged training data.
# NOTE(review): the markdown above also names 'SeaLevel' as a weak feature,
# but it is not dropped here — confirm which of the two is intended.
df_merge.drop(columns=['StnPressure','Depart','WetBulb','ResultDir'], inplace=True)

In [None]:
df_merge

In [None]:

#check to make sure there is no more missing data
assert df_merge.isnull().sum().sum() == 0 

#save our dataset for further processing (feature engineering)
df_merge.to_csv("../datasets/cleaned_traindf.csv", index=False)

We have decided to drop the spray dataset and merge the training dataset with the weather dataset. 

This decision was made due to various reasons. 
1) There is only data in the spray dataset for 2 years (2011 and 2013)
2) While the frequency of sprays is much higher in 2013 than in 2011, the number of traps with mosquitoes carrying West Nile Virus in 2013 is 4 times higher than in 2011. Also, the sum of mosquitoes caught in 2013 is higher than in 2011 (1.94X higher), which suggests that the sprays may not be effective in killing the mosquitoes and reducing West Nile Virus in the area.

In [None]:
# Attach spray records to the merged train/weather data by exact coordinates.
# df_spray is modified in place: it gets an is_spray flag, its date column is
# renamed to Spray_Date, and its coordinates are rounded to 6 decimals before
# the left join.
df_spray["is_spray"] = 1
df_spray.rename(columns={'Date':'Spray_Date'},inplace=True)
for coord in ("Latitude", "Longitude"):
    df_spray[coord] = df_spray[coord].round(6)
df_mwspray = df_merge.merge(df_spray, how="left", on=["Latitude","Longitude"])
df_mwspray.shape

In [None]:
df_mwspray.to_csv("../datasets/cleaned_traindf w spray.csv", index=False)
df_mwspray

In [None]:
df_spray

## Test Dataset

In [None]:
#do the same for test dataset

In [None]:
#remove the same address columns that were removed from the training data
# (a single drop call instead of a list comprehension used only for its side effect)
df_test.drop(columns=drop_col, inplace=True)

In [None]:
#create date fields
df_test['Date'] = pd.to_datetime(df_test['Date'])
df_test['year'] = df_test['Date'].dt.year
df_test['month'] = df_test['Date'].dt.month

In [None]:
df_test.info()

In [None]:
df_merge_test = pd.merge(left=df_test, right=df_weather, how="left", on="Date")
df_merge_test.shape

In [None]:
df_merge_test.info()

In [None]:
# following dropped columns from df_merge dataset
df_merge_test.drop(columns=['StnPressure','Depart','WetBulb','ResultDir'], inplace=True)

In [None]:
#check to make sure there is no more missing data
assert df_merge_test.isnull().sum().sum() == 0 

#save our dataset for further processing (feature engineering)
df_merge_test.to_csv("../datasets/cleaned_testdf.csv", index=False)

In [None]:
### END OF DATA CLEANING ####

In [None]:
#https://www.kaggle.com/code/khyh00/west-nile-heatmap/script

import copy

from sklearn.neighbors import KernelDensity

traps = df_train[['Date', 'Trap','Longitude', 'Latitude', 'WnvPresent']]

# Red colormap whose alpha channel ramps from 0 (lowest values) to 0.9
# (highest values), so the density layer can be drawn over the map image.
# see,
# Meaning of the colormap._lut list in matplotlib.color
# http://stackoverflow.com/questions/18035411/meaning-of-the-colormap-lut-list-in-matplotlib-color
#
# Work on a copy: plt.cm.Reds is a shared colormap object, and editing its
# lookup table in place would change every later plot that uses "Reds".
# _init() rebuilds the lookup table, so the copy gets its own array.
alpha_cm = copy.copy(plt.cm.Reds)
alpha_cm._init()
alpha_cm._lut[:-3,-1] = abs(np.logspace(0, 1, alpha_cm.N) / 10 - 1)[::-1]


aspect = mapdata.shape[0] * 1.0 / mapdata.shape[1]
lon_lat_box = (-88, -87.5, 41.6, 42.1) # xmin, xmax, ymin, ymax
xmin, xmax, ymin, ymax = lon_lat_box

# one point per (date, trap) where WNV was found
sigthings = traps[traps['WnvPresent'] > 0]
sigthings = sigthings.groupby(['Date', 'Trap','Longitude', 'Latitude']).max()['WnvPresent'].reset_index()
X = sigthings[['Longitude', 'Latitude']].values
kd = KernelDensity(bandwidth=0.02)
kd.fit(X)

# evaluate the fitted density on a regular grid covering the map extent
grid_size = 100
xv,yv = np.meshgrid(np.linspace(xmin, xmax, grid_size), np.linspace(ymin, ymax, grid_size))
gridpoints = np.array([xv.ravel(),yv.ravel()]).T
zv = np.exp(kd.score_samples(gridpoints).reshape(grid_size,grid_size))
plt.figure(figsize=(10,14))
plt.imshow(mapdata,
           cmap=plt.get_cmap('gray'),
           extent=lon_lat_box,
           aspect=aspect)
plt.imshow(zv,
           origin='lower',
           cmap=alpha_cm,
           extent=lon_lat_box,
           aspect=aspect)

# -> how to use 'extent' in matplotlib.pyplot.imshow
# http://stackoverflow.com/questions/6999621/how-to-use-extent-in-matplotlib-pyplot-imshow

# mark every distinct trap location
locations = traps[['Longitude', 'Latitude']].drop_duplicates().values
plt.scatter(locations[:,0], locations[:,1], marker='x')

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=3abded61-2db4-446b-a02d-565b62921f1b' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>