In [None]:
import seaborn as sns
%matplotlib inline
import folium
import numpy as np
import pandas as pd

In [None]:
# Load a previously prepared CSV file containing long-term average rainfall data.
# The CSV file was prepared from TRMM3B42ALT_199801_201505.tif,
# available from: ftp://africagrids.net/30000m/TRMM3B42/Average/
# Later cells read the `lat`, `long` and `avg_rainfall` columns of this frame.
df = pd.read_csv('../data/mapping_data/avg_rainfall.csv')
# Preview the first rows to check that the file loaded as expected
df.head()

In [None]:
import matplotlib.cm as cm
import matplotlib
from matplotlib.colors import LogNorm

def convert_to_hex(rgba_color):
    """Convert an RGB(A) color with float components to a '#rrggbb' hex string.

    Parameters
    ----------
    rgba_color : sequence of float
        Red, green and blue components at indices 0, 1 and 2, each nominally
        in [0, 1]. Any further component (e.g. alpha) is ignored, so
        transparency information is discarded.

    Returns
    -------
    str
        Lower-case hex color of the form '#rrggbb'.
    """
    def to_byte(component):
        # Round to the nearest 8-bit level; plain truncation biases every
        # channel downwards and only maps exactly 1.0 to 255.
        level = int(round(float(component) * 255))
        # Clamp so slightly out-of-range inputs still give two hex digits
        # per channel instead of a malformed string.
        return min(max(level, 0), 255)

    red = to_byte(rgba_color[0])
    green = to_byte(rgba_color[1])
    blue = to_byte(rgba_color[2])
    return '#{r:02x}{g:02x}{b:02x}'.format(r=red, g=green, b=blue)


def color(x, scalemin=None, scalemax=None, log=True):
    """Map the value ``x`` onto the RdYlGn color map and return a hex color.

    Parameters
    ----------
    x : float
        Value to colorize.
    scalemin, scalemax : float, optional
        Lower and upper ends of the color scale. When omitted (``None``)
        they are taken from the minimum and maximum of
        ``df['avg_rainfall']`` at call time.
    log : bool, default True
        If true, normalize with ``LogNorm``; otherwise use a linear
        ``Normalize``.

    Returns
    -------
    str
        The '#rrggbb' string produced by ``convert_to_hex`` for the color
        that ``x`` maps to (suitable for plotting with folium).
    """
    # Resolve the default scale when the function is called, not when it is
    # defined: defaults written in the signature are evaluated only once, so
    # they would silently go stale if `df` were reloaded afterwards.
    if scalemin is None:
        scalemin = df['avg_rainfall'].min()
    if scalemax is None:
        scalemax = df['avg_rainfall'].max()

    # NOTE(review): log scaling presumably needs scalemin > 0 - confirm for this data.
    if log:
        norm = LogNorm(vmin=scalemin, vmax=scalemax)
    else:
        norm = matplotlib.colors.Normalize(vmin=scalemin, vmax=scalemax)

    # Select a color map from matplotlib
    cmap = cm.RdYlGn
    # Build a scalar map from the chosen normalization and color map
    m = cm.ScalarMappable(norm=norm, cmap=cmap)
    # Look up the RGBA color for x and convert it to hex
    return convert_to_hex(m.to_rgba(x))
    


In [None]:
# Base map on which the rainfall points are drawn
af = folium.Map(location=[-4, 21], zoom_start=3, tiles='cartodbpositron')

# Folium requires plain [lat, long] lists, not an array
plist = [[lat, long] for lat, long in zip(df.lat.values, df.long.values)]

# There are too many points to plot them all, so keep only every
# `downsample`-th point together with the rainfall value of that same row
downsample = 100
rain_values = df.avg_rainfall.values[::downsample]
for p, rain in zip(plist[::downsample], rain_values):
    col_idx = color(rain, log=False)
    marker = folium.CircleMarker(
        location=p,
        radius=2,
        popup="%0.1f inches" % rain,
        fill_color=col_idx,
        color=col_idx,
    )
    marker.add_to(af)

af

In [None]:
import geopandas as gpd

# This shape file was downloaded from:
# https://www.arcgis.com/home/item.html?id=16da193d9bcd4ae0b74febe39730658a
# Later cells read its `COUNTRY` and `geometry` columns.
shp = gpd.GeoDataFrame.from_file('../data/mapping_data/AfricanCountires.shp')

# Preview the first rows to see what the shape file contains
shp.head()



In [None]:
# Distinct country names present in the shape file, and how many there are
country_names = shp.COUNTRY.unique()
print(country_names)
print('unique countries = ', len(country_names))
print('Note that the UN recognizes 54 countries in Africa')

# Keep the geometry of every record for the point-in-polygon test below
polys = shp.geometry.values


In [None]:
# This cell assigns a country to each (lat, long) point.
# Note that the assignment takes SEVERAL HOURS to run.
# Set the following flag to True to actually run the code;
# leave it False to skip it and get the result from a previously prepared file.
HAVE_ALL_DAY = False


from shapely.geometry import shape, Point

def check(lon, lat, polygon):
    """Report whether the point (lon, lat) lies inside ``polygon``.

    The coordinates are wrapped in a shapely ``Point`` (``lon`` first, then
    ``lat``) and the result of ``polygon.contains`` for that point is
    returned unchanged.
    """
    return polygon.contains(Point(lon, lat))

if HAVE_ALL_DAY:
    # Build the shapely geometry for every country once, up front. Doing this
    # inside the point loop repeats identical work for every single point.
    country_shapes = [
        (shape(poly), country)
        for poly, country in zip(polys, shp.COUNTRY.values)
    ]

    contained_in = []
    for lat, long in zip(df.lat.values, df.long.values):
        # Record the first country whose polygon contains the point
        for polygon, country in country_shapes:
            if check(long, lat, polygon):
                contained_in.append(country)
                break
        else:
            # No polygon contained the point: mark it with the 'None' label
            contained_in.append('None')

    # Make a pandas Series out of the list
    ctry = pd.Series(contained_in)
    # Add it to the dataframe as the per-point country label
    df['Country'] = ctry.values

else:
    print('Go to next cell for previously prepared file')

In [None]:
# Load the previously prepared file; later cells read its `Country` and
# `avg_rainfall` columns, so the slow assignment above need not be re-run.
df2 = pd.read_csv('../data/mapping_data/avg_rainfall_country.csv')
# Preview the first rows
df2.head()

In [None]:
# Boolean mask of the points that were not matched to any country.
# Unmatched points were written with the label 'None'; depending on the pandas
# version, read_csv may parse that text as a missing value (NaN) instead of a
# string, so both representations are counted as "unassigned".
tr = df2['Country'].isna() | (df2['Country'] == 'None')
print(tr.sum(), ' points were not assigned a country')

In [None]:
af = folium.Map(location=[-4, 21], zoom_start=3, tiles='cartodbpositron')

# Rows that were not assigned a country.
# NOTE(review): the mask `tr` was computed from df2 but is applied to df, which
# assumes both frames hold the same rows in the same order - confirm.
unassigned = df[tr]

# Folium requires a list of [lat, long] points, not an array
plist = [[lat, long] for lat, long in zip(unassigned.lat.values, unassigned.long.values)]

# Take the rainfall from the same filtered rows as the coordinates, so each
# marker's popup and color belong to the point actually being plotted
# (indexing the unfiltered frame by position would pair them with other rows).
rain_values = unassigned.avg_rainfall.values

# Let's see where they are
downsample = 5 #set to 1 for no down-sampling
for p, rain in zip(plist[::downsample], rain_values[::downsample]):
    col_idx = color(rain, log=False)
    folium.CircleMarker(location=p, radius=2, popup="%0.1f inches" % rain,
                        fill_color=col_idx, color=col_idx).add_to(af)

# We can see that assigning a country to a latitude-longitude point is subject
# to the shape file's accuracy
af

In [None]:
#get avg rainfall per country
means = df2.groupby(['Country'])['avg_rainfall'].mean()
#print(means.index)