In [2]:
import pandas as pd
import numpy as np
import xarray as xr
import io
import urllib.request
import datetime
import gzip
import geopandas
import geopandas as gpd
import os
import zipfile as zf
import shapefile
from shapely.geometry import shape
#Api world bank
import wbdata as wb

Using the World Bank API to import economic indicators, such as the GPD per capita

In [2]:
#GDP ppc and indicator data from World Bank API
indicators = {"NY.GDP.PCAP.PP.KD": "gdppc"}
wbdf = wb.get_dataframe(indicators=indicators, country="all", convert_date=True)
wbdf = pd.DataFrame.reset_index(wbdf, level=None, drop=False, inplace=False, col_level=0, col_fill='')

Importing the world borders polygon file, in order to be able to map coordinates to their country locations

In [5]:
zp = zf.ZipFile('TM_WORLD_BORDERS-0.3.zip')
files_to_read = [y for y in zp.namelist() for ending in ['dbf', 'prj', 'shp', 'shx'] if y.endswith(ending)]
dummy = zp.read(files_to_read[0])
dbf_file, prj_file, shp_file, shx_file = [zp.open(filename) for filename in files_to_read]
r = shapefile.Reader(shp = shp_file, shx = shx_file, dbf = dbf_file, encoding='windows-1252')
attributes, geometry = [], []
field_names = [field[0] for field in r.fields[1:]]
for row in r.shapeRecords():
    geometry.append(shape(row.shape.__geo_interface__))
    attributes.append(dict(zip(field_names, row.record)))
#Creating a GeoDataframe of the World Borders
gdf = gpd.GeoDataFrame(data = attributes, geometry = geometry)

Using a web request to scrape the temperature anomalies dataset from NASA

In [4]:
url = "https://data.giss.nasa.gov/pub/gistemp/gistemp1200_GHCNv4_ERSSTv5.nc.gz"
req = urllib.request.Request(url)
with gzip.open(urllib.request.urlopen(req)) as resp:
    xr_df = xr.open_dataset(io.BytesIO(resp.read()))
dfnasa = xr_df.to_dataframe()
#Transforming into pandas dataframe
dfnasa = dfnasa.reset_index()
#dfnasa.to_csv("nasa_temperature.csv")

## Subsetting data

In [8]:
nasa50 = dfnasa[dfnasa['time'].dt.year > 1950]
nasa50 = gpd.GeoDataFrame(
    nasa50, geometry=gpd.points_from_xy(nasa50.lat, nasa50.lon))

In [11]:
nasa50.sample(10)

Unnamed: 0,lat,lon,nv,time,time_bnds,tempanomaly,geometry
9761617,-59.0,175.0,1,1974-10-15,1974-11-01,-0.2,POINT (-59.000 175.000)
10296079,-57.0,131.0,0,1993-04-15,1993-04-01,0.58,POINT (-57.000 131.000)
35954845,27.0,139.0,1,1991-10-15,1991-11-01,0.44,POINT (27.000 139.000)
35286521,25.0,105.0,1,1983-06-15,1983-07-01,0.53,POINT (25.000 105.000)
16215152,-37.0,21.0,0,1996-01-15,1996-01-01,1.49,POINT (-37.000 21.000)
1807738,-85.0,165.0,1,2004-11-15,2004-12-01,-0.16,POINT (-85.000 165.000)
37470992,33.0,-47.0,1,1985-05-15,1985-06-01,0.8,POINT (33.000 -47.000)
50092664,75.0,-165.0,1,1988-09-15,1988-10-01,-0.19,POINT (75.000 -165.000)
35220780,25.0,67.0,0,2017-01-15,2017-01-01,0.86,POINT (25.000 67.000)
10611694,-55.0,-43.0,0,2006-07-15,2006-07-01,1.23,POINT (-55.000 -43.000)


In [None]:
bord_nasa50 = gpd.sjoin(gdf, nasa50, how="inner")