This notebook builds a database of Longhurst polygons that I will later use to query the biome/province that a particular point belongs to.
There are 4 biomes (coastal, polar, trades, westerlies) and 54 provinces:
<img src='./figJar/lhurstbiogeog.jpg'>
<br>
First, I load the appropriate shapefile, which at the time of writing is available [here](http://www.marineregions.org/gazetteer.php?p=details&id=22538).

In [1]:
import pandas as pd
import shapefile

In [2]:
# Location of the Longhurst province shapefile.
# NOTE(review): this is an absolute, machine-specific path; adjust it when
# running the notebook elsewhere.
longhurst_shp = '/accounts/ekarakoy/DATA/Marine_ECO_REGIONS/Longhurst/Longhurst_world_v4_2010.shp'
sf = shapefile.Reader(longhurst_shp)

In [3]:
# Column names are the first item of each field descriptor; the leading
# descriptor is skipped (presumably pyshp's deletion-flag pseudo-field -- confirm).
fields = [descriptor[0] for descriptor in sf.fields[1:]]
# Attribute rows, one per shape.
records = sf.records()
# Point list of every shape, in the same order as the records.
# NOTE(review): only .points is kept; if a shape has several parts, the part
# offsets are not stored here -- confirm that is acceptable for later
# point-in-polygon queries.
shps = [shape.points for shape in sf.shapes()]

Build a pandas DataFrame, including a 'coords' column containing the vertices (usually more than 4) of the corresponding polygons.

In [4]:
# One row per shapefile record, with that shape's point list attached as 'coords'.
df = pd.DataFrame(records, columns=fields).assign(coords=shps)

In [6]:
# Normalise the province descriptions to lower case.
df = df.assign(ProvDescr=df['ProvDescr'].str.lower())

In [7]:
# Display the assembled frame: province code, description and polygon coordinates.
df

Unnamed: 0,ProvCode,ProvDescr,coords
0,BPLR,polar - boreal polar province (polr),"[(-161.18425551507818, 63.49999999999986), (-1..."
1,ARCT,polar - atlantic arctic province,"[(-21.513050836786306, 64.64409414795617), (-2..."
2,SARC,polar - atlantic subarctic province,"[(11.264715222107696, 63.96082319213352), (11...."
3,NADR,westerlies - n. atlantic drift province (wwdr),"[(-11.499999999999886, 57.50000000000006), (-1..."
4,GFST,westerlies - gulf stream province,"[(-43.5, 43.50000000000003), (-43.5, 42.500000..."
5,NASW,westerlies - n. atlantic subtropical gyral pro...,"[(-39.499999999999915, 25.500000000000057), (-..."
6,NATR,trades - n. atlantic tropical gyral province (...,"[(-72.34673390464255, 18.535969182733368), (-7..."
7,WTRA,trades - western tropical atlantic province,"[(-19.499999999999886, -6.499999999999986), (-..."
8,ETRA,trades - eastern tropical atlantic province,"[(9.500000000000114, -12.499999999999858), (8...."
9,SATL,trades - south atlantic gyral province (satg),"[(-19.499999999999886, -6.499999999999986), (-..."


In [10]:
# Order rows alphabetically by description; each description starts with its
# biome name, so rows of the same biome end up adjacent.
df = df.sort_values(by='ProvDescr')

In [18]:
# Display the frame again, now sorted by ProvDescr (the original row labels are kept).
df

Unnamed: 0,ProvCode,ProvDescr,coords
42,ALSK,coastal - alaska downwelling coastal province,"[(-127.4999999999999, 50.58680189157428), (-12..."
28,AUSW,coastal - australia-indonesia coastal province,"[(125.66080820729292, -14.61166092766527), (12..."
20,BENG,coastal - benguela current coastal province,"[(11.716972846451625, -17.49999999999993), (11..."
18,BRAZ,coastal - brazil current coastal province,"[(-33.49999999999994, -10.499999999999986), (-..."
43,CCAL,coastal - california upwelling coastal province,"[(-110.4999999999999, 24.500000000000142), (-1..."
11,CNRY,coastal - canary coastal province (eacb),"[(-16.339471208351284, 11.50000000000017), (-1..."
44,CAMR,coastal - central american coastal province,"[(-80.44239450415253, -0.4999999999998437), (-..."
45,CHIL,coastal - chile-peru current coastal province,"[(-80.49999999999991, -0.37113815817150453), (..."
46,CHIN,coastal - china sea coastal province,"[(128.50000000000017, 35.12007487044124), (128..."
23,EAFR,coastal - e. africa coastal province,"[(48.50315282529476, -13.499999999999929), (48..."


In [19]:
# NOTE(review): this copy appears to be dead code -- dfc is rebound to a new
# frame built from the split ProvDescr column in the following cell, without
# this copy being read first.
dfc = df.copy()

In [23]:
# Split each "biome - province name" description into two columns.
# n=1 stops after the first ' - ', so a province name that itself contains
# ' - ' stays intact instead of producing a third field (which would not fit
# the two column names). expand=True returns a frame that keeps df's index,
# so rows still line up with df for the later merge.
dfc = df.ProvDescr.str.split(' - ', n=1, expand=True)
dfc.columns = ['Biome', 'Province']

In [24]:
# Check the first rows of the Biome/Province split.
dfc.head()

Unnamed: 0,Biome,Province
42,coastal,alaska downwelling coastal province
28,coastal,australia-indonesia coastal province
20,coastal,benguela current coastal province
18,coastal,brazil current coastal province
43,coastal,california upwelling coastal province


In [25]:
# Count how many provinces fall under each biome.
dfc.Biome.value_counts()

coastal       22
westerlies    14
trades        12
polar          6
Name: Biome, dtype: int64

In [26]:
# Summarise dfc: index range, columns and non-null counts.
dfc.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 54 entries, 42 to 35
Data columns (total 2 columns):
Biome       54 non-null object
Province    54 non-null object
dtypes: object(2)
memory usage: 3.8+ KB


In [27]:
# Attach the Biome/Province columns to the original attributes, matching rows
# on the index that both frames share.
da = dfc.merge(df, how='outer', left_index=True, right_index=True)

In [28]:
# ProvDescr has been split into Biome and Province, so remove the combined column.
da = da.drop('ProvDescr', axis=1)
da.head()

Unnamed: 0,Biome,Province,ProvCode,coords
42,coastal,alaska downwelling coastal province,ALSK,"[(-127.4999999999999, 50.58680189157428), (-12..."
28,coastal,australia-indonesia coastal province,AUSW,"[(125.66080820729292, -14.61166092766527), (12..."
20,coastal,benguela current coastal province,BENG,"[(11.716972846451625, -17.49999999999993), (11..."
18,coastal,brazil current coastal province,BRAZ,"[(-33.49999999999994, -10.499999999999986), (-..."
43,coastal,california upwelling coastal province,CCAL,"[(-110.4999999999999, 24.500000000000142), (-1..."


In [29]:
# Order rows by biome first, then by province name within each biome.
da = da.sort_values(['Biome', 'Province'])

In [30]:
# Replace the old row labels with a fresh 0..N-1 index; the old labels are discarded.
da = da.reset_index(drop=True)

In [31]:
# Presentation casing: first letter of the biome upper-cased, province names
# in Title Case.
da = da.assign(
    Biome=da['Biome'].str.capitalize(),
    Province=da['Province'].str.title(),
)

In [32]:
# Check the first rows after re-casing Biome and Province.
da.head()

Unnamed: 0,Biome,Province,ProvCode,coords
0,Coastal,Alaska Downwelling Coastal Province,ALSK,"[(-127.4999999999999, 50.58680189157428), (-12..."
1,Coastal,Australia-Indonesia Coastal Province,AUSW,"[(125.66080820729292, -14.61166092766527), (12..."
2,Coastal,Benguela Current Coastal Province,BENG,"[(11.716972846451625, -17.49999999999993), (11..."
3,Coastal,Brazil Current Coastal Province,BRAZ,"[(-33.49999999999994, -10.499999999999986), (-..."
4,Coastal,California Upwelling Coastal Province,CCAL,"[(-110.4999999999999, 24.500000000000142), (-1..."


In [33]:
# Check the last rows as well.
da.tail()

Unnamed: 0,Biome,Province,ProvCode,coords
49,Westerlies,Pacific Subarctic Gyres Province (West),PSAW,"[(148.54025737372314, 45.44776908500796), (148..."
50,Westerlies,S. Pacific Subtropical Gyre Province,SPSG,"[(174.79553431438694, -37.02305590229166), (17..."
51,Westerlies,S. Subtropical Convergence Province,SSTC,"[(180.0, -41.49999999999986), (179.50000000000..."
52,Westerlies,Subantarctic Province,SANT,"[(-69.16417186341673, -54.51973098872139), (-6..."
53,Westerlies,Tasman Sea Province,TASM,"[(174.50000000000006, -35.891554686677495), (1..."


In [34]:
# Encode each biome as an integer via pandas category codes and place the new
# column directly after 'Biome'.
da.insert(
    loc=1,
    column='BiomCat',
    value=da['Biome'].astype('category').cat.codes,
)

In [35]:
# Number the provinces with the current row index, i.e. each row's position in
# the Biome/Province sort order established before the index was reset.
da.insert(loc=4, column='ProvCat', value=da.index.tolist())

In [36]:
# Check the first rows now that BiomCat and ProvCat have been inserted.
da.head()

Unnamed: 0,Biome,BiomCat,Province,ProvCode,ProvCat,coords
0,Coastal,0,Alaska Downwelling Coastal Province,ALSK,0,"[(-127.4999999999999, 50.58680189157428), (-12..."
1,Coastal,0,Australia-Indonesia Coastal Province,AUSW,1,"[(125.66080820729292, -14.61166092766527), (12..."
2,Coastal,0,Benguela Current Coastal Province,BENG,2,"[(11.716972846451625, -17.49999999999993), (11..."
3,Coastal,0,Brazil Current Coastal Province,BRAZ,3,"[(-33.49999999999994, -10.499999999999986), (-..."
4,Coastal,0,California Upwelling Coastal Province,CCAL,4,"[(-110.4999999999999, 24.500000000000142), (-1..."


In [37]:
# Display the whole table, including the BiomCat and ProvCat columns.
da

Unnamed: 0,Biome,BiomCat,Province,ProvCode,ProvCat,coords
0,Coastal,0,Alaska Downwelling Coastal Province,ALSK,0,"[(-127.4999999999999, 50.58680189157428), (-12..."
1,Coastal,0,Australia-Indonesia Coastal Province,AUSW,1,"[(125.66080820729292, -14.61166092766527), (12..."
2,Coastal,0,Benguela Current Coastal Province,BENG,2,"[(11.716972846451625, -17.49999999999993), (11..."
3,Coastal,0,Brazil Current Coastal Province,BRAZ,3,"[(-33.49999999999994, -10.499999999999986), (-..."
4,Coastal,0,California Upwelling Coastal Province,CCAL,4,"[(-110.4999999999999, 24.500000000000142), (-1..."
5,Coastal,0,Canary Coastal Province (Eacb),CNRY,5,"[(-16.339471208351284, 11.50000000000017), (-1..."
6,Coastal,0,Central American Coastal Province,CAMR,6,"[(-80.44239450415253, -0.4999999999998437), (-..."
7,Coastal,0,Chile-Peru Current Coastal Province,CHIL,7,"[(-80.49999999999991, -0.37113815817150453), (..."
8,Coastal,0,China Sea Coastal Province,CHIN,8,"[(128.50000000000017, 35.12007487044124), (128..."
9,Coastal,0,E. Africa Coastal Province,EAFR,9,"[(48.50315282529476, -13.499999999999929), (48..."


In [None]:
# Restore the compass prefixes 'NW'/'SW', which presumably came out of
# str.title() as 'Nw'/'Sw'.
# \b anchors limit the rewrite to stand-alone tokens, so a longer word that
# merely starts with those letters is left alone. regex=True is passed
# explicitly because the default pattern handling of str.replace differs
# between pandas versions.
# NOTE(review): this cell shows no execution count; make sure it is run before
# the table is pickled.
da['Province'] = da['Province'].str.replace(r'\bNw\b', 'NW', regex=True)
da['Province'] = da['Province'].str.replace(r'\bSw\b', 'SW', regex=True)

In [38]:
# Persist the finished province table; the path is relative to the notebook's
# working directory.
da.to_pickle(path='./pickleJar/d_longhurst.pkl')