# Locations of Survivor US seasons
> Make a geocoded list of season locations and sketch them on a map.   

#### Load Python tools and Jupyter config

In [4]:
import os
import requests
import googlemaps
import pandas as pd
import altair as alt
import geopandas as gpd
from shapely.geometry import Point, mapping

---

## Read, process data

#### Seasons dataframe with geocoded locations

In [None]:
# Remote GeoJSON holding one row per season with its geocoded filming location
seasons_geojson_url = 'https://stilesdata.com/survivor/seasons_geocoded_locations.geojson'
locations_gdf = gpd.read_file(seasons_geojson_url)

In [267]:
# Preview the first five rows of the seasons frame
locations_gdf.head(n=5)

Unnamed: 0,season_name,season,location,country,filming_started,filming_ended,latitude,longitude,geometry
0,Survivor: Borneo,1,"Pulau Tiga, Sabah, Malaysia",Malaysia,2000-03-13,2000-04-20,5.72531,115.653951,POINT (115.65395 5.72531)
1,Survivor: The Australian Outback,2,"Herbert River at Goshen Station, Queensland, A...",Australia,2000-10-23,2000-12-03,-18.146232,145.438669,POINT (145.43867 -18.14623)
2,Survivor: Africa,3,"Shaba National Reserve, Kenya",Kenya,2001-07-11,2001-08-18,0.640064,37.830003,POINT (37.83 0.64006)
3,Survivor: Marquesas,4,"Nuku Hiva, Marquesas Islands, French Polynesia",French Polynesia,2001-11-12,2001-12-20,-8.90617,-140.105531,POINT (-140.10553 -8.90617)
4,Survivor: Thailand,5,"Ko Tarutao, Satun Province, Thailand",Thailand,2002-06-10,2002-07-18,6.591317,99.656409,POINT (99.65641 6.59132)


In [256]:
# Build the row selectors first, then overwrite the `country` value for each group.
# The selectors only read `location` and `season`, so the assignments below
# cannot affect one another.
is_aitutaki = locations_gdf["location"] == "Aitutaki, Cook Islands"
is_nuku_hiva = locations_gdf["location"] == "Nuku Hiva, Marquesas Islands, French Polynesia"
is_panama_season = locations_gdf["season"].isin([7, 8, 12])

locations_gdf.loc[is_aitutaki, "country"] = 'Cook Islands'
locations_gdf.loc[is_nuku_hiva, "country"] = 'French Polynesia'
locations_gdf.loc[is_panama_season, "country"] = 'Panama'

In [None]:
# Distinct country labels across all seasons, with surrounding whitespace removed
stripped_countries = locations_gdf["country"].str.strip()
locations_countries = list(stripped_countries.unique())

---

## Weather

In [None]:
# TODO(review): the path passed to read_file is an empty string, so this cell
# presumably fails on a fresh run. Supply the climate normals file path or URL
# (not shown anywhere in this notebook) before relying on `climate_normals_src`.
climate_normals_src = gpd.read_file('')

#### WMO weather stations from [NCDC](https://gis.ncdc.noaa.gov/arcgis/rest/services/cdo/stations/MapServer/15)

In [258]:
# Remote GeoJSON of WMO weather stations
wmo_stations_url = 'https://stilesdata.com/weather/wmo_weather_stations.geojson'
stations_src = gpd.read_file(wmo_stations_url)

In [259]:
# Lower-case every column name so later cells can use lowercase keys
lowercased_names = stations_src.columns.str.lower()
stations_src.columns = lowercased_names

# Remove leading/trailing whitespace from the country labels
stations_src["country"] = stations_src["country"].str.strip()

In [None]:
# Row selectors for the two country relabels below.
# NOTE(review): ', PM' presumably marks Panama stations in `station_name` - confirm.
is_cook_abbrev = stations_src["country"] == "Cook Is."
mentions_pm = stations_src["station_name"].str.contains(', PM')

# Align the station country labels with the spellings used in the seasons frame
stations_src.loc[is_cook_abbrev, "country"] = 'Cook Islands'
stations_src.loc[mentions_pm, "country"] = 'Panama'

#### Keep only stations in Survivor countries that still had data when Survivor first filmed

In [261]:
# Epoch-millisecond cutoff (early March 2000, shortly before the first season's
# filming start of 2000-03-13 shown in the seasons frame). Stations whose data
# ended before this moment are dropped.
FIRST_FILMING_CUTOFF_MS = 952291200000

# Keep stations that (a) still had data at the cutoff and (b) sit in a country
# that hosted a season. `@name` lets query() read the Python variables directly
# instead of pasting the list's repr into the expression, which breaks on NaN
# and is fragile on unusual values.
stations_gdf = stations_src.query(
    'data_end_date >= @FIRST_FILMING_CUTOFF_MS and country.isin(@locations_countries)'
).copy()

In [262]:
# Each new text column (key) is derived from an epoch-millisecond column (value)
date_columns = {'data_begin': 'data_begin_date', 'data_end': 'data_end_date'}

for text_col, epoch_col in date_columns.items():
    # Interpret the raw values as milliseconds since the epoch, then render as YYYY-MM-DD text
    as_timestamps = pd.to_datetime(stations_gdf[epoch_col], unit='ms')
    stations_gdf[text_col] = as_timestamps.dt.strftime('%Y-%m-%d')

#### Which countries are missing? 

In [264]:
# Countries that still have at least one station after the date/country filter.
# This name was previously used without being defined, so the cell raised a
# NameError on a fresh kernel.
# NOTE(review): derived from the filtered `stations_gdf`; switch to `stations_src`
# if the comparison is meant to ignore the data-end-date cutoff.
stations_countries = list(stations_gdf["country"].unique())

# Compare the two country lists as sets
locations_set = set(locations_countries)
stations_set = set(stations_countries)

# Season countries that have no qualifying station
missing_from_stations = locations_set - stations_set

# Station countries that never hosted a season (empty while `stations_gdf`
# is already restricted to season countries)
missing_from_locations = stations_set - locations_set

print("Which seasons don't have a country with a station?", missing_from_stations)

Which seasons don't have a country with a station? {'Samoa', 'Panama', 'French Polynesia'}


---

#### Find missing stations

In [250]:
# Manually specified station record for a season country (Samoa), with its
# NCDC station id, display name and detail-page URL.
samoa_station = dict(
    season_country='Samoa',
    station_id='GHCND:AQW00061705',
    station_name='PAGO PAGO WEATHER SERVICE OFFICE AIRPORT, US',
    station_url='https://www.ncdc.noaa.gov/cdo-web/datasets/GHCND/stations/GHCND:AQW00061705/detail',
)

# One dict per manually matched station; currently a single entry
missing_stations_seasons = [samoa_station]

In [253]:
# Tabulate the manually matched stations, one row per dict.
# The list is named `missing_stations_seasons`; the previously referenced
# `stations_seasons` is not defined in the notebook as shown and would raise
# a NameError on a fresh kernel.
missing_stations_seasons_df = pd.DataFrame(missing_stations_seasons)
missing_stations_seasons_df

Unnamed: 0,season_country,station_id,station_name,station_url
0,Samoa,GHCND:AQW00061705,"PAGO PAGO WEATHER SERVICE OFFICE AIRPORT, US",https://www.ncdc.noaa.gov/cdo-web/datasets/GHC...
