# Google Maps API
Setup the notebook with functions to get geo coordinates based on postal code. There was timeout issues with the Google Maps API which caused geo coordinates to be empty. A loop to check for this was added to ensure we have complete information available.

In [122]:
import requests
import pandas as pd
import re
import getpass
from datetime import datetime


# get lon and lat geo coordinates
def get_geo_coordinates(row):
    long, lat, has_no_geoloc = 0, 0, True
    
    while has_no_geoloc:
        # get geo coordinates via google api
        resp = requests.get("https://maps.googleapis.com/maps/api/geocode/json?address=" + re.sub("\s","%20",row['postal_code']) + "&json=1")
        if resp.status_code != 200:
            # This means something went wrong.
            raise Exception
        else:
            geo_data = resp.json()
            if len(geo_data['results']) > 0:
                long = geo_data['results'][0]['geometry']['location']['lng']
                lat = geo_data['results'][0]['geometry']['location']['lat']
                has_no_geoloc = False
    return pd.Series({'long': long, 'lat': lat})

def get_precip_probability(row):
    return pd.Series({'precip_probability': 0})

In [70]:
# read and explore locations.csv
locations = pd.read_csv("locations.csv")
locations.head()

Unnamed: 0,loc_id,postal_code,date_first,date_last
0,9be000ae23275d57e1273d211a54ffd7,AL3 5DQ,1485907200,1488326400
1,a35e427b4130be7b2a892e286f0ebb91,CB2 3QF,1486944000,1488326400
2,185674a2eb5c14fbdbb1d05a4109ea55,EX1 1GJ,1486425600,1488326400
3,afbddd7f957a1c822293616e95a2d84c,NG2 5GR,1486512000,1488326400
4,81cea1e224ad183b751acce139f4e276,GU21 6XX,1486080000,1488326400


In [71]:
# assuming that the data is clean and that there are only exactly days between date_first and date_last
locations['days'] = (locations['date_last'] - locations['date_first']) / (60 * 60 * 24)

In [72]:
locations.head()

Unnamed: 0,loc_id,postal_code,date_first,date_last,days
0,9be000ae23275d57e1273d211a54ffd7,AL3 5DQ,1485907200,1488326400,28.0
1,a35e427b4130be7b2a892e286f0ebb91,CB2 3QF,1486944000,1488326400,16.0
2,185674a2eb5c14fbdbb1d05a4109ea55,EX1 1GJ,1486425600,1488326400,22.0
3,afbddd7f957a1c822293616e95a2d84c,NG2 5GR,1486512000,1488326400,21.0
4,81cea1e224ad183b751acce139f4e276,GU21 6XX,1486080000,1488326400,26.0


In [73]:
locations[["long", "lat"]] = locations.apply(get_geo_coordinates, axis=1)
locations

Unnamed: 0,loc_id,postal_code,date_first,date_last,days,long,lat
0,9be000ae23275d57e1273d211a54ffd7,AL3 5DQ,1485907200,1488326400,28.0,51.751905,-0.34051
1,a35e427b4130be7b2a892e286f0ebb91,CB2 3QF,1486944000,1488326400,16.0,52.204212,0.120337
2,185674a2eb5c14fbdbb1d05a4109ea55,EX1 1GJ,1486425600,1488326400,22.0,50.723242,-3.528028
3,afbddd7f957a1c822293616e95a2d84c,NG2 5GR,1486512000,1488326400,21.0,52.931983,-1.127144
4,81cea1e224ad183b751acce139f4e276,GU21 6XX,1486080000,1488326400,26.0,51.319326,-0.558755
5,5f1ce9b7c8cd32c08d98310540fb6604,BS34 5DG,1487289600,1488326400,12.0,51.525159,-2.595357
6,5180af03094779de849ca816c9f5b753,N10 1BY,1486944000,1488326400,16.0,51.592149,-0.143414
7,7f2aa8e72612f9130e06b32a0d2a58d7,BR1 1DN,1485907200,1488326400,28.0,51.403891,0.016582
8,8b049b660e984912c48da213f2f7c650,LS29 9PA,1486425600,1488326400,22.0,53.924762,-1.827509
9,fe5d591b3509247487a917d4e8a33f65,SE1 9FU,1486512000,1488326400,21.0,51.507117,-0.101308


# Dark Sky API

In [79]:
# get dark sky api key from user
dark_sky_api_key = getpass.getpass()

········


## Note
Dark Sky API states:
```
https://api.darksky.net/forecast/[key]/[latitude],[longitude],[time]
```
However, after testing, it looks like the documents have long and lat reversed. This project uses the follow api access url:
```
https://api.darksky.net/forecast/[key]/[longitude],[latitude],[time]
```

In [82]:
resp = requests.get("https://api.darksky.net/forecast/" + dark_sky_api_key + "/50.903584,-1.407188,1487808000?exclude=currently,minutely,hourly,flags")
if resp.status_code != 200:
    # This means something went wrong.
    raise Exception
else:
    weather_data = resp.json()
    print(weather_data)

{'offset': 0, 'timezone': 'Europe/London', 'daily': {'data': [{'humidity': 0.73, 'time': 1487808000, 'apparentTemperatureMax': 42.39, 'visibility': 6.85, 'dewPoint': 38.42, 'windSpeed': 21.64, 'windBearing': 261, 'temperatureMax': 49.97, 'cloudCover': 0.66, 'precipType': 'rain', 'summary': 'Windy and mostly cloudy until evening.', 'apparentTemperatureMinTime': 1487883600, 'moonPhase': 0.89, 'sunsetTime': 1487871423, 'sunriseTime': 1487833432, 'icon': 'wind', 'temperatureMinTime': 1487887200, 'pressure': 1000.95, 'apparentTemperatureMin': 33.42, 'apparentTemperatureMaxTime': 1487833200, 'temperatureMaxTime': 1487833200, 'temperatureMin': 41.09}]}, 'latitude': 50.903584, 'longitude': -1.407188}


In [83]:
locations

Unnamed: 0,loc_id,postal_code,date_first,date_last,days,long,lat
0,9be000ae23275d57e1273d211a54ffd7,AL3 5DQ,1485907200,1488326400,28.0,51.751905,-0.34051
1,a35e427b4130be7b2a892e286f0ebb91,CB2 3QF,1486944000,1488326400,16.0,52.204212,0.120337
2,185674a2eb5c14fbdbb1d05a4109ea55,EX1 1GJ,1486425600,1488326400,22.0,50.723242,-3.528028
3,afbddd7f957a1c822293616e95a2d84c,NG2 5GR,1486512000,1488326400,21.0,52.931983,-1.127144
4,81cea1e224ad183b751acce139f4e276,GU21 6XX,1486080000,1488326400,26.0,51.319326,-0.558755
5,5f1ce9b7c8cd32c08d98310540fb6604,BS34 5DG,1487289600,1488326400,12.0,51.525159,-2.595357
6,5180af03094779de849ca816c9f5b753,N10 1BY,1486944000,1488326400,16.0,51.592149,-0.143414
7,7f2aa8e72612f9130e06b32a0d2a58d7,BR1 1DN,1485907200,1488326400,28.0,51.403891,0.016582
8,8b049b660e984912c48da213f2f7c650,LS29 9PA,1486425600,1488326400,22.0,53.924762,-1.827509
9,fe5d591b3509247487a917d4e8a33f65,SE1 9FU,1486512000,1488326400,21.0,51.507117,-0.101308


In [120]:
rows_list = []
for index, row in locations.iterrows():
    i = 0
    epoch = row['date_first']
    while epoch < row['date_last']:
        s = {
            'loc_id' : row['loc_id'],
            'date' : datetime.fromtimestamp(epoch).strftime('%Y-%m-%d'),
            'long' : row['long'],
            'lat' : row['lat'],
        }
        rows_list.append(s)
        i += 1
        epoch += (60*60*24)
    
df_locations_cleaned = pd.DataFrame(rows_list)

df_locations_cleaned

Unnamed: 0,date,lat,loc_id,long
0,2017-01-31,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
1,2017-02-01,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
2,2017-02-02,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
3,2017-02-03,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
4,2017-02-04,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
5,2017-02-05,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
6,2017-02-06,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
7,2017-02-07,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
8,2017-02-08,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905
9,2017-02-09,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905


In [123]:
df_locations_cleaned[["pp"]] = df_locations_cleaned.apply(get_precip_probability, axis=1)
df_locations_cleaned

Unnamed: 0,date,lat,loc_id,long,pp
0,2017-01-31,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
1,2017-02-01,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
2,2017-02-02,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
3,2017-02-03,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
4,2017-02-04,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
5,2017-02-05,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
6,2017-02-06,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
7,2017-02-07,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
8,2017-02-08,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0
9,2017-02-09,-0.340510,9be000ae23275d57e1273d211a54ffd7,51.751905,0


In [124]:
df_locations_cleaned.pivot(index='loc_id', columns='date', values='pp')

date,2017-01-31,2017-02-01,2017-02-02,2017-02-03,2017-02-04,2017-02-05,2017-02-06,2017-02-07,2017-02-08,2017-02-09,...,2017-02-18,2017-02-19,2017-02-20,2017-02-21,2017-02-22,2017-02-23,2017-02-24,2017-02-25,2017-02-26,2017-02-27
loc_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
07d09eab6c837d4b0c0b17aba37e1dcf,,,,,,,,,,,...,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11aeddbd12e79cae8dddb694e200f00d,,,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
16bdd6fe40ed85ce99a456af6d7cff93,,,,,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
185674a2eb5c14fbdbb1d05a4109ea55,,,,,,,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5180af03094779de849ca816c9f5b753,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
51fa5362e943615c7b31d367b461fd2c,,,,,,,,,,,...,,,,,0.0,0.0,0.0,0.0,0.0,0.0
58f7d1d3ce8cc4e808bf840b56714b38,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5f1ce9b7c8cd32c08d98310540fb6604,,,,,,,,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7f2aa8e72612f9130e06b32a0d2a58d7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
81cea1e224ad183b751acce139f4e276,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
