In [19]:
import io
import json
import os

import pandas as pd
import requests

#Saving csv content from git in a pandas dataframe.
url="https://raw.githubusercontent.com/abeasock/coding_assessment/master/locations.csv"
#A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
#Fail here on an HTTP error instead of handing an error page to the CSV parser.
response.raise_for_status()
s = response.content
locations = pd.read_csv(io.StringIO(s.decode('utf-8')))

In [20]:
#Extracting hour of day and date as separate columns from timestamp column in csv.
import dateutil.parser
def parse_full_date(row):
    """Parse the row's 'timestamp' entry and return it as a pandas datetime.

    The value is first parsed with dateutil.parser.parse and the result is
    then passed through pd.to_datetime.
    """
    parsed = dateutil.parser.parse(row['timestamp'])
    return pd.to_datetime(parsed)

#Parse every row's timestamp into a datetime column.
locations['datetime'] = locations.apply(parse_full_date, axis='columns')
#API returns hour as '1200' for 12PM etc. To match with it, the hour is turned into a string with '00' appended;
#midnight would come out as '000', which is rewritten to '0'.
hour_text = locations['datetime'].dt.hour.astype(str)
locations['hour'] = (hour_text + '00').replace({'000': '0'})
#Calendar date of each timestamp, used later in the weather request.
locations['date'] = locations['datetime'].dt.date

In [21]:
#Latitude and longitude are concatenated into the API request urls, so both columns are cast to strings.
for coord_col in ('latitude', 'longitude'):
    locations[coord_col] = locations[coord_col].astype(str)

In [4]:
#getweather function takes in API weather request url and dataframe row as input and extracts tempF, weatherDesc, precipMM and
#cloudcover from the json file returned by the API.
def getweather(url,row,timeout=30):
    """Request hourly weather JSON and return the readings for the row's hour.

    Parameters:
        url: complete weather request url.
        row: mapping-like object; row['hour'] is compared for equality with
            the 'time' value of each hourly entry in the response.
        timeout: seconds passed to requests.get so a stalled connection
            cannot hang the caller indefinitely.

    Returns:
        Tuple (tempF, weatherDesc, precipMM, cloudcover) taken from the first
        hourly entry whose 'time' equals row['hour']. Values are returned
        exactly as they appear in the JSON; weatherDesc is the 'value' of
        the first item in the entry's 'weatherDesc' list.

    Raises:
        Whatever response.raise_for_status() raises on an HTTP error status.
        KeyError: the JSON lacks one of the expected keys.
        IndexError: no hourly entry matches row['hour'].
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures here rather than as a confusing KeyError below.
    response.raise_for_status()
    payload = json.loads(response.content)
    hourly = payload['data']['weather'][0]['hourly']

    # One pass over the hourly entries; no DataFrame is needed just to pick
    # a single record, and this avoids DataFrame.get_value, which no longer
    # exists in current pandas.
    wanted = row['hour']
    match = next((entry for entry in hourly if entry.get('time') == wanted), None)
    if match is None:
        raise IndexError("no hourly weather entry with time == %r" % (wanted,))

    tempF = match['tempF']
    weatherDesc = match['weatherDesc'][0]['value']
    precipMM = match['precipMM']
    cloudcover = match['cloudcover']
    return tempF, weatherDesc, precipMM, cloudcover

In [5]:
#getlocation function takes in API search request url and dataframe row as input and extracts areaname,
#state and population from JSON output
def getlocation(url,row,timeout=30):
    """Request location-search JSON and return details of the first result.

    Parameters:
        url: complete search request url.
        row: not read by this function; kept so existing calls that pass
            the dataframe row keep working.
        timeout: seconds passed to requests.get so a stalled connection
            cannot hang the caller indefinitely.

    Returns:
        Tuple (areaName, state, population) read from the first element of
        the response's ['search_api']['result'] list. areaName and state are
        the 'value' of the first item of the 'areaName' and 'region' lists;
        population is returned as it appears in the JSON.

    Raises:
        Whatever response.raise_for_status() raises on an HTTP error status.
        KeyError: the JSON lacks an expected key, or the result list is empty.
    """
    response = requests.get(url, timeout=timeout)
    # Surface HTTP failures here rather than as a confusing KeyError below.
    response.raise_for_status()
    payload = json.loads(response.content)
    results = payload['search_api']['result']
    if not results:
        # KeyError keeps the exception type an empty result produced before.
        raise KeyError("search response contains no results")

    # Only the first result is used, so index it directly instead of
    # building a DataFrame.
    first = results[0]
    areaName = first['areaName'][0]['value']
    state = first['region'][0]['value']
    population = first['population']
    return areaName, state, population

In [6]:
#Initializing new variables in the dataframe to be written to csv.
for new_col in ("tempF", "weatherDesc", "precipMM", "cloudcover", "areaName", "state", "population"):
    locations[new_col] = ""
#API key for Srihari. The WWO_API_KEY environment variable takes precedence when it is set, so the
#credential does not have to live in the notebook.
#NOTE(review): the fallback literal is a key committed to source - rotate it and remove the fallback.
key = os.environ.get("WWO_API_KEY", "81be49b0cf4c4459b3d130631171108")
#Weather API request url.
#NOTE(review): both urls use plain http, so the key is sent unencrypted - confirm whether https is supported.
weatherurl = "http://api.worldweatheronline.com/premium/v1/past-weather.ashx?key="
#Location API request url.
locationurl = "http://api.worldweatheronline.com/premium/v1/search.ashx?key="
#For every row in locations dataframe, we create the complete urls for each API and call the getweather and getlocation functions.
for index, row in locations.iterrows():
    #Both requests identify the point with the same "&q=<latitude>,<longitude>" fragment.
    query = "&q=" + row['latitude'] + "," + row['longitude']

    wurl = weatherurl + key + query + "&tp=1&date=" + str(row['date']) + "&format=json"
    tempF, weatherDesc, precipMM, cloudcover = getweather(wurl,row)
    locations.loc[index,"tempF"] = tempF
    locations.loc[index,"weatherDesc"] = weatherDesc
    locations.loc[index,"precipMM"] = precipMM
    locations.loc[index,"cloudcover"] = cloudcover

    lurl = locationurl + key + query + "&num_of_results=1" + "&format=json"
    areaName, state, population = getlocation(lurl,row)
    locations.loc[index,"areaName"] = areaName
    locations.loc[index,"state"] = state
    locations.loc[index,"population"] = population

In [7]:
#Writing out the locations dataframe to locations_weather.csv file.
#Only the original input columns plus the values fetched from the two APIs are exported, in this order.
source_cols = ['timestamp','latitude','longitude']
place_cols = ['areaName','state','population']
weather_cols = ['tempF','weatherDesc','precipMM','cloudcover']
output_col = source_cols + place_cols + weather_cols
#NOTE(review): absolute local path - presumably this folder must already exist on the machine running the notebook.
locations[output_col].to_csv('C:/PravMC/locations_weather.csv',index=False)