# WEATHER DATA
This notebook contains code used to scrape Visual Crossing Weather API. 
It reads events.csv file with data about each running event, from which date and city are then used as input into the URL.
From the weather API, several parameters are extracted, presented as a data frame and written to a weather.csv file.

In [1]:
## Load libraries
library(tidyverse)
library(glue)
library(httr)
library(jsonlite)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.3.6      [32m✔[39m [34mpurrr  [39m 0.3.5 
[32m✔[39m [34mtibble [39m 3.1.8      [32m✔[39m [34mdplyr  [39m 1.0.10
[32m✔[39m [34mtidyr  [39m 1.2.0      [32m✔[39m [34mstringr[39m 1.4.1 
[32m✔[39m [34mreadr  [39m 2.1.2      [32m✔[39m [34mforcats[39m 0.5.2 
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()

Attaching package: ‘jsonlite’


The following object is masked from ‘package:purrr’:

    flatten




In [2]:
## read the events.csv file
races <- read.csv("events.csv", header=TRUE)

In [3]:
races

X,Event,Link,Date,Participants,Location,City
<int>,<chr>,<chr>,<chr>,<int>,<chr>,<chr>
1,Tauranga Marathon,https://taurangamarathon.nz,2022-09-18,1570,Tauranga,Tauranga
2,Devonport Half Marathon,https://devonporthalfmarathon.co.nz,2022-10-02,1268,Devonport,Auckland
3,Run Orewa,https://runorewa.nz,2022-10-16,857,Orewa,Auckland
4,Corporate Challenge Wellington,https://corporatechallenge.co.nz,2020-11-11,393,Wellington,Wellington
5,Corporate Challenge Christchurch,https://corporatechallenge.co.nz,2022-02-16,767,Christchurch,Christchurch
6,Corporate Challenge Auckland,https://corporatechallenge.co.nz,2020-11-18,938,Auckland,Auckland
7,Run The Point,https://runthepoint.nz,2022-02-20,771,Hobsonville,Auckland
8,Omaha Half Marathon,https://omahahalfmarathon.co.nz,2022-03-27,1689,Omaha,Auckland
9,Run Devonport,https://rundevonport.nz,2022-02-07,768,Devonport,Auckland
10,Coatesville Half Marathon,https://coatesvillehalfmarathon.co.nz,2022-02-13,1270,Coatesville,Auckland


In [4]:
##run function to obtain weather data

get_weather <- function(races) {
    weather = NULL
    for(i in 1:nrow(races)) {
        city = races$City # assign variable to City column in races data frame
        date = races$Date # assign variable to Date column in races dataframe
        query_url <- glue("https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/{city[i]}/{date[i]}/{date[i]}?unitGroup=metric&key=2AGY57Q48GAUKNU3LA8E2YAYC&contentType=json")
        api_call <- httr::GET(query_url, encoding = "json")
        api_content <- api_call$content
        api_char <- rawToChar(api_content)
        api_json <- jsonlite::stream_in(textConnection(api_char), simplifyDataFrame = FALSE)
        Date <- api_json[[1]][['days']][[1]][['datetime']] # extract date 
        Condition <- api_json[[1]][['days']][[1]][['conditions']] # extract condition description
        Temp <- api_json[[1]][['days']][[1]][['temp']] # extract average temperature
        Rain <- api_json[[1]][['days']][[1]][['precip']] # extract amount of rain
        Wind <- api_json[[1]][['days']][[1]][['windspeed']] # extract wind speed
        Pressure <- api_json[[1]][['days']][[1]][['pressure']] # extract pressure
        Visibility <- api_json[[1]][['days']][[1]][['visibility']] # extract visibility
        Cloud <- api_json[[1]][['days']][[1]][['cloudcover']] # extract cloud cover
        UV <- api_json[[1]][['days']][[1]][['uvindex']] # extract UV index
        weather = rbind(weather, data.frame(Date, Condition, Temp, Rain, Wind, Pressure, Visibility, Cloud, UV)) }
                        
    print(weather) }

In [5]:
weather <- get_weather(races)

 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
 Imported 1 records. Simplifying...
         Date              Condition Temp  Rain Wind Pressure Visibility Cloud
1  2022-09-18                  Clear 11.6  0.00 14.8   1027.1       20.0   0.0
2  2022-10-02 Rain, Partially cloudy 16.5  3.99 24.7   1001.3       11.6  49.7
3  2022-10-16 Rain, Partially cloudy 14.0  0.10 29.5   1013.7       10.6  26.3
4  2020-11-11 Rain, Partially cloudy 12.0  0.03 39.3   1015.5       10.6  81.5
5  2022-02-16       Partially cloudy 16.9  0.00 27.6   1018.2       11.5  38.9
6  2020-11-18 Rain, Partially cloudy 17.4  0.77 42.6   101

In [6]:
weather

Date,Condition,Temp,Rain,Wind,Pressure,Visibility,Cloud,UV
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2022-09-18,Clear,11.6,0.0,14.8,1027.1,20.0,0.0,6
2022-10-02,"Rain, Partially cloudy",16.5,3.99,24.7,1001.3,11.6,49.7,7
2022-10-16,"Rain, Partially cloudy",14.0,0.1,29.5,1013.7,10.6,26.3,10
2020-11-11,"Rain, Partially cloudy",12.0,0.03,39.3,1015.5,10.6,81.5,4
2022-02-16,Partially cloudy,16.9,0.0,27.6,1018.2,11.5,38.9,9
2020-11-18,"Rain, Partially cloudy",17.4,0.77,42.6,1010.5,12.6,29.9,10
2022-02-20,Clear,21.1,0.0,22.0,1016.2,13.5,16.5,8
2022-03-27,Clear,17.5,0.0,20.5,1016.1,13.7,3.0,7
2022-02-07,"Rain, Partially cloudy",20.8,46.31,22.0,1010.7,10.2,60.5,3
2022-02-13,"Rain, Partially cloudy",20.9,3.21,76.0,1000.1,9.9,48.3,8


In [7]:
##Rename columns
weather <- weather %>%
  rename('Average Temp' = Temp,
        Precipitation = Rain,
        'Cloud Cover' = Cloud)

In [8]:
weather

Date,Condition,Average Temp,Precipitation,Wind,Pressure,Visibility,Cloud Cover,UV
<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
2022-09-18,Clear,11.6,0.0,14.8,1027.1,20.0,0.0,6
2022-10-02,"Rain, Partially cloudy",16.5,3.99,24.7,1001.3,11.6,49.7,7
2022-10-16,"Rain, Partially cloudy",14.0,0.1,29.5,1013.7,10.6,26.3,10
2020-11-11,"Rain, Partially cloudy",12.0,0.03,39.3,1015.5,10.6,81.5,4
2022-02-16,Partially cloudy,16.9,0.0,27.6,1018.2,11.5,38.9,9
2020-11-18,"Rain, Partially cloudy",17.4,0.77,42.6,1010.5,12.6,29.9,10
2022-02-20,Clear,21.1,0.0,22.0,1016.2,13.5,16.5,8
2022-03-27,Clear,17.5,0.0,20.5,1016.1,13.7,3.0,7
2022-02-07,"Rain, Partially cloudy",20.8,46.31,22.0,1010.7,10.2,60.5,3
2022-02-13,"Rain, Partially cloudy",20.9,3.21,76.0,1000.1,9.9,48.3,8


In [None]:
## Save as CSV file 
write.csv(weather,"weather.csv")