In [4]:
# dplyr provides a lot of great ways to handle your data.
# In this example, we will merge Train and Weather data by Date column.
# The main operator of this tutorial is %>%. It takes the argument on its left
# and passes it to the function on its right: x %>% f(...) is equivalent to f(x, ...).
# Looks useless for now, but believe me, you'll love it. 

library(dplyr)
library(data.table)

# Read both CSV files with data.table's fast reader. Passing
# data.table = FALSE makes fread() return an ordinary data.frame directly.
train <- fread("Datasets/west_nile/input/train.csv", data.table = FALSE)
weather <- fread("Datasets/west_nile/input/weather.csv", data.table = FALSE)

# Prepare the weather table for joining. Steps, in pipeline order:
#   1. keep only the columns of interest (Date through Tmin, plus DewPoint);
#   2. coerce every kept column except Date to numeric (the original note says
#      the values may arrive as character);
#   3. parse Date into a proper Date object;
#   4. collapse to one row per date by averaging each column, i.e. the mean
#      over the stations reporting on that date.
# across() is used instead of mutate_each() / summarise_each() / funs(),
# which dplyr has deprecated.
# With dplyr it's simple and beautiful!

weather <- weather %>%                     # Take weather and send it down the pipe.
  select(Date:Tmin, DewPoint) %>%          # Keep only the interesting columns.
  mutate(
    across(-Date, as.numeric),             # as.numeric on every column but Date.
    Date = as.Date(Date)                   # Convert Date to Date class.
  ) %>%
  group_by(Date) %>%                       # Group by date to summarise.
  summarise(across(everything(), mean))    # Mean of each non-grouping column per date.

head(weather) # Take a look at our new data. Isn't it cool?

# Give train a real Date column too, then attach the per-date weather
# averages. left_join() keeps every row of train and matches on Date.
train <- train %>%
  mutate(Date = as.Date(Date)) %>%
  left_join(weather, by = "Date")

# Here you go. Now you can start your serious business.
# (Note the last three columns: they come from weather.)
names(train)

Unnamed: 0,Date,Tmax,Tmin,DewPoint
1,2007-05-01,83.5,51.0,51.0
2,2007-05-02,59.5,42.5,42.0
3,2007-05-03,66.5,47.0,40.0
4,2007-05-04,72.0,50.0,41.5
5,2007-05-05,66.0,53.5,38.5
6,2007-05-06,68.0,50.5,30.0
