# Reshaping data to long format with *tidyr*

The following makes use of the packages *tidyr* and *readr*. If you want to
run this on your own computer, you may need to install them from
[CRAN](https://cran.r-project.org) with
`install.packages(c("tidyr","readr"))`. (Both packages are already installed
in the notebook container.)

In [1]:
# Peek at the raw file: the first 50 characters of each of its first five lines
substr(
  readLines("gini-oecd.tsv", n = 5),
  start = 1,
  stop = 50
)

[1] "Data table for: Income inequality, Gini coefficien"        
[2] "Location \t 2007\t2008\t 2009\t2010\t2011\t2012\t2013\t201"
[3] "Australia\t\t\t\t\t\t0.326\t\t0.337\t\t0.330\t"            
[4] "Austria\t0.284\t0.281\t0.289\t0.280\t0.281\t0.275\t0.279\t"
[5] "Belgium\t0.277\t0.266\t0.272\t0.267\t0.270\t0.265\t0.265\t"

In [2]:
library(readr)

In [3]:
# Import the tab-separated file; the first line is a title, so skip it and
# let the second line supply the column names
gini.oecd <- read_tsv(file = "gini-oecd.tsv", skip = 1)

Rows: 42 Columns: 12
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr  (1): Location
dbl (11): 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

In [4]:
# Display the imported tibble (wide format: one column per year)
gini.oecd

# A tibble: 42 × 12
   Location       `2007` `2008` `2009` `2010` `2011` `2012` `2013` `2014` `2015`
   <chr>           <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 Australia      NA     NA     NA     NA     NA      0.326 NA      0.337 NA    
 2 Austria         0.284  0.281  0.289  0.28   0.281  0.275  0.279  0.274  0.276
 3 Belgium         0.277  0.266  0.272  0.267  0.27   0.265  0.265  0.266  0.268
 4 Brazil         NA     NA      0.485 NA      0.483 NA      0.47  NA     NA    
 5 Canada         

In [5]:
library(tidyr)

In [6]:
# Stack the eleven year columns, each named explicitly, into key/value pairs:
# the former column names go into `year`, the cell values into `gini`
gini.oecd.long <- gini.oecd %>%
  gather(
    `2007`, `2008`, `2009`, `2010`, `2011`, `2012`,
    `2013`, `2014`, `2015`, `2016`, `2017`,
    key = "year",
    value = "gini"
  )
gini.oecd.long

# A tibble: 462 × 3
   Location                     year    gini
   <chr>                        <chr>  <dbl>
 1 Australia                    2007  NA    
 2 Austria                      2007   0.284
 3 Belgium                      2007   0.277
 4 Brazil                       2007  NA    
 5 Canada                       2007   0.317
 6 Chile                        2007  NA    
 7 China (People's Republic of) 2007  NA    
 8 Costa Rica                   2007  NA    
 9 Czech Republic               2007   0.256
10 Denmark                      2007  NA    
# ℹ 452 more rows

In [7]:
# Same reshape, written more compactly: gather every column except `Location`
gini.oecd.long <- gini.oecd %>%
  gather(
    -Location,
    key = "year",
    value = "gini"
  )
gini.oecd.long

# A tibble: 462 × 3
   Location                     year    gini
   <chr>                        <chr>  <dbl>
 1 Australia                    2007  NA    
 2 Austria                      2007   0.284
 3 Belgium                      2007   0.277
 4 Brazil                       2007  NA    
 5 Canada                       2007   0.317
 6 Chile                        2007  NA    
 7 China (People's Republic of) 2007  NA    
 8 Costa Rica                   2007  NA    
 9 Czech Republic               2007   0.256
10 Denmark                      2007  NA    
# ℹ 452 more rows

In [8]:
# Reshape to long format with pivot_longer(): every column except `Location`
# is stacked, with the column names stored in `year` and the values in `gini`
gini.oecd.long <- gini.oecd %>%
  pivot_longer(
    cols = -Location,
    names_to = "year",
    values_to = "gini"
  )
gini.oecd.long

# A tibble: 462 × 3
   Location  year    gini
   <chr>     <chr>  <dbl>
 1 Australia 2007  NA    
 2 Australia 2008  NA    
 3 Australia 2009  NA    
 4 Australia 2010  NA    
 5 Australia 2011  NA    
 6 Australia 2012   0.326
 7 Australia 2013  NA    
 8 Australia 2014   0.337
 9 Australia 2015  NA    
10 Australia 2016   0.33 
# ℹ 452 more rows