# Reshaping data to wide format with *tidyr*

The following makes use of the packages *tidyr*, *readr*, and *dplyr*. You may need to
install them from [CRAN](https://cran.r-project.org) using the code
`install.packages(c("tidyr","readr","dplyr"))` if you want to run this on your computer. (The packages are already installed
on the notebook container, however.)

In [1]:
# Peek at the first five lines of the raw CSV file, showing only the
# first 40 characters of each line.
substr(
  readLines("inequality-oecd-downloaded.csv", n = 5),
  start = 1,
  stop = 40
)

[1] "\"LOCATION\",\"INDICATOR\",\"SUBJECT\",\"MEASUR"    
[2] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"
[3] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"
[4] "\"AUS\",\"INCOMEINEQ\",\"P50P10\",\"RT\",\"A\",\"20"
[5] "\"AUS\",\"INCOMEINEQ\",\"P90P10\",\"RT\",\"A\",\"20"

In [2]:
library(readr)

In [3]:
# Import the downloaded CSV file with readr; the column types are guessed
# from the data and reported in the message printed below.
inequality.oecd.dld <- read_csv("inequality-oecd-downloaded.csv")

Rows: 2315 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): LOCATION, INDICATOR, SUBJECT, MEASURE, FREQUENCY, Flag Codes
dbl (2): TIME, Value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

In [4]:
# Print the imported data (a tibble in long format: one row per
# LOCATION, SUBJECT and TIME combination).
inequality.oecd.dld

# A tibble: 2,315 × 8
   LOCATION INDICATOR  SUBJECT MEASURE FREQUENCY  TIME Value `Flag Codes`
   <chr>    <chr>      <chr>   <chr>   <chr>     <dbl> <dbl> <chr>
 1 AUS      INCOMEINEQ P50P10  RT      A          2012 2.2   NA
 2 AUS      INCOMEINEQ P50P10  RT      A          2014 2.2   NA
 3 AUS      INCOMEINEQ P50P10  RT      A          2016 2.1   NA
 4 AUS      INCOMEINEQ P90P10  RT      A          2012 4.4   NA
 5 AUS      INCOMEINEQ P90P10  RT      A          2014 4.3   NA
 6 AUS      INCOMEINEQ P90P10  RT      A

In [5]:
library(tidyr)

In [6]:
# Spread the values of `Value` into one column per level of `SUBJECT`.
# All remaining columns are kept as identifiers, so observations that
# differ in any of them (e.g. in MEASURE) end up in separate rows.
inequality.oecd <- inequality.oecd.dld %>%
  spread(key = "SUBJECT", value = "Value")

In [7]:
# Show the result without the INDICATOR, FREQUENCY and `Flag Codes` columns
# so that the reshaped columns fit on screen. The columns are excluded by
# name rather than by position, so the call keeps working if the column
# order of the data ever changes.
inequality.oecd[setdiff(names(inequality.oecd),
                        c("INDICATOR", "FREQUENCY", "Flag Codes"))]

# A tibble: 771 × 9
   LOCATION MEASURE  TIME   GINI P50P10 P90P10 P90P50 PALMA S80S20
   <chr>    <chr>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
 1 AUS      INEQ     2012  0.326   NA     NA     NA   NA      NA
 2 AUS      INEQ     2014  0.337   NA     NA     NA   NA      NA
 3 AUS      INEQ     2016  0.33    NA     NA     NA   NA      NA
 4 AUS      RT       2012 NA        2.2    4.4    2    1.24    5.5
 5 AUS      RT       2014 NA        2.2

In [8]:
library(dplyr)


Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [9]:
# Keep only the identifying variables (LOCATION, TIME), the key (SUBJECT)
# and the values (Value) before reshaping, then print the reduced tibble.
inequality.oecd.sub <- inequality.oecd.dld %>%
  select(LOCATION, SUBJECT, TIME, Value)
inequality.oecd.sub

# A tibble: 2,315 × 4
   LOCATION SUBJECT  TIME Value
   <chr>    <chr>   <dbl> <dbl>
 1 AUS      P50P10   2012 2.2
 2 AUS      P50P10   2014 2.2
 3 AUS      P50P10   2016 2.1
 4 AUS      P90P10   2012 4.4
 5 AUS      P90P10   2014 4.3
 6 AUS      P90P10   2016 4.3
 7 AUS      P90P50   2012 2
 8 AUS      P90P50   2014 2
 9 AUS      P90P50   2016 2.1
10 AUS      GINI     2012 0.326
# ℹ 2,305 more rows

In [10]:
# Reshape the reduced data to wide format: one row per LOCATION and TIME,
# one column per level of SUBJECT.
inequality.oecd <- inequality.oecd.sub %>%
  spread(key = SUBJECT, value = Value)

In [11]:
# Print the wide-format result of spread().
inequality.oecd

# A tibble: 386 × 8
   LOCATION  TIME  GINI P50P10 P90P10 P90P50 PALMA S80S20
   <chr>    <dbl> <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
 1 AUS       2012 0.326    2.2    4.4    2    1.24    5.5
 2 AUS       2014 0.337    2.2    4.3    2    1.34    5.7
 3 AUS       2016 0.33     2.1    4.3    2.1  1.26    5.5
 4 AUT       2007 0.284    2      3.6    1.8  1       4.4
 5 AUT       2008 0.281    1.9    3.4    1.8  1       4.3
 6 AUT       2009 0.289    2      3.6    1.8  1.03    4.5
 7 AUT       2010 0.28     1.9    3.5    1.8  0.98    4.3
 8 AUT       2011 0.281    1.9    3.5    1

In [12]:
# The same reshaping done with pivot_wider() directly on the imported data.
# `id_cols` names the identifying columns, so the other columns do not have
# to be removed beforehand.
inequality.oecd <- inequality.oecd.dld %>%
  pivot_wider(
    id_cols = c(LOCATION, TIME),
    names_from = SUBJECT,
    values_from = Value
  )

In [13]:
# Print the wide-format result of pivot_wider(). Note that the value columns
# come out in a different order than in the spread() result shown earlier.
inequality.oecd

# A tibble: 386 × 8
   LOCATION  TIME P50P10 P90P10 P90P50  GINI S80S20 PALMA
   <chr>    <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl> <dbl>
 1 AUS       2012    2.2    4.4    2   0.326    5.5  1.24
 2 AUS       2014    2.2    4.3    2   0.337    5.7  1.34
 3 AUS       2016    2.1    4.3    2.1 0.33     5.5  1.26
 4 AUT       2007    2      3.6    1.8 0.284    4.4  1
 5 AUT       2008    1.9    3.4    1.8 0.281    4.3  1
 6 AUT       2009    2      3.6    1.8 0.289    4.5  1.03
 7 AUT       2010    1.9    3.5    1.8 0.28     4.3  0.98
 8 AUT       2011    1.9    3.5    1.8 0.2