## File I/O: Reading from / writing to a data file

In [144]:
using Pkg
Pkg.add("DataFrames")
using DataFrames

[32m[1m Resolving[22m[39m package versions...
[32m[1m  Updating[22m[39m `/opt/julia/environments/v1.0/Project.toml`
[90m [no changes][39m
[32m[1m  Updating[22m[39m `/opt/julia/environments/v1.0/Manifest.toml`
[90m [no changes][39m


In [None]:
?readdlm

In [173]:
using DelimitedFiles
myData = readdlm("exampleData.csv", ';', Any, '\r', header=true)

(Any["A" 14.51 … "CH" 2017; "B" 24.96 … "D" 2016; … ; "D" 34.65 … "I" 2017; "E" 15.49 … "USA" 2018], AbstractString["line" "trait1" … "location" "year"])

In [228]:
myData[1]

5×6 Array{Any,2}:
 "A"  14.51  164.26  54.92  "CH"   2017
 "B"  24.96  554.82  75.18  "D"    2016
 "C"  24.39   94.43  94.8   "F"    2015
 "D"  34.65  915.16  45.62  "I"    2017
 "E"  15.49  725.89  26.24  "USA"  2018

In [229]:
myData[2]

1×6 Array{AbstractString,2}:
 "line"  "trait1"  "trait2"  "trait3"  "location"  "year"

In [231]:
output = open("exampleData.reformatted", "w")

IOStream(<file exampleData.reformatted>)

In [232]:
# Stack the header row (myData[2]) on top of the data rows (myData[1]) and write
# the combined matrix using writedlm's default tab delimiter, then close the
# stream that was opened for writing in the previous cell.
writedlm(output, [myData[2]; myData[1]])
close(output)

In [None]:
# Append the data rows (without the header) to the reformatted file.
# The do-block form of `open` closes the stream automatically, even if
# `writedlm` throws, so the file handle cannot leak.
open("exampleData.reformatted", "a") do output
    writedlm(output, myData[1])
end

In [233]:
Pkg.add("CSV")
using CSV

In [262]:
myCSVdata = CSV.read("exampleData.csv", delim=';',header=true)

Unnamed: 0_level_0,line,trait1,trait2,trait3,location,year
Unnamed: 0_level_1,String⍰,Float64⍰,Float64⍰,Float64⍰,String⍰,Int64⍰
1,A,14.51,164.26,54.92,CH,2017
2,B,24.96,554.82,75.18,D,2016
3,C,24.39,94.43,94.8,F,2015
4,D,34.65,915.16,45.62,I,2017
5,E,15.49,725.89,26.24,USA,2018


In [263]:
typeof(myCSVdata)

DataFrame

In [268]:
CSV.write("exampleData.txtReformatted",myCSVdata, delim='\t')

"exampleData.txtReformatted"

In [34]:
myDF = readtable("exampleData.csv", separator=';', header=true)

Unnamed: 0_level_0,line,trait1,trait2,trait3,location,year
Unnamed: 0_level_1,String⍰,Float64⍰,Float64⍰,Float64⍰,String⍰,Int64⍰
1,A,14.51,164.26,54.92,CH,2017
2,B,24.96,554.82,75.18,D,2016
3,C,24.39,94.43,94.8,F,2015
4,D,34.65,915.16,45.62,I,2017
5,E,15.49,725.89,26.24,USA,2018


In [None]:
?CSV.write

### Summary

- readdlm(source, delim::AbstractChar, T::Type, eol::AbstractChar; header=false, skipstart=0, skipblanks=true, use_mmap, quotes=true, dims, comments=false, comment_char='#')
- writedlm(f, A, delim='\t'; opts)
- CSV.read(fullpath::Union{AbstractString,IO}, sink::Type{T}=DataFrame, args...; kwargs...)
- CSV.write(file_or_io::Union{AbstractString,IO}, source::Type{T}, args...; kwargs...) 
- readtable(filename, [keyword options])

https://docs.julialang.org/en/v1/stdlib/DelimitedFiles/index.html

http://juliadata.github.io/CSV.jl/v0.1.1/



## DataFrames

In [30]:
names(myDF)

6-element Array{Symbol,1}:
 :line    
 :trait1  
 :trait2  
 :trait3  
 :location
 :year    

In [37]:
typeof(:line)

Symbol

In [269]:
head(myDF)

Unnamed: 0_level_0,line,trait1,trait2,trait3,location,year
Unnamed: 0_level_1,String⍰,Float64⍰,Float64⍰,Float64⍰,String⍰,Int64⍰
1,A,14.51,164.26,54.92,CH,2017
2,B,24.96,554.82,75.18,D,2016
3,C,24.39,94.43,94.8,F,2015
4,D,34.65,915.16,45.62,I,2017
5,E,15.49,725.89,26.24,USA,2018


In [270]:
size(myDF)

(5, 6)

In [39]:
describe(myDF)

Unnamed: 0_level_0,variable,mean,min,median,max,nunique,nmissing,eltype
Unnamed: 0_level_1,Symbol,Union…,Any,Union…,Any,Union…,Int64,DataType
1,line,,A,,E,5.0,0,String
2,trait1,22.8,14.51,24.39,34.65,,0,Float64
3,trait2,490.912,94.43,554.82,915.16,,0,Float64
4,trait3,59.352,26.24,54.92,94.8,,0,Float64
5,location,,CH,,USA,5.0,0,String
6,year,2016.6,2015,2017.0,2018,,0,Int64


In [52]:
myDF[1:3,:trait1]

3-element Array{Union{Missing, Float64},1}:
 14.51
 24.96
 24.39

In [97]:
myDF[[1, 2, 4],[:line,:trait1]]

Unnamed: 0_level_0,line,trait1
Unnamed: 0_level_1,String⍰,Float64⍰
1,A,14.51
2,B,24.96
3,D,34.65


In [64]:
myDF[:year]

5-element Array{Union{Missing, Int64},1}:
 2017
 2016
 2015
 2017
 2018

In [67]:
myDF[:year] .> 2015

5-element BitArray{1}:
  true
  true
 false
  true
  true

In [69]:
myDF[myDF[:year] .> 2015,:]

Unnamed: 0_level_0,line,trait1,trait2,trait3,location,year
Unnamed: 0_level_1,String⍰,Float64⍰,Float64⍰,Float64⍰,String⍰,Int64⍰
1,A,14.51,164.26,54.92,CH,2017
2,B,24.96,554.82,75.18,D,2016
3,D,34.65,915.16,45.62,I,2017
4,E,15.49,725.89,26.24,USA,2018


In [73]:
show(myDF)

5×6 DataFrame
│ Row │ line    │ trait1   │ trait2   │ trait3   │ location │ year   │
│     │ String⍰ │ Float64⍰ │ Float64⍰ │ Float64⍰ │ String⍰  │ Int64⍰ │
├─────┼─────────┼──────────┼──────────┼──────────┼──────────┼────────┤
│ 1   │ A       │ 14.51    │ 164.26   │ 54.92    │ CH       │ 2017   │
│ 2   │ B       │ 24.96    │ 554.82   │ 75.18    │ D        │ 2016   │
│ 3   │ C       │ 24.39    │ 94.43    │ 94.8     │ F        │ 2015   │
│ 4   │ D       │ 34.65    │ 915.16   │ 45.62    │ I        │ 2017   │
│ 5   │ E       │ 15.49    │ 725.89   │ 26.24    │ USA      │ 2018   │

In [77]:
colwise(typeof,myDF)

6-element Array{DataType,1}:
 Array{Union{Missing, String},1} 
 Array{Union{Missing, Float64},1}
 Array{Union{Missing, Float64},1}
 Array{Union{Missing, Float64},1}
 Array{Union{Missing, String},1} 
 Array{Union{Missing, Int64},1}  

In [78]:
size(myDF)

(5, 6)

In [80]:
typeof(myDF)

DataFrame

In [139]:
# Build a second DataFrame with the trait columns scaled by constant factors.
# NOTE: concatenating the columns with `[a b ...]` produces one mixed-type
# matrix, so every column of myDF2 ends up with element type Any (see the
# output below) rather than keeping the String/Float64/Int64 column types.
myDF2 = DataFrame([myDF[:line] myDF[:trait1]*2 myDF[:trait2]*2.5 myDF[:trait3]*2.6 myDF[:location] myDF[:year]], names(myDF))

Unnamed: 0_level_0,line,trait1,trait2,trait3,location,year
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any
1,A,29.02,410.65,142.792,CH,2017
2,B,49.92,1387.05,195.468,D,2016
3,C,48.78,236.075,246.48,F,2015
4,D,69.3,2287.9,118.612,I,2017
5,E,30.98,1814.72,68.224,USA,2018


In [140]:
vcat(myDF, myDF2)

Unnamed: 0_level_0,line,trait1,trait2,trait3,location,year
Unnamed: 0_level_1,Any,Any,Any,Any,Any,Any
1,A,14.51,164.26,54.92,CH,2017
2,B,24.96,554.82,75.18,D,2016
3,C,24.39,94.43,94.8,F,2015
4,D,34.65,915.16,45.62,I,2017
5,E,15.49,725.89,26.24,USA,2018
6,A,29.02,410.65,142.792,CH,2017
7,B,49.92,1387.05,195.468,D,2016
8,C,48.78,236.075,246.48,F,2015
9,D,69.3,2287.9,118.612,I,2017
10,E,30.98,1814.72,68.224,USA,2018


### Summary

- accessing the header / column names of a DataFrame
- concatenating data
- selecting elements, rows, cols
- http://juliadata.github.io/DataFrames.jl/v0.9.1/
 