In [1]:
using CSV: read, write
using DataFrames: eachcol
using Missings: ismissing

In [2]:
# Processing the data
#
# Loads the station table, derives the windowing sizes, and fills gaps of
# missing values in every column except the first.

# Return the total number of missing entries over an iterator of
# (name, column) pairs, as produced by `eachcol` in this file.
function countmissing(columns)
    total = 0
    for (name, col) in columns
        total += count(ismissing, col)
    end
    return total
end

# Fill runs of missing values in `col` in place and return `col`.
#   * interior runs are linearly interpolated between the two neighbouring
#     observed values;
#   * a leading run has no left neighbour, so it is back-filled with the first
#     observed value (previously this indexed `col[0]` and threw BoundsError);
#   * a trailing run has no right neighbour, so it is forward-filled with the
#     last observed value (previously it was silently left missing);
#   * a column with no observed value at all is left untouched.
function fillmissing!(col)
    n = length(col)
    gap = 0                          # length of the current run of missing values
    for i in 1:n
        if ismissing(col[i])
            gap += 1
        elseif gap != 0
            start = i - gap          # index of the first missing entry of the run
            if start == 1
                for k in 1:gap
                    col[k] = col[i]
                end
            else
                delta = (col[i] - col[start-1]) / (gap + 1)
                # Accumulate from the left neighbour, one step per entry.
                for k in start:i-1
                    col[k] = col[k-1] + delta
                end
            end
            gap = 0
        end
    end
    if gap != 0 && gap != n
        last_seen = n - gap          # index of the last observed value
        for k in last_seen+1:n
            col[k] = col[last_seen]
        end
    end
    return col
end

f = CSV.read("./PressureData/Stations_2018.csv")
Nw = 25                                      # Window size
Nc = length(f)-1                             # Number of columns
# Number of data points: the row count rounded down to a whole number of
# windows. Integer division avoids the round trip through floating point.
T = div(size(f, 1), Nw) * Nw
# Uninitialised (Nc*Nw) x (T/Nw) buffer; it is not filled in this cell.
# NOTE(review): the constructor form without `undef` only exists on Julia < 1.0.
s = Array{Float64, 2}(Nc*Nw, div(T, Nw))

# count number of missing values
num = countmissing(eachcol(f[:,2:Nc+1]))
println("number of missing elements: $num")

# linear interpolation to estimate missing values
# NOTE(review): filling in place only reaches `f` if `f[:,2:Nc+1]` shares its
# column vectors with `f` (the recorded output of this cell suggests it does
# for the DataFrames version in use) - confirm before upgrading DataFrames.
for (name,col) in eachcol(f[:,2:Nc+1])
    fillmissing!(col)
end

# count number of missing values
num = countmissing(eachcol(f[:,2:Nc+1]))
println("number of missing elements: $num")

number of missing elements: 2332
number of missing elements: 0


In [3]:
# partition into 19 parts: 3 validation & 16 training
#
# Each part is a contiguous slice of `part` rows of `f`. Integer division is
# used for the part size: `Int64(T / 19)` threw InexactError whenever T was not
# an exact multiple of 19. With `div`, up to 18 leftover rows at the end of the
# first T rows are simply not written to any file.
part = div(T, 19)

# Parts 1-3 become the validation files valid1.csv .. valid3.csv.
for i in 1:3
    rows = part*(i-1)+1:part*i
    CSV.write("./PressureData/Valid/valid$i.csv", f[rows, :])
end

# Parts 4-19 become the training files; the file number is the part index,
# so they are named train4.csv .. train19.csv.
for i in 4:19
    rows = part*(i-1)+1:part*i
    CSV.write("./PressureData/Train/train$i.csv", f[rows, :])
end

println("complete")

complete
