# Temp Data Analysis


These are the essentials for running the program. This notebook is for analyzing temperature data from RCP8.5 projections.
The CSV file being analyzed must have the year values in its first column, under the header `year`. This is essential for the program to run properly.

In [1]:
#initiate Model
using CSV
using DataFrames
using Plots
using KernelDensity


After loading the packages we will use, we need to open and read the data.

In [10]:
# Read the projection tables into DataFrames. Going by the file names, these are
# temperature projections for RCP 8.5 / 6.0 / 4.5 / 2.6 plus a gmsl projection for
# RCP 4.5.
# NOTE(review): these paths are relative to the working directory, while the
# component files read further down live under dataFiles\<RCP>\ -- confirm the
# intended location of these five files.
dft85 = CSV.read("updated_projections_temperature_RCP85_sneasybrick.csv", DataFrame)
dft60 = CSV.read("updated_projections_temperature_RCP60_sneasybrick.csv", DataFrame)
dft45 = CSV.read("updated_projections_temperature_RCP45_sneasybrick.csv", DataFrame)
dft26 = CSV.read("updated_projections_temperature_RCP26_sneasybrick.csv", DataFrame)
dfg45 = CSV.read("updated_projections_gmsl_RCP45_sneasybrick.csv", DataFrame)

Open the CSVs for the individual components of sea-level rise.

In [None]:
# GIS files (the "greenland" projections, per the file names), one per RCP scenario.
# Paths are assembled with joinpath so the platform's own separator is used; the
# previously hard-coded "\\" separators only resolve on Windows.
gis85 = CSV.read(joinpath("dataFiles", "RCP85", "updated_projections_greenland_RCP85_sneasybrick.csv"), DataFrame)
gis60 = CSV.read(joinpath("dataFiles", "RCP60", "updated_projections_greenland_RCP60_sneasybrick.csv"), DataFrame)
gis45 = CSV.read(joinpath("dataFiles", "RCP45", "updated_projections_greenland_RCP45_sneasybrick.csv"), DataFrame)
gis26 = CSV.read(joinpath("dataFiles", "RCP26", "updated_projections_greenland_RCP26_sneasybrick.csv"), DataFrame)

In [None]:
# AIS files (the "antarctic" projections, per the file names), one per RCP scenario.
# Paths are assembled with joinpath so the platform's own separator is used; the
# previously hard-coded "\\" separators only resolve on Windows.
ais85 = CSV.read(joinpath("dataFiles", "RCP85", "updated_projections_antarctic_RCP85_sneasybrick.csv"), DataFrame)
ais60 = CSV.read(joinpath("dataFiles", "RCP60", "updated_projections_antarctic_RCP60_sneasybrick.csv"), DataFrame)
ais45 = CSV.read(joinpath("dataFiles", "RCP45", "updated_projections_antarctic_RCP45_sneasybrick.csv"), DataFrame)
ais26 = CSV.read(joinpath("dataFiles", "RCP26", "updated_projections_antarctic_RCP26_sneasybrick.csv"), DataFrame)

In [None]:
# LWS files (the "landwater_storage_sl" projections, per the file names), one per
# RCP scenario.
# Paths are assembled with joinpath so the platform's own separator is used; the
# previously hard-coded "\\" separators only resolve on Windows.
lws85 = CSV.read(joinpath("dataFiles", "RCP85", "updated_projections_landwater_storage_sl_RCP85_sneasybrick.csv"), DataFrame)
lws60 = CSV.read(joinpath("dataFiles", "RCP60", "updated_projections_landwater_storage_sl_RCP60_sneasybrick.csv"), DataFrame)
lws45 = CSV.read(joinpath("dataFiles", "RCP45", "updated_projections_landwater_storage_sl_RCP45_sneasybrick.csv"), DataFrame)
lws26 = CSV.read(joinpath("dataFiles", "RCP26", "updated_projections_landwater_storage_sl_RCP26_sneasybrick.csv"), DataFrame)

In [None]:
# GIC files (the "glaciers" projections, per the file names), one per RCP scenario.
# Paths are assembled with joinpath so the platform's own separator is used; the
# previously hard-coded "\\" separators only resolve on Windows.
gic85 = CSV.read(joinpath("dataFiles", "RCP85", "updated_projections_glaciers_RCP85_sneasybrick.csv"), DataFrame)
gic60 = CSV.read(joinpath("dataFiles", "RCP60", "updated_projections_glaciers_RCP60_sneasybrick.csv"), DataFrame)
gic45 = CSV.read(joinpath("dataFiles", "RCP45", "updated_projections_glaciers_RCP45_sneasybrick.csv"), DataFrame)
gic26 = CSV.read(joinpath("dataFiles", "RCP26", "updated_projections_glaciers_RCP26_sneasybrick.csv"), DataFrame)

In [None]:
# TE files (the "thermal" projections, per the file names), one per RCP scenario.
# Paths are assembled with joinpath so the platform's own separator is used; the
# previously hard-coded "\\" separators only resolve on Windows.
te85 = CSV.read(joinpath("dataFiles", "RCP85", "updated_projections_thermal_RCP85_sneasybrick.csv"), DataFrame)
te60 = CSV.read(joinpath("dataFiles", "RCP60", "updated_projections_thermal_RCP60_sneasybrick.csv"), DataFrame)
te45 = CSV.read(joinpath("dataFiles", "RCP45", "updated_projections_thermal_RCP45_sneasybrick.csv"), DataFrame)
te26 = CSV.read(joinpath("dataFiles", "RCP26", "updated_projections_thermal_RCP26_sneasybrick.csv"), DataFrame)

## Primary Run

First we set our index variables.

In [None]:
# Indices of the data columns: everything except column 1 (the notebook intro
# says the first column holds the year).
colr = collect(2:ncol(dft85))
# Indices of every row in the table.
rowr = collect(1:nrow(dft85))

Then we find the rows that fall within the selected range of years.

In [None]:
# Row indices whose year falls inside the normalization period (1850 to 1900,
# both ends included). findall yields a concretely typed Vector{Int} rather than
# the untyped Any[] a manual push! loop builds.
normp = findall(y -> 1850 <= y <= 1900, dft85[!, :year])

Next, we index through each temperature of the normalization period for each trial and collect their values.

In [None]:
# Collect every data value from the normalization-period rows, visiting one row
# at a time and moving left to right across the data columns. The comprehension
# gives the vector a concrete element type instead of Any.
sarray = [dft85[r, c] for r in normp for c in colr]
# Number of collected values; this is the divisor when the mean is computed.
i = length(sarray)
# Show the first collected value as a quick sanity check.
sarray[1]

Next we find the mean.

In [None]:
# Add up the first i collected values, left to right, starting from 0
summ = foldl(+, sarray[1:i]; init = 0)
# Mean = total divided by the number of terms (i)
avg = summ / i


Next we normalize the data for each instance.

In [None]:
# Subtract the normalization mean from every data value in every row (rows are
# visited in order, columns left to right within each row), modifying dft85 in place.
for r in rowr, c in colr
    dft85[r, c] -= avg
end
# Display the normalized table.
dft85

Now, to prepare for graphing, we create an empty dict to hold all the results for each year.

In [None]:
# Empty dictionary with unconstrained key and value types.
yearVals = Dict{Any, Any}()
# Print it to confirm that it starts out empty.
print("Dict values = ", yearVals)

Next we determine the year 2100 in the df.

In [None]:
# Year we want to inspect
yr = 2100
# Row index of that year; if the year appears more than once the last match wins.
rw = findlast(==(yr), dft85[!, :year])
# Stop here with a clear message instead of carrying a 0 sentinel forward, which
# would only surface later as a BoundsError when the row is indexed.
rw === nothing && throw(ArgumentError("year $yr not found in the :year column"))
rw

Now we store all the values from that row in the `x2100` array.

In [None]:
# Values of every data column in row rw, in column order. The comprehension
# produces a concretely typed vector instead of Any[].
x2100 = [dft85[rw, c] for c in colr]

Make the histogram.

In [None]:

# Histogram of the values in x2100, with title and axis labels supplied as
# plot attributes in the same call.
histogram(
    x2100;
    label = "RCP 8.5",
    title = "Distribution of Temp Change values: yr 2100",
    xlabel = "Value of temp change",
    ylabel = "Num of trials ",
)
     

## Generalization of Primary

A general function that does all of the above.

In [None]:
"""
    genp(dft, yr, ns, ne)

Normalize `dft` in place against its baseline mean and return the values of the
row for year `yr`.

The baseline mean is the average of every data value (all columns except the
first) in the rows whose `:year` lies in `ns:ne`, both ends included. That mean
is subtracted from every data value of `dft`. Note that `dft` itself is
modified; later cells of this notebook rely on that.

# Arguments
- `dft`: table with a `:year` column; every column after the first is treated as data.
- `yr`: year whose row is returned (the last matching row if it occurs more than once).
- `ns`, `ne`: first and last year of the normalization period.

# Returns
A vector with one normalized value per data column, in column order.

# Throws
- `ArgumentError` if no row falls inside `ns:ne` (the mean would be `NaN` and
  would silently turn every value into `NaN`) or if `yr` is not present. Both
  checks run before `dft` is touched, so a failed call leaves it unchanged.
"""
function genp(dft, yr, ns, ne)
    # Data columns: everything after the first column.
    colr = 2:ncol(dft)
    years = dft[!, :year]

    # Rows that belong to the normalization period.
    normp = findall(y -> ns <= y <= ne, years)
    isempty(normp) &&
        throw(ArgumentError("no rows with year in $ns:$ne; cannot normalize"))

    # Row of the requested year.
    rw = findlast(==(yr), years)
    rw === nothing && throw(ArgumentError("year $yr not found in the :year column"))

    # Baseline mean, accumulated row by row, left to right across the columns.
    summ = 0.0
    for r in normp, c in colr
        summ += dft[r, c]
    end
    avg = summ / (length(normp) * length(colr))

    # Shift every data value by the baseline mean (in place).
    for r in 1:nrow(dft), c in colr
        dft[r, c] -= avg
    end

    return [dft[rw, c] for c in colr]
end

Run the function with the different RCPs for the same year, and create a variable to hold each result.

In [None]:
# Year-2100 values for each RCP table, normalized against 1850-1900 and copied
# into Float64 vectors. genp also normalizes each table in place as a side effect.
r85 = collect(Float64, genp(dft85, 2100, 1850, 1900))
r60 = collect(Float64, genp(dft60, 2100, 1850, 1900))
r45 = collect(Float64, genp(dft45, 2100, 1850, 1900))
r26 = collect(Float64, genp(dft26, 2100, 1850, 1900))



Create a general function for histograms.

In [None]:
"""
    histg(data, rcp, tyear, op)

Build and return a histogram of `data`.

# Arguments
- `data`: collection of values to bin.
- `rcp`: scenario label; shown in the legend as `"RCP <rcp>"`.
- `tyear`: year appended to the plot title.
- `op`: opacity passed to the histogram series.

The title and axis labels are set as attributes of the histogram call itself, so
the function does not depend on which plot happens to be the "current" one.
"""
function histg(data, rcp, tyear, op)
    graph = histogram(
        collect(data);  # typed copy; avoids the element-by-element copy into Any[]
        label = string("RCP ", rcp),
        opacity = op,
        title = string("Distribution of Temp Change values: yr ", tyear),
        xlabel = "Value of temp change",
        ylabel = "Num of trials ",
    )
    return graph
end

In [None]:

# One fully opaque (op = 1) histogram per RCP scenario for the year-2100 values.
# Only the value of the last assignment (h6) is the cell's result.
h4 = histg(r45, 4.5, 2100, 1)
h8 = histg(r85, 8.5, 2100, 1)
h2 = histg(r26, 2.6, 2100, 1)
h6 = histg(r60, 6.0, 2100, 1) 

In [None]:
# Kernel density estimate of the year-2100 values for each RCP scenario; the
# `.x` and `.density` fields of each result are plotted in the next cell.
k26 = kde(r26)
k45 = kde(r45)
k85 = kde(r85)
k60 = kde(r60)

In [None]:
# Overlay the four density curves, each filled down to 0 at 50% opacity. The
# title and axis labels are given as attributes of the first series.
p = plot(
    k85.x,
    k85.density;
    label = "RCP 8.5",
    fill = (0, 0.5),
    color = :maroon,
    title = "Distribution of Temp Change values: yr 2100",
    xlabel = "Value of temp change(C)",
    ylabel = "Density",
)
plot!(p, k60.x, k60.density; label = "RCP 6.0", fill = (0, 0.5), color = :chocolate)
plot!(p, k45.x, k45.density; label = "RCP 4.5", fill = (0, 0.5), color = :dodgerblue)
plot!(p, k26.x, k26.density; label = "RCP 2.6", fill = (0, 0.5), color = :blue)




## Threshold work

### Collect Column Names

Create a function to find all the trials that stay below the threshold up to a given year.

In [None]:


"""
    thresh(tdf, tcha, yr_t)

Find the data columns (trials) of `tdf` that never reach `tcha` up to year `yr_t`.

A column qualifies when none of its values satisfies `value >= tcha` in any row
from the first row through the row whose first-column entry equals `yr_t` (the
last such row if there are several).

# Arguments
- `tdf`: table whose first column holds the year; all other columns are trials.
- `tcha`: threshold value.
- `yr_t`: last year to check.

# Returns
A `Dict{Any, Any}` with one entry per qualifying column. The value is the
column's integer index; the key is whatever `names(tdf, c)` returns for that
index (NOTE(review): with DataFrames this appears to be a one-element vector of
names rather than a plain string -- confirm before relying on the keys).

# Throws
- `ArgumentError` if `yr_t` does not occur in the first column. Without this
  check the row range would be empty and every column would be reported as
  staying under the threshold.
"""
function thresh(tdf, tcha, yr_t)
    # Dict of qualifying columns: name(s) => column index.
    colnames = Dict{Any, Any}()

    # Row of the threshold year.
    tr = findlast(==(yr_t), tdf[!, 1])
    tr === nothing &&
        throw(ArgumentError("threshold year $yr_t not found in the first column"))

    for c in 2:ncol(tdf)
        # `any` stops at the first value that reaches the threshold.
        reached = any(tdf[r, c] >= tcha for r in 1:tr)
        if !reached
            colnames[names(tdf, c)] = c
        end
    end
    return colnames
end

Using the RCP 4.5 data, check the values for 3, 2, and 1.5 degree increases.

In [None]:
# Columns of the RCP 4.5 temperature table that stay below 3, 2 and 1.5 through
# the year 2100. dft45 has already been normalized in place by the earlier genp
# call, so the thresholds apply to the normalized values.
tres = thresh(dft45, 3, 2100)
dos = thresh(dft45, 2, 2100)
uno = thresh(dft45, 1.5, 2100)

### Ready the Sealevel Rise Data

Normalize the sealevel data

In [None]:
"""
    norm(df, ns, ne)

Normalize `df` in place against its baseline mean and return it.

The baseline mean is the average of every data value (all columns except the
first) in the rows whose `:year` lies in `ns:ne`, both ends included. That mean
is subtracted from every data value of `df`.

# Arguments
- `df`: table with a `:year` column; every column after the first is treated as data.
- `ns`, `ne`: first and last year of the normalization period.

# Returns
The same `df` object, now normalized.

# Throws
- `ArgumentError` if no row falls inside `ns:ne`; the mean would otherwise be
  `NaN` and would silently turn every value into `NaN`. `df` is left untouched.

NOTE(review): this name collides with `LinearAlgebra.norm` if that standard
library is ever brought into scope in the same session.
"""
function norm(df, ns, ne)
    # Data columns: everything after the first column.
    colr = 2:ncol(df)

    # Rows that belong to the normalization period.
    normp = findall(y -> ns <= y <= ne, df[!, :year])
    isempty(normp) &&
        throw(ArgumentError("no rows with year in $ns:$ne; cannot normalize"))

    # Baseline mean, accumulated row by row, left to right across the columns.
    summ = 0.0
    for r in normp, c in colr
        summ += df[r, c]
    end
    avg = summ / (length(normp) * length(colr))

    # Shift every data value by the baseline mean (in place).
    for r in 1:nrow(df), c in colr
        df[r, c] -= avg
    end
    return df
end

Check with the sea-level rise data.

In [None]:
# Normalize the RCP 4.5 gmsl table in place against its 1995-2014 mean; the
# returned (same) table is the cell's displayed result.
norm(dfg45,1995,2014)

### Get Values and Plot

Create a function to get the sea-level rise values for the columns that stayed under the threshold.

In [None]:
"""
    valu(df, cnames, tyr)

Return the values of `df` in the row for year `tyr`, for the columns listed in `cnames`.

# Arguments
- `df`: table with a `:year` column.
- `cnames`: dictionary whose *values* are column indices into `df` (the keys are
  not used), e.g. the result of `thresh`.
- `tyr`: year whose row is read (the last matching row if it occurs more than once).

# Returns
A vector with one entry per value of `cnames`, in the dictionary's iteration order.

# Throws
- `ArgumentError` if `tyr` is not present in the `:year` column, instead of
  failing with a BoundsError on row 0.
"""
function valu(df, cnames, tyr)
    # Row of the requested year.
    rw = findlast(==(tyr), df[!, :year])
    rw === nothing && throw(ArgumentError("year $tyr not found in the :year column"))

    return [df[rw, i] for i in values(cnames)]
end

Grab the values for the three thresholds

In [None]:
# Year-2100 values from dfg45 for the columns selected under each threshold
# (3, 2 and 1.5), copied into Float64 vectors.
t3 = collect(Float64, valu(dfg45, tres, 2100))
t2 = collect(Float64, valu(dfg45, dos, 2100))
t1 = collect(Float64, valu(dfg45, uno, 2100))

Compute their kernel density estimates.

In [None]:
# Kernel density estimate for each threshold's set of year-2100 values; the
# `.x` and `.density` fields of each result are plotted in the next cell.
kd3 = kde(t3)
kd2 = kde(t2)
kd1 = kde(t1)

Plot!!!

In [None]:
# Overlay the three threshold densities, each filled down to 0 at 50% opacity.
# The title and axis labels are given as attributes of the first series.
p = plot(
    kd3.x,
    kd3.density;
    label = "Thresh 3",
    fill = (0, 0.5),
    title = "RCP 4.5 Distribution of Sealevel Change: yr 2100",
    xlabel = "Value of sealevel change(m)",
    ylabel = "Density",
)
plot!(p, kd2.x, kd2.density; label = "Thresh 2", fill = (0, 0.5))
# This curve looks odd because the 1.5 threshold has just one value.
plot!(p, kd1.x, kd1.density; label = "Thresh 1.5", fill = (0, 0.5))

## CSV with Results

This is where I will pool the results from each RCP scenario for each threshold (initially 2 and 3 degrees).

In [None]:
# Columns that stay below thresholds 3 and 2 through 2100 for the remaining
# scenarios. The tables were normalized in place by the earlier genp calls.
# Prefix: e = RCP 8.5 table, s = RCP 6.0 table, t = RCP 2.6 table.
etres = thresh(dft85, 3, 2100)
edos = thresh(dft85, 2, 2100)
stres = thresh(dft60, 3, 2100)
sdos = thresh(dft60, 2, 2100)
ttres = thresh(dft26, 3, 2100)
tdos = thresh(dft26, 2, 2100)
# The RCP 4.5 results were computed earlier and are reused as-is.
tres # RCP 4.5, threshold 3
dos # RCP 4.5, threshold 2

In [None]:
# get values from respective files for GIS