# Pairwise FST

In [None]:
using Pkg ; Pkg.activate(".")
Pkg.instantiate()
using PopGen
using DataFrames
using CSV
using MultipleTesting
ENV["COLUMNS"] = 200

## Load the data

In [3]:
bft = PopGen.read("../inputfiles/bft.kinless.gen", silent = true)

PopData{Diploid, 2139 SNP loci}
  Samples: 324
  Populations: 9

Rename the populations, since genepop files don't preserve population names

In [4]:
# Map the placeholder population labels ("1" through "9") to the intended population names.
popdict = Dict(
    "1" => "BRZ",
    "2" => "BRZSP",
    "3" => "KEY",
    "4" => "MRT",
    "5" => "PNS",
    "6" => "PR",
    "7" => "SCA",
    "8" => "TX",
    "9" => "VZ",
)
# Apply the renaming to `bft` in place, then tabulate the sample count per population.
populations!(bft, popdict)
populations(bft, counts = true)




Unnamed: 0_level_0,population,count
Unnamed: 0_level_1,String,Int64
1,BRZ,23
2,BRZSP,14
3,KEY,55
4,MRT,40
5,PNS,30
6,PR,38
7,SCA,51
8,TX,28
9,VZ,45


## Summary Information

In [5]:
summarystats(bft)

Unnamed: 0_level_0,Het_obs,HS,HT,DST,HT′,DST′,FST,FST′,FIS,DEST
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.0787,0.083,0.0833,0.0002,0.0833,0.0002,0.0025,0.0028,0.0523,0.0003


## Hudson pairwise FST

Calculate pairwise FST without significance testing

In [6]:
pairwisefst(bft, method = "Hudson").results

Unnamed: 0_level_0,BRZ,BRZSP,KEY,MRT,PNS,PR,SCA,TX,VZ
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.00322162,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.00275731,0.00491408,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0033492,0.00421018,0.00142712,0.0,0.0,0.0,0.0,0.0,0.0
5,0.00208144,0.00171124,0.0016122,0.00110864,0.0,0.0,0.0,0.0,0.0
6,0.00164961,0.00380532,0.00174722,0.00236584,0.00158027,0.0,0.0,0.0,0.0
7,0.00183854,0.00400323,0.00131807,0.00179355,0.00109236,0.00172863,0.0,0.0,0.0
8,0.00512836,0.00616829,0.00452151,0.00222685,0.00419737,0.00500045,0.00367676,0.0,0.0
9,0.00194382,0.003087,0.00125401,0.0013698,0.000711107,0.00109614,0.00080597,0.00391112,0.0


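Calculate pairwise FST with significance testing (999 iterations). In the resulting matrix, the FST values are below the diagonal and the corresponding P-values are above it.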

In [None]:
pfst = pairwisefst(bft, method = "Hudson", iterations = 999)

In [8]:
CSV.write("hudson.fst", pfst.results)
pfst.results

Unnamed: 0_level_0,BRZ,BRZSP,KEY,MRT,PNS,PR,SCA,TX,VZ
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,0.0,0.021021,0.002002,0.001001,0.018018,0.254254,0.147147,0.001001,0.0790791
2,0.00322162,0.0,0.002002,0.013013,0.465465,0.033033,0.035035,0.002002,0.146146
3,0.00275731,0.00491408,0.0,0.014014,0.024024,0.00600601,0.00800801,0.001001,0.013013
4,0.0033492,0.00421018,0.00142712,0.0,0.179179,0.001001,0.003003,0.013013,0.025025
5,0.00208144,0.00171124,0.0016122,0.00110864,0.0,0.038038,0.288288,0.001001,0.544545
6,0.00164961,0.00380532,0.00174722,0.00236584,0.00158027,0.0,0.00700701,0.001001,0.148148
7,0.00183854,0.00400323,0.00131807,0.00179355,0.00109236,0.00172863,0.0,0.001001,0.201201
8,0.00512836,0.00616829,0.00452151,0.00222685,0.00419737,0.00500045,0.00367676,0.0,0.001001
9,0.00194382,0.003087,0.00125401,0.0013698,0.000711107,0.00109614,0.00080597,0.00391112,0.0


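Function pulled from `PopGenCore.jl` that splits an array into consecutive row chunks of given lengths. It is used below to redistribute the corrected P-values back into the rows of the upper triangle of the output matrix.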

In [10]:
"""
    partitionarray(array::AbstractArray, steps::AbstractVector{<:Integer})

Split `array` along its first dimension into consecutive chunks whose lengths are
given by `steps`, returning a vector of views (no data is copied).

Each chunk is taken as `view(array, rows, :)`, so a vector input yields
single-column, two-dimensional views.

Raises an error if the first axis of `array` is not `1:sum(steps)`.
"""
function partitionarray(array::AbstractArray, steps::AbstractVector{<:Integer})
    rowaxis = axes(array, 1)
    if rowaxis != 1:sum(steps)
        error("Steps provided do not sum to length of the first dimension")
    end
    # Last and first row of every chunk, derived from the running total of `steps`.
    stops = cumsum(steps)
    starts = stops .- steps .+ 1
    return [view(array, view(rowaxis, lo:hi), :) for (lo, hi) in zip(starts, stops)]
end

partitionarray (generic function with 1 method)

Function to perform a Benjamini-Hochberg FDR correction on the P-values in the upper triangle

In [11]:
"""
    adjustpval(fstval::DataFrame)

Return a copy of `fstval` in which the values above the diagonal have been replaced
by their Benjamini-Hochberg adjusted counterparts, rounded to 4 digits.

The values above the diagonal are gathered row by row, adjusted together as one
family, and written back in the same order. Values on and below the diagonal are
left unchanged. A `pop` column holding the column names of the table is inserted
as the first column. The input `fstval` is not modified.
"""
function adjustpval(fstval::DataFrame)
    corrected = deepcopy(fstval)
    npop = size(corrected, 1)
    # Flatten the upper triangle into a single vector, one row after another.
    rawp = mapreduce(r -> collect(corrected[r, r+1:end]), vcat, 1:npop-1)
    adjusted = round.(adjust(rawp, BenjaminiHochberg()), digits = 4)
    # Row r of the upper triangle holds npop - r values, so chunk lengths count down.
    chunklengths = reverse(collect(1:(npop-1)))
    chunks = collect.(partitionarray(adjusted, chunklengths))
    for (r, chunk) in enumerate(chunks)
        corrected[r, (r+1):end] .= chunk[:, 1]
    end
    insertcols!(corrected, 1, :pop => names(corrected))
    return corrected
end

adjustpval (generic function with 1 method)

## Correct the P-values for multiple testing

In [12]:
adjusted_pfst = adjustpval(pfst.results)

Unnamed: 0_level_0,pop,BRZ,BRZSP,KEY,MRT,PNS,PR,SCA,TX,VZ
Unnamed: 0_level_1,String,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64
1,BRZ,0.0,0.036,0.0066,0.0045,0.0324,0.2774,0.1778,0.0045,0.1054
2,BRZSP,0.00322162,0.0,0.0066,0.026,0.4788,0.0495,0.0505,0.0066,0.1778
3,KEY,0.00275731,0.00491408,0.0,0.0266,0.0392,0.0166,0.0192,0.0045,0.026
4,MRT,0.0033492,0.00421018,0.00142712,0.0,0.2081,0.0045,0.009,0.026,0.0392
5,PNS,0.00208144,0.00171124,0.0016122,0.00110864,0.0,0.0527,0.3052,0.0045,0.5445
6,PR,0.00164961,0.00380532,0.00174722,0.00236584,0.00158027,0.0,0.018,0.0045,0.1778
7,SCA,0.00183854,0.00400323,0.00131807,0.00179355,0.00109236,0.00172863,0.0,0.0045,0.2264
8,TX,0.00512836,0.00616829,0.00452151,0.00222685,0.00419737,0.00500045,0.00367676,0.0,0.0045
9,VZ,0.00194382,0.003087,0.00125401,0.0013698,0.000711107,0.00109614,0.00080597,0.00391112,0.0


In [13]:
CSV.write("hudson.fdr.fst", adjusted_pfst)

"hudson.fdr.fst"