# 高速フーリエ変換(FFT)の正確さ

## 高速フーリエ変換(FFT)とは
高速フーリエ変換(FFT:Fast Fourier Transform)とは、離散フーリエ変換(DFT:Discrete Fourier Transform)を高速に行うアルゴリズムを指す。

### 離散フーリエ変換(DFT)

$\newcommand{\C}{\mathbb{C}}$

データが $N$ 個の離散フーリエ変換を $X\in\C^N$ とした場合、以下のように書くことができる。

$$
\left(\begin{array}{c}
X_{0} \\
X_{1} \\
X_{2} \\
\vdots \\
X_{N-1}
\end{array}\right)=\left(\begin{array}{ccccc}
1 & 1 & 1 & \cdots & 1 \\
1 & e^{-i \frac{2 \pi}{N}} & e^{-i \frac{4 \pi}{N}} & \cdots & e^{-i \frac{2 \pi(N-1)}{N}} \\
1 & e^{-i \frac{4 \pi}{N}} & e^{-i \frac{8 \pi}{N}} & \cdots & e^{-i \frac{4 \pi(N-1)}{N}} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
1 & e^{-i \frac{2 \pi(N-1)}{N}} & e^{-i \frac{4 \pi(N-1)}{N}} & \cdots & e^{-i \frac{2 \pi(N-1)(N-1)}{N}}
\end{array}\right)\left(\begin{array}{c}
x_{0} \\
x_{1} \\
x_{2} \\
\vdots \\
x_{N-1}
\end{array}\right)
$$

$W^n_N$は回転因子という。
$$
W^n_N = e^{-i \frac{2 \pi n}{N}} = \cos \left(\frac{2 \pi n}{N}\right)-i \sin \left(\frac{2 \pi n}{N}\right)
$$

これを用いると、

$$
\left(\begin{array}{c}
X_{0} \\
X_{1} \\
X_{2} \\
\vdots \\
X_{N-1}
\end{array}\right)=\left(\begin{array}{ccccc}
1 & 1 & 1 & \cdots & 1 \\
1 & W_{N} & W_{N}^{2} & \cdots & W_{N}^{N-1} \\
1 & W_{N}^{2} & W_{N}^{4} & \cdots & W_{N}^{2(N-1)} \\
\vdots & \vdots & \vdots & \ddots & \vdots \\
1 & W_{N}^{(N-1)} & W_{N}^{2(N-1)} & \cdots & W_{N}^{(N-1)(N-1)}
\end{array}\right)\left(\begin{array}{c}
x_{0} \\
x_{1} \\
x_{2} \\
\vdots \\
x_{N-1}
\end{array}\right)
$$

$$
X_{n}=\sum_{k=0}^{N-1} x_{k} W_{N}^{n k}
$$

離散フーリエ変換では、 $X_0$ から $X_{N−1}$ の $N$ 回の離散フーリエ変換を行うために、 $N^2$ 回、複素数の四則演算を行う必要がある。しかし高速フーリエ変換では、この膨大な複素数の計算を$\frac{N}{2}( \log _{2} N-1)$回に減らすことができる。
高速フーリエ変換では、データ数が2の冪乗である必要がある。

$N=4$の時、
$$
\left(\begin{array}{c}
X_{0} \\
X_{1} \\
X_{2} \\
X_{3}
\end{array}\right)=\left(\begin{array}{ccccc}
1 & 1 & 1 &  1 \\
1 & W_{4}^{1} & W_{4}^{2} &  W_{4}^{3} \\
1 & W_{4}^{2} & W_{4}^{4} &  W_{4}^{6} \\
1 & W_{4}^{3} & W_{4}^{6} &  W_{4}^{9} \\
\end{array}\right)\left(\begin{array}{c}
x_{0} \\
x_{1} \\
x_{2} \\
x_{3}
\end{array}\right)
$$

$$
\left(\begin{array}{c}
X_{0} \\
X_{1} \\
X_{2} \\
X_{3}
\end{array}\right)=\left(\begin{array}{ccccc}
1 & 1 & 1 &  1 \\
1 & W_{4}^{1} & -1 &  W_{4}^{3} \\
1 & -1 & 1 & -1 \\
1 & W_{4}^{3} & -1 &  W_{4}^{9} \\
\end{array}\right)\left(\begin{array}{c}
x_{0} \\
x_{1} \\
x_{2} \\
x_{3}
\end{array}\right)
$$

ここで、ビットリバース(データの値の数を2進数にしてビットの0と1を逆にする)を行うと、

$$
\left(\begin{array}{c}
X_{0} \\
X_{2} \\
X_{1} \\
X_{3}
\end{array}\right)=\left(\begin{array}{ccccc}
1 & 1 & 1 &  1 \\
1 & -1 & 1 & -1 \\
1 & W_{4}^{1} & -1 &  W_{4}^{3} \\
1 & W_{4}^{3} & -1 &  W_{4}^{9} \\
\end{array}\right)\left(\begin{array}{c}
x_{0} \\
x_{1} \\
x_{2} \\
x_{3}
\end{array}\right)
$$

$$
\begin{array}{l}
X_{0}=x_{0}+x_{1}+x_{2}+x_{3} \\
X_{2}=x_{0}-x_{1}+x_{2}-x_{3} \\
\\
X_{1}=x_{0}+x_{1} W_{4}^{1}-x_{2}+x_{3} W_{4}^{3} \\
X_{3}=x_{0}+x_{1} W_{4}^{3}-x_{2}+x_{3} W_{4}^{9}=x_{0}+x_{1} W_{4}^{3}-x_{2}+x_{3} W_{4}^{1}
\end{array}
$$

整理すると、

$$
\begin{array}{l}
X_{0}=\left(x_{0}+x_{2}\right)+\left(x_{1}+x_{3}\right) \\
X_{2}=\left(x_{0}+x_{2}\right)-\left(x_{1}+x_{3}\right) \\
\\
X_{1}=\left(x_{0}-x_{2}\right)+W_{4}^{1}\left(x_{1}+x_{3} W_{4}^{2}\right)=W_{4}^{0}\left(x_{0}-x_{2}\right)+W_{4}^{1}\left(x_{1}-x_{3}\right) \\
X_{3}=\left(x_{0}-x_{2}\right)+W_{4}^{1}\left(x_{1} W_{4}^{2}+x_{3}\right)=W_{4}^{0}\left(x_{0}-x_{2}\right)-W_{4}^{1}\left(x_{1}-x_{3}\right)
\end{array}
$$

Juliaでは高速フーリエ変換のライブラリとして、FFTW.jlが準備されている(OCamlで最適なCのコードを自動生成する仕組み)。

In [None]:
using FFTW

In [None]:
?fft

In [None]:
?plan_fft#plan_fftは、最適化されたfft関数（技術的には、プランとfftw_execute_dftのラッパー）を返すだけ?

In [1]:
#Cooley-Tukey FFTアルゴリズム
function generic_fft_pow2!(x::Vector{T}) where T
    n,big2=length(x),2one(T)
    nn,j=n÷2,1
    for i=1:2:n-1
        if j>i
            x[j], x[i] = x[i], x[j]
            x[j+1], x[i+1] = x[i+1], x[j+1]
        end
        m = nn
        while m ≥ 2 && j > m
            j -= m
            m = m÷2
        end
        j += m
    end
    logn = 2
    while logn < n
        θ=-big2/logn
        wtemp = sinpi(θ/2)
        wpr, wpi = -2wtemp^2, sinpi(θ)
        wr, wi = one(T), zero(T)
        for m=1:2:logn-1
            for i=m:2logn:n
                j=i+logn
                mixr, mixi = wr*x[j]-wi*x[j+1], wr*x[j+1]+wi*x[j]
                x[j], x[j+1] = x[i]-mixr, x[i+1]-mixi
                x[i], x[i+1] = x[i]+mixr, x[i+1]+mixi
            end
            wr = (wtemp=wr)*wpr-wi*wpi+wr
            wi = wi*wpr+wtemp*wpi+wi
        end
        logn = logn << 1
    end
    return x
end

function interlace(a::Vector{S},b::Vector{V}) where {S,V}
    na=length(a);nb=length(b)
    T=promote_type(S,V)
    if nb≥na
        ret=zeros(T,2nb)
        ret[1:2:1+2*(na-1)]=a
        ret[2:2:end]=b
        ret
    else
        ret=zeros(T,2na-1)
        ret[1:2:end]=a
        if !isempty(b)
            ret[2:2:2+2*(nb-1)]=b
        end
        ret
    end
end

function generic_fft_pow2(x::Vector{Complex{T}}) where T
    y = interlace(real(x),imag(x))
    generic_fft_pow2!(y)
    return complex.(y[1:2:end],y[2:2:end])
end

generic_fft_pow2(x::Vector{T}) where {T} = generic_fft_pow2(complex(x))

# function generic_fft_pow2(x::Matrix{T}) where {T} 
#     row, col = size(x)
#     for i in 1:col
#         x[:,i] = generic_fft_pow2(complex(x[:,i]))
#     end
#     return x
# end

#Bluesteinのアルゴリズム

function washino_fft(x::Vector{T}) where T
#     T <: FFTW.fftwNumber && (@warn("Using generic fft for FFTW number type."))
    n = length(x)
    ispow2(n) && return generic_fft_pow2(x)
    ks = range(zero(real(T)),stop=n-one(real(T)),length=n)
    Wks = exp.((-im).*convert(T,π).*ks.^2 ./ n)
    xq, wq = x.*Wks, conj([exp(-im*convert(T,π)*n);reverse(Wks);Wks[2:end]])
    return Wks.*conv(xq,wq)[n+1:2n]
end

function conv(u::StridedVector{T}, v::StridedVector{T}) where T
    nu,nv = length(u),length(v)
    n = nu + nv - 1
    np2 = nextpow(2,n)
    append!(u,zeros(T,np2-nu)),append!(v,zeros(T,np2-nv))
    y = generic_ifft_pow2(generic_fft_pow2(u).*generic_fft_pow2(v))
    #TODO This would not handle Dual/ComplexDual numbers correctly
    y = T<:Real ? real(y[1:n]) : y[1:n]
end

function generic_ifft_pow2(x::Vector{Complex{T}}) where T
    y = interlace(real(x),-imag(x))
    generic_fft_pow2!(y)
    return complex.(y[1:2:end],-y[2:2:end])/length(x)
end

generic_ifft_pow2 (generic function with 1 method)

In [2]:
# using BenchmarkTools
using IntervalArithmetic, FFTW
N=2^17
A  = randn(N)
iA = map(Interval, A)
# fft(A_real)
# generic_fft_pow2(A_real)
@time generic_fft_pow2(iA)
@time generic_fft_pow2(iA)

# @time generic_fft_pow2(A)
@time fft(A)
@time fft(A);

  1.845605 seconds (4.00 M allocations: 203.853 MiB, 10.11% gc time)
  0.334384 seconds (503 allocations: 20.019 MiB)
  0.672263 seconds (1.72 M allocations: 92.692 MiB, 4.01% gc time)
  0.002963 seconds (35 allocations: 4.003 MiB)


In [None]:
using IntervalArithmetic
A  = randn(N)
iA = map(Interval, A)
N=2^17
@time washino_fft(A);

In [None]:
using LinearAlgebra
N=2^16
B  = randn(N)
iB = map(Interval, B)
@time a1 = fft(B)
@time a2 = washino_fft(iB);
# norm(a1-a2,Inf)
log2(sum(a1 .∈ a2))

以下Matlabのコードを書き換えたもの
参考ページのバイナリを動かそうと試みたが、複素数？区間？に対応していないのかエラーが。

In [None]:
@time open("test.dat", "w") do file
    for i in 1:150
        write(file, rand(Float64, 360, 640))
    end
end

In [37]:
r_real = Array{Float64}(undef, 2^14, 1)

file = open("INTLAB_CONST.FFTDATA_R_real.bin", "r")
read!(file, r_real)
close(file)

r_imag = Array{Float64}(undef, 2^14, 1)
file = open("INTLAB_CONST.FFTDATA_R_imag.bin", "r")
read!(file, r_imag)
close(file)

r = r_real + im*r_imag

d = Array{Float64}(undef, 14, 1)
file = open("INTLAB_CONST.FFTDATA_D.bin", "r")
read!(file, d)
close(file)

d



14×1 Array{Float64,2}:
 0.0
 6.835808657661924e-17
 6.835808657661924e-17
 6.835808657661924e-17
 6.835808657661924e-17
 6.835808657661924e-17
 6.917784572201795e-17
 7.113686303921852e-17
 7.113686303921852e-17
 7.113686303921852e-17
 7.241394152931138e-17
 7.472101018825856e-17
 7.54316629478651e-17
 7.733500711723886e-17

参考元：https://qiita.com/shachah-svaahaa/items/079b97aa52f8450146fd

In [None]:
n=8
h = (0:n-1)/n
rr = @interval(pi)
open("test_rr.bin", "w") do io
    write(io,n)
    write(io,h)
    write(rr, rr)
end

In [None]:
using Mmap
f = open("test_rr.bin", "r")
n=read(f, Int)
h=read(f,Int)
rr=read(rr,Array{Complex{Interval{Float64}}})
close(f)
h

In [None]:
using Random

# データ行列
n = 2_000_000
m = 100
A = randn(Float64,(n,m))

# 行列を1つのファイルに書き込む
# ヘッダーとして行数と列数を付加
open("test_mat.bin", "w") do io
    write(io, n)
    write(io, m)
    write(io, A)
end

# 行列を列ごとに個別のファイルに書き込む
# ヘッダーとして行数を付加
for i in 1:m
    open("test_mat$i.bin", "w") do io
        write(io, n)
        write(io, A[:,i])
    end
end

In [None]:
using Mmap
@time function test_mmap_mat()
    f = open("test_mat.bin", "r")
    row = read(f, Int)
    col = read(f, Int)
    A = Mmap.mmap(f, Matrix{Float64},(row,col))
    close(f)
    A'*A
    nothing
end

別の読み込み方を考える（JLD2）読み込み書き出しはできた.


In [1]:
using JLD2,FileIO
using IntervalArithmetic

n=8
h = (0:n-1)/n
rr = exp.(im*@interval(pi).*h)
@save "test1.jld2" n
@save "test2.jld2" h
@save "test3.jld2" rr

In [2]:
#@load "test1.jld2"
@load "test2.jld2"
@load "test3.jld2"

1-element Array{Symbol,1}:
 :rr

In [None]:
real(rr)

pakagecomplierを使用するとパッケージの呼び出しが早くなる？（パスを通す必要あり）


In [None]:
using PackageCompiler
create_sysimage(:IntervalArithmetic,sysimage_path="sys_IntervalArithmetic.so")

In [None]:
using IntervalArithmetic
@time exp.(im*@interval(pi).*h) 

以下もとのコード

In [2]:
# using JLD2,FileIO
using IntervalArithmetic

function verifyfft(z,sign=1)
    Z = similar(z)
    if isa(z,Array{T,1} where T)
        n = length(z); col = 1; array1 = true
    else
        n, col = size(z); array1 = false
    end
    
#     @load "test1.jld2"
    
    if n==1
        if col==1
            Z = map(interval,z)
            return Z
        else
            isrow_ = true
            z = z[:]
            n = col
            col = 1
        end
    else
        isrow_ = false
    end
    
    log2n = Int(round(log2(n))) #check dimension
    
    if 2^log2n ≠ n #2の倍数でない場合はエラー表示
        error("length must be power of 2")
    end
    
    #bit-reversal(ビットリバース)
    f = 2^(log2n-1)
    v = [0;f]
    for k = 1:log2n-1
        f = 0.5*f
        v = append!(v,f.+v)
    end
    
    z2 = zeros(n,col)

    if isa(z[1],Interval)
        z2 = map(Interval,z2)
    end
    
    #zを入れ替え
    for j = 1: n
        z2[j,:] = z[v[j]+1,:]
    end

    
    #Danielson-Lanczos algorithm
    Z = complex(map(interval,z2))
    Index = reshape([1:n*col;],n,col)
        
    h = (0:n-1)/n # division exact because n is power of 2
#     @load "test2.jld2"
#     @load "test3.jld2"
    ##############
    rr = exp.(im*@interval(pi).*h) # SLOW (INTLAB uses table is better...)   
    ##############
    if sign==-1
      rr = adjoint.(rr)      
    end
    
    v = [1:2:n;]
    w = [2:2:n;]
    t = Z[w,:]
    Z[w,:]  = Z[v,:] - t
    Z[v,:]  = Z[v,:] + t
    
    @time for index　in 1: (log2n-1)    
        m = 2^index
        m2 = 2*m
        vw = reshape([1:n;],m2,Int(n/m2))
        v = vw[1: m, :]
        w = vw[m+1: m2, : ]

        indexv = reshape(Index[v[:],:],m,Int(col*n/m2))
        indexw = reshape(Index[w[:],:],m,Int(col*n/m2))
    
        rr1 = repeat(rr[1:Int(n/m):end],outer=[1,Int(col*n/m2)])
        

        t = rr1 .*  Z[indexw]
        
        Z[indexw] = Z[indexv] - t #原因部分？
        Z[indexv] = Z[indexv] + t
end

    reverse(Z[2:end,:],dims=2)

     if sign==-1
        Z = Z/n
    end
    
    if isrow_
        Z = transpose(Z)　#転置
    end
    
    if array1
        Z = Z[:,1]
    end
    
    return Z
end

# real(verifyfft(ones(8,1),1))

verifyfft (generic function with 2 methods)

In [3]:
z = map(Interval,ones(8))
@show z = real(verifyfft(z))
# n = length(z)
# @show real(generic_fft_pow2(ones(8,1)))
# isa(z[1],Interval)

  0.584999 seconds (2.13 M allocations: 110.916 MiB, 3.78% gc time)
z = real(verifyfft(z)) = Interval{Float64}[[8, 8], [-0, 0], [-0, 0], [-0, 0], [-0, 0], [-0, 0], [-0, 0], [-0, 0]]


8-element Array{Interval{Float64},1}:
  [8, 8]
 [-0, 0]
 [-0, 0]
 [-0, 0]
 [-0, 0]
 [-0, 0]
 [-0, 0]
 [-0, 0]

In [5]:
using IntervalArithmetic, FFTW
N=2^13
A  = randn(N)
iA = map(Interval, A)

@time z1 = verifyfft(A)

# @time generic_fft_pow2(iA)
@time z2 = generic_fft_pow2(iA)

# @time verifyfft(A)
# @time verifyfft(A)

# @time fft(A)
# @time fft(A);


  0.026686 seconds (21.04 k allocations: 16.168 MiB, 24.66% gc time)
  0.080505 seconds (783.16 k allocations: 52.490 MiB, 18.17% gc time)


LoadError: UndefVarError: generic_fft_pow2 not defined

In [52]:
import IntervalArithmetic: mag, radius, mid
# function mag(v::Complex{Interval{T}}) where T# mag function for complex interval vectors
#     abs_v = abs(v);
#     return max(abs_v.lo,abs_v.hi)
# end

function radius(v::Complex{Interval{T}}) where T# mag function for complex interval vectors
    return sqrt(interval(radius(real(v)))^2 + interval(radius(imag(v)))^2)
end

# function mid(v::Complex{Interval{T}}) where T# mag function for complex interval vectors
#     return mid(real(v)) + mid(imag(v))*im
# end
maximum(radius.(z1)), maximum(radius.(z2))

([1.45678e-09, 1.45679e-09], [4.59469e-08, 4.5947e-08])

(メモ)虚数部分を含めた計算がうまく回っていない。下のコードはt(実数部分)のみで回したもの。

In [46]:
using IntervalArithmetic

function verifyfft(z,sign)
    Z = similar(z)
    n = size(z)[1]
    col = size(z)[2]
    
    if n==1
        if col==1
            Z = map(interval,z)
            return Z
        else
            z = z[:]
            n = col
            col = 1
        end
    end
    
    log2n = Int(round(log2(n))) #check dimension
    
    if 2^log2n ≠ n #2の倍数でない場合はエラー表示
        error("length must be power of 2")
    end
    
    #bit-reversal(ビットリバース)
    f = 2^(log2n-1)
    v = [0;f]
    for k = 1:log2n-1
        f = 0.5*f
        v = append!(v,f.+v)
    end
    
    z2 = zeros(n,col)
    
    #zを入れ替え
    for j = 1: n
        z2[j,:]=z[v[j]+1,:]
    end
    
    #Danielson-Lanczos algorithm
    Z = map(interval,z2)
    Index = reshape([1:n*col;],n,col)
        
    h = (0:n-1)/n # division exact because n is power of 2
    rr = exp.(im*@interval(pi).*h)    
    
    v = [1:2:n;]
    w = [2:2:n;]
    t = Z[w,:]
    Z[w,:]  = Z[v,:] - t
    Z[v,:]  = Z[v,:] + t
    
    for index　in 1: (log2n-1)    
        m = 2^index
        m2 = 2*m
        vw = reshape([1:n;],m2,Int(n/m2))
        v = vw[1: m, :]
        w = vw[m+1: m2, : ]

        indexv = reshape(Index[v[:],:],m,Int(col*n/m2))
        indexw = reshape(Index[w[:],:],m,Int(col*n/m2))
    
        rr1 = rr[1:Int(n/m):end]
        
        t = rr1 .*  Z[indexw]

        Z[indexw] = Z[indexv] - real(t)#実数
        Z[indexv] = Z[indexv] + real(t)#実数
end

    reverse(Z[2:end,:],dims=2)
    
     if sign==-1
        Z = Z/n
    end
    
    if col ==1
        Z = transpose(Z)　#転置
    end
    
  return Z
end

verifyfft (generic function with 1 method)

In [39]:
verifyfft(ones(8,8),1)#matlabと同じ結果(im部分が出せていない)

8×8 Array{Interval{Float64},2}:
  [8, 8]   [8, 8]   [8, 8]   [8, 8]   [8, 8]   [8, 8]   [8, 8]   [8, 8]
 [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]
 [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]
 [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]
 [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]
 [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]
 [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]
 [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]  [-0, 0]

In [40]:
verifyfft(reshape([1:64;],8,8),1)#微妙に違う

8×8 Array{Interval{Float64},2}:
 [36, 36]                   …  [484, 484]
      [-6.82843, -6.82842]            [-6.82843, -6.82842]
      [-4.00001, -3.99999]            [-4.00001, -3.99999]
      [-1.17158, -1.17157]            [-1.17158, -1.17157]
 [-4, -4]                        [-4, -4]
      [-1.17158, -1.17157]  …         [-1.17158, -1.17157]
      [-4.00001, -3.99999]            [-4.00001, -3.99999]
      [-6.82843, -6.82842]            [-6.82843, -6.82842]

In [41]:
fft(reshape([1:64;],8,8))

8×8 Array{Complex{Float64},2}:
 2080.0+0.0im      -256.0+618.039im  …  -256.0-256.0im  -256.0-618.039im
  -32.0+77.2548im     0.0+0.0im            0.0+0.0im       0.0+0.0im
  -32.0+32.0im        0.0+0.0im            0.0+0.0im       0.0+0.0im
  -32.0+13.2548im     0.0+0.0im            0.0+0.0im       0.0+0.0im
  -32.0+0.0im         0.0+0.0im            0.0+0.0im       0.0+0.0im
  -32.0-13.2548im     0.0+0.0im      …     0.0+0.0im       0.0+0.0im
  -32.0-32.0im        0.0+0.0im            0.0+0.0im       0.0+0.0im
  -32.0-77.2548im     0.0+0.0im            0.0+0.0im       0.0+0.0im

In [None]:
using IntervalArithmetic
# compute roots of unity
n = 2^4
h = (0:n-1)/n # division exact because n is power of 2
rr = exp.(im*@interval(pi).*h)

real(rr)

In [None]:
@interval(π)

In [12]:
n = 10; a = randn(n)+im*randn(n)
@show a

adjoint.(a)

a = Complex{Float64}[-1.3193507862363747 - 1.0561787397093225im, 0.2675739616614188 - 0.9043457440533725im, 0.18442910969097717 - 1.1387028686607847im, -0.16185124946497928 - 0.3591540125900511im, -1.7267308118821216 - 0.1347789239363774im, -0.2127236121050849 + 0.7707947889483675im, -0.09413913807605831 + 0.38078987799209835im, 0.7670255459342742 - 0.3220612429220512im, 0.023011666195862986 + 0.9155321276293177im, -1.6623981244765729 - 1.072573028080451im]


10-element Array{Complex{Float64},1}:
  -1.3193507862363747 + 1.0561787397093225im
   0.2675739616614188 + 0.9043457440533725im
  0.18442910969097717 + 1.1387028686607847im
 -0.16185124946497928 + 0.3591540125900511im
  -1.7267308118821216 + 0.1347789239363774im
  -0.2127236121050849 - 0.7707947889483675im
 -0.09413913807605831 - 0.38078987799209835im
   0.7670255459342742 + 0.3220612429220512im
 0.023011666195862986 - 0.9155321276293177im
  -1.6623981244765729 + 1.072573028080451im

In [None]:
N=2^11
C_real = rand(N, N)
FFT = fft(C_real);

In [None]:
@benchmark D = FFT * C_real

In [None]:
using FFTW

len = 5
x = [2pi*k/len for k = 0:len-1]
cos_x = cos.(x)
println(fft(cos_x))

In [None]:
using FFTW

len=5
x = [2pi*k/len for k = 0:len-1]
sin_x = sin.(x)
println(fft(sin_x))

### 早く計算するには
- 並列処理の設定を行う。export JULIA_NUM_THREADS=8などと、スレッド数を指定。
- planfftを利用し、inplace(計算機科学においてデータ構造の変換を行うにあたって、追加の記憶領域をほとんど使わずに行うアルゴリズム)を行う。
- Juiaでも、MATLABと同じくMLKを使用する。

下のコードはMATLABのverifyfftのコード

function Z = verifyfft(z,sign)
%VERIFYFFT    Verified forward and backward 1-dimensional FFT
%
%   res = verifyfft(z,sign)
%
%   z     input vector or matrix
%         length of z must be a power of 2 
%   sign   1 forward FFT (default)
%         -1 inverse FFT
% 
%As in Matlab, the inverse FFT is scaled such that forward and inverse FFT
%are inverse operations.
%For matrix input, FFT is performed on each column; row vector input
%is converted into column vector. 
%For N-dimensional FFT apply verifyfft N times.
% 

% written  09/24/14     S.M. Rump  (based no Marcio Gameiro's code)
% modified 01/16/16     S.M. Rump  improved error estimates
%

% data generated by fft_data_gen
%

  global INTLAB_CONST
  
  [n,col] = size(z);
  if n==1
    if col==1
      Z = intval(z);
      return
    else
      isrow = 1;
      z = z(:);
      n = col;
      col = 1;
    end
  else
    isrow = 0;
  end
    
  if nargin==1
    sign = 1;       % default: forward
  end
  
  % check dimension
  log2n = round(log2(n));
  if 2^log2n~=n
    error('length must be power of 2')
  end
  
  % bit-reversal
  % v = bin2dec(fliplr(dec2bin(0:n-1,log2n))) + 1
  f = 2^(log2n-1);
  v = [0;f]; 
  for k=1:log2n-1
    f = 0.5*f;
    v = [ v ; f+v ];
  end
  z = z(v+1,:);
  
  % Danielson-Lanczos algorithm
  Z = intval(z);
  Index = reshape(1:n*col,n,col);
  nmax = INTLAB_CONST.FFTDATA_NMAX; % maximum in fft_data
  if n<=nmax
    r = INTLAB_CONST.FFTDATA_R;     % roots of unity in  r +/- d
    d = INTLAB_CONST.FFTDATA_D(log2n);
    Phi = midrad(r(1:nmax/n:nmax),d);
    if sign==-1
      Phi = (Phi.')';      
    end
  else
    % compute roots of unity, division exact because n is power of 2
    theta = intval('pi') * ( sign*(0:(n-1))'/n ); 
    Phi = cos(theta) + 1i*sin(theta);
  end
  v = 1:2:n;
  w = 2:2:n;
  t = Z(w,:);
  Z(w,:) = Z(v,:) - t;
  Z(v,:) = Z(v,:) + t;
  
  for index=1:(log2n-1)     % Executed log2(n) times
    m = 2^index;
    m2 = 2*m;
    vw = reshape(1:n,m2,n/m2);
    v = vw(1:m,:);
    w = vw(m+1:m2,:);
%     t = bsxfun(@times,exp(1i*pi*(0:m-1)'/m),Z(w));  % doesn't work for intervals
%     theta = intval('pi') * (sign*(0:(m-1))'/m);     % division exact because m=2^p
%     t = exp(1i*theta) .* Z(w);
    indexv = reshape(Index(v(:),:),m,col*n/m2);
    indexw = reshape(Index(w(:),:),m,col*n/m2);
%     t = repmat(Phi(1:n/m:end),1,n/m2*col);
    t = Phi(1:n/m:end,ones(1,n/m2*col)) .* Z(indexw);   % Tony's trick
    Z(indexw) = Z(indexv) - t;
    Z(indexv) = Z(indexv) + t;
  end
  
  Z = [Z(1,:); flipud(Z(2:end,:))];
  if sign==-1
    Z = Z/n;        % error-free since n is a power of 2
  end
  
  if isrow          % change to row vector
    Z = transpose(Z);
  end
  
end


参考に読んでいるサイト

https://qiita.com/ageprocpp/items/0d63d4ed80de4a35fe79

https://cognicull.com/ja/f5q2jl62

・バタフライの説明
http://ysmr-ry.hatenablog.com/entry/2017/11/09/102008

https://wagtail.cds.tohoku.ac.jp/coda/python/p-8-function-part2-sup-fft.html



もしも2D fftが必要ならば`verifyfft(verifyfft(A,1).',1).'`を実装する