In [1]:
using LinearAlgebra
using Plots
import Base: getproperty, \, show

In [2]:

"""
    LU_Fac{T<:Real}

Container for a pivoted LU factorization stored in packed form.

# Fields
- `lu`: a single matrix holding both triangular factors; the accessors `F.L` and `F.U`
  wrap it as unit-lower and upper triangular respectively.
- `p`: integer index vector used to reorder the rows of a right-hand side before solving.
"""
struct LU_Fac{T<:Real}
    lu::Matrix{T}
    p::Vector{Int}
end


"""
    getproperty(F::LU_Fac, d::Symbol)

Property access for `LU_Fac`.

`F.L` wraps the packed matrix as a `UnitLowerTriangular`, `F.U` wraps it as an
`UpperTriangular`; every other symbol is looked up as a plain field of `F`.
"""
function getproperty(F::LU_Fac, d::Symbol)
    d === :L && return UnitLowerTriangular(getfield(F, :lu))
    d === :U && return UpperTriangular(getfield(F, :lu))
    return getfield(F, d)
end


"""
    propertynames(F::LU_Fac, private::Bool=false)

Return the names that can be accessed as `F.name`.

By default only the derived factors `(:L, :U)` are listed. With `private = true` the
struct's own field names are listed first, followed by `:L` and `:U`.
"""
# Qualified with `Base.` so this adds a method to `Base.propertynames`;
# an unqualified definition would create a new, unrelated function that shadows it.
function Base.propertynames(F::LU_Fac, private::Bool=false)
    derived = (:L, :U)
    return private ? (fieldnames(typeof(F))..., derived...) : derived
end

"""
    show(io::IO, mime::MIME"text/plain", F::LU_Fac)

Multi-line display of an `LU_Fac`: prints the `L` factor, a blank line, then the `U`
factor, each rendered with the `text/plain` display of the triangular wrapper.
"""
function show(io::IO, mime::MIME"text/plain", F::LU_Fac)
    lower, upper = F.L, F.U
    print(io, "L = ")
    show(io, mime, lower)
    print(io, "\n\nU = ")
    return show(io, mime, upper)
end


show (generic function with 346 methods)

In [3]:

"""
    lu_factorization!(A::AbstractMatrix{T}) where T<:AbstractFloat

Compute the LU factorization of the square matrix `A` with partial (row) pivoting,
overwriting `A` with the packed factors.

After the call, the strict lower triangle of `A` holds the multipliers of the unit
lower-triangular factor and the upper triangle holds the upper-triangular factor.
The returned permutation vector `p` satisfies `L * U == A_original[p, :]`.

The scaling and rank-1 update steps are delegated to BLAS (`scal!` / `ger!`).
No check is made for a zero pivot: a singular matrix yields non-finite entries
rather than an error.

# Returns
An `LU_Fac{T}` built from `A` and the permutation vector.

# Throws
- `DimensionMismatch` if `A` is not square.
"""
function lu_factorization!(A::AbstractMatrix{T}) where T <: AbstractFloat
    m, n = size(A)
    # The elimination below (and the triangular accessors/solvers built on the
    # result) are only meaningful for square input, so reject anything else
    # up front instead of silently producing a partial factorization.
    m == n || throw(DimensionMismatch("matrix is not square: dimensions are $(size(A))"))
    piv = collect(1:n)

    @inbounds for k = 1:n-1
        # Partial pivoting: find the entry of largest magnitude in column k,
        # on or below the diagonal.
        pivot = k
        max_elem = abs(A[k, k])
        for i = k+1:n
            row_elem = abs(A[i, k])
            if row_elem > max_elem
                max_elem = row_elem
                pivot = i
            end
        end

        if pivot != k
            # Record the interchange and swap the two complete rows of A.
            piv[k], piv[pivot] = piv[pivot], piv[k]
            for j = 1:n
                A[pivot, j], A[k, j] = A[k, j], A[pivot, j]
            end
        end

        # Scale the sub-column by 1/pivot to obtain the multipliers ...
        Akkinv = inv(A[k, k])
        l = @view A[k+1:n, k]
        BLAS.scal!(length(l), Akkinv, l, 1)
        # ... then apply the rank-1 update to the trailing submatrix.
        # `-one(T)` keeps the scalar in the matrix element type, as BLAS requires.
        @views BLAS.ger!(-one(T), A[k+1:n, k], A[k, k+1:n], A[k+1:n, k+1:n])
    end
    return LU_Fac{T}(A, piv)
end


"""
    lu_factorization(A)

Non-mutating counterpart of `lu_factorization!`: factorizes a copy of `A`, so `A`
itself is left unchanged.
"""
function lu_factorization(A)
    work = copy(A)
    return lu_factorization!(work)
end


lu_factorization (generic function with 1 method)

In [4]:


"""
    swap_rows(b, piv)

Return a new array whose entries (for a general array) or rows (for a matrix) are
those of `b` taken in the order given by the index collection `piv`. `b` is not
modified.
"""
function swap_rows(b::AbstractArray, piv::AbstractArray{<:Integer})
    return getindex(b, piv)
end

# Matrix method: reorder whole rows, keeping every column.
function swap_rows(b::AbstractMatrix, piv::AbstractArray{<:Integer})
    return getindex(b, piv, Colon())
end


"""
    _solve!(b::AbstractArray{T}, F::LU_Fac{T}) where T<:AbstractFloat

Solve `L * U * x = b` in place for a single right-hand side, where `L` and `U` are the
triangular factors packed in `F.lu`. `b` must already be row-permuted; it is
overwritten with the solution and returned.
"""
function _solve!(b::AbstractArray{T}, F::LU_Fac{T}) where T<:AbstractFloat
    # Forward substitution with the unit lower-triangular factor.
    BLAS.trsv!('L', 'N', 'U', F.lu, b)
    # Back substitution with the upper-triangular factor.
    BLAS.trsv!('U', 'N', 'N', F.lu, b)
    return b
end


"""
    _solve!(B::AbstractMatrix{T}, F::LU_Fac{T}) where T<:AbstractFloat

Solve `L * U * X = B` in place for several right-hand sides (the columns of `B`), where
`L` and `U` are the triangular factors packed in `F.lu`. `B` must already be
row-permuted; it is overwritten with the solution and returned.
"""
function _solve!(B::AbstractMatrix{T}, F::LU_Fac{T}) where T<:AbstractFloat
    # `one(T)` keeps the BLAS scaling factor in the element type of the operands.
    unit = one(T)
    # Forward substitution with the unit lower-triangular factor.
    BLAS.trsm!('L', 'L', 'N', 'U', unit, F.lu, B)
    # Back substitution with the upper-triangular factor.
    BLAS.trsm!('L', 'U', 'N', 'N', unit, F.lu, B)
    return B
end


"""
    lu_solve!(b::AbstractArray{T}, A::AbstractMatrix{T}) where T<:AbstractFloat

Solve `A * x = b` by factorizing a copy of `A` and substituting. `A` is left
unchanged; `b` is overwritten with the solution, which is also returned.

# Throws
- `DimensionMismatch` if the first dimension of `b` does not match that of `A`.
"""
function lu_solve!(b::AbstractArray{T}, A::AbstractMatrix{T}) where T<:AbstractFloat
    # Check before touching `b`: indexing with the permutation would otherwise
    # silently drop surplus rows.
    size(b, 1) == size(A, 1) || throw(DimensionMismatch(
        "right-hand side has $(size(b, 1)) rows, but the matrix has $(size(A, 1))"))
    F = lu_factorization(A)
    # Apply the row permutation to `b` itself so the solve really is in place.
    copyto!(b, swap_rows(b, F.p))
    return _solve!(b, F)
end

"""
    lu_solve!(b::AbstractArray{T}, lu::LU_Fac{T}) where T<:AbstractFloat

Solve the linear system described by the factorization `lu` for the right-hand side
`b`. `b` is overwritten with the solution, which is also returned.

# Throws
- `DimensionMismatch` if the first dimension of `b` does not match the length of the
  permutation stored in `lu`.
"""
function lu_solve!(b::AbstractArray{T}, lu::LU_Fac{T}) where T<:AbstractFloat
    perm = lu.p
    # Check before touching `b`: indexing with the permutation would otherwise
    # silently drop surplus rows.
    size(b, 1) == length(perm) || throw(DimensionMismatch(
        "right-hand side has $(size(b, 1)) rows, but the factorization has $(length(perm))"))
    # Apply the row permutation to `b` itself so the solve really is in place.
    copyto!(b, swap_rows(b, perm))
    return _solve!(b, lu)
end


# Left-division for a factorization: `F \ b` solves the factored system for `b`.
# The solver is handed a copy of `b`, so the caller's right-hand side is never modified.
\(A::LU_Fac, b::AbstractArray) = lu_solve!(copy(b), A);

In [16]:
# Small hand-written 4×4 matrix; it is replaced by the random matrix below
# before anything uses it.
A = [1 2 3 4 
     2 3 4 2
     1 1 -1 2
     0 2 1/3 3]

# Random n×n system; b is an n×n matrix, i.e. n right-hand sides at once.
n = 1000
A = rand(n,n)
b = A * rand(n, n)
# Factorize with the hand-written routine, then time only the solve step
# (the solver receives a copy of b).
F = lu_factorization(A)
@time lu_solve!(copy(b), F)
# Rebind F to the result of `lu(A)` and time its solve for comparison.
F = lu(A)
@time F\b

  0.101401 seconds (10 allocations: 15.259 MiB, 0.76% gc time)
  0.103663 seconds (6 allocations: 7.630 MiB)


1000×1000 Array{Float64,2}:
 0.368663   0.150104   0.51081    …  0.496645   0.961989     0.77178  
 0.610571   0.811913   0.190196      0.114175   0.556954     0.327096 
 0.174942   0.949109   0.862564      0.174088   0.415398     0.130219 
 0.707539   0.130757   0.520073      0.394706   0.93171      0.0820572
 0.356156   0.606608   0.947434      0.825168   0.729873     0.186386 
 0.607635   0.512997   0.0671269  …  0.457738   0.0792677    0.54433  
 0.136308   0.599526   0.378024      0.356085   0.645454     0.613546 
 0.443773   0.970924   0.413325      0.567684   0.185507     0.432858 
 0.263105   0.275157   0.0592219     0.337839   0.149956     0.134922 
 0.0512851  0.343983   0.979091      0.631763   0.943798     0.263488 
 0.69728    0.971455   0.12068    …  0.0850632  0.0704903    0.252655 
 0.07222    0.439294   0.27807       0.61263    0.000916924  0.235852 
 0.358008   0.734605   0.605562      0.1891     0.0891402    0.240829 
 ⋮                                ⋱              

In [14]:
# Restrict BLAS to a single thread.
BLAS.set_num_threads(1)

In [22]:
# Time three operations on a random 1000×1000 matrix:
# creating a view over the whole matrix, ...
A = rand(1000, 1000)
@time B = view(A, :, :)
# ... range-indexing a 100×100 block out of the array itself, ...
@time A[1:100, 1:100]
# ... and range-indexing the same block out of the view.
@time B[1:100, 1:100]

  0.000023 seconds (7 allocations: 272 bytes)
  0.000067 seconds (8 allocations: 78.422 KiB)
  0.000055 seconds (8 allocations: 78.422 KiB)


100×100 Array{Float64,2}:
 0.546926   0.0147237  0.490005    …  0.659245   0.316036    0.472518 
 0.455999   0.769551   0.00761391     0.36143    0.904167    0.380178 
 0.978017   0.5249     0.555239       0.97144    0.937953    0.512563 
 0.580846   0.966916   0.291514       0.109487   0.688104    0.136585 
 0.969909   0.969459   0.740523       0.750824   0.348495    0.250818 
 0.871822   0.268373   0.802101    …  0.517787   0.00760553  0.0463582
 0.837971   0.377483   0.150536       0.598176   0.724203    0.0976341
 0.0460986  0.143948   0.643429       0.0223442  0.0219099   0.644593 
 0.182493   0.0314742  0.240363       0.667885   0.612291    0.439095 
 0.41424    0.544811   0.0396322      0.591162   0.785757    0.567989 
 0.60763    0.179467   0.0685132   …  0.722455   0.704469    0.600823 
 0.439479   0.348713   0.587737       0.486849   0.680098    0.892254 
 0.691046   0.50952    0.0203879      0.338941   0.929016    0.401539 
 ⋮                                 ⋱               