In [None]:
using LowRankApprox

using DataFrames

# Read the space-separated data file into a data frame.
data = readtable("/Users/yosikim/Desktop/done/hw5/real.txt", separator = ' ');

# Column 6 supplies "x" and column 7 supplies "s"; both are converted to
# plain Float64 vectors, the argument type the functions below require.
x = Array{Float64,1}(data[6]);
s = Array{Float64,1}(data[7]);

# Choose a grid of standard deviations for a mixture prior, based on
# the observations "x" and their standard errors "s".
#
# The returned vector always starts with 0, followed by values between
# smin = min(s)/10 and smax, where smax = 2*sqrt(max(x.^2 - s.^2)), or
# 10*smin when x[i]^2 <= s[i]^2 for every i.
#
# Inputs:
#   x         Vector of observations.
#   s         Vector of standard errors; same length as x, all entries
#             strictly positive. Defaults to all ones.
#   gridmult  Controls the grid density when nv = 0; the number of
#             log-spaced points is 1 + ceil(log2(smax/smin)/log2(gridmult)).
#             If gridmult = 0, the grid is just [0, smax].
#   nv        If nv > 1, return exactly nv grid points (0 followed by
#             nv - 1 log-spaced values) and ignore gridmult.
#
# An ArgumentError is thrown if the lengths of x and s differ, if any
# element of s is not positive, if gridmult < 0, or if nv is neither 0
# nor greater than 1.
#
# NOTE: Only negative values of gridmult are rejected; gridmult = 1
# leads to a division by log2(1) = 0 in the grid-size calculation.
function autoselectmixsd(x::Array{Float64,1},
                         s::Array{Float64,1} = ones(size(x));
                         gridmult::Float64 = 1.4,
                         nv::Int = 0)

  # Get the number of samples.
  n = length(x);
    
  # Check input "s"---it should be the same length as "x", and all
  # its entries should be positive.
  if length(s) != n
    throw(ArgumentError("Arguments \"x\" and \"s\" should have the same " *
                        "length"))
  elseif any(s .<= 0)
    throw(ArgumentError("All elements of \"s\" should be positive"))
  end
      
  # Check input "gridmult".
  if gridmult < 0
    throw(ArgumentError("Input \"gridmult\" should be non-negative"))
  end

  # Check input "nv".
  if !(nv == 0 || nv > 1)
    throw(ArgumentError("Input \"nv\" should be 0, or greater than 1"))
  end
    
  # Determine the minimum and maximum sigma settings.
  smin = sqrt(minimum(s.^2))/10;
  if all(x.^2 .<= s.^2)

    # This deals with the occasional boundary case.
    smax = 10 * smin; 
  else

    # This is, roughly, the largest value you'd want to use.
    smax = 2 * sqrt(maximum(x.^2 - s.^2)); 
  end
    
  # Choose the grid of sigmas.
  if nv > 0

    # A fixed number of grid points was requested.
    return vcat(0,logspace(log10(smin),log10(smax),nv - 1));
  elseif gridmult == 0

    # Degenerate grid: only zero and the largest setting.
    return vcat(0,smax)
  else 

    # The number of grid points is determined by "gridmult".
    m = ceil(Int,log2(smax/smin)/log2(gridmult));
    return vcat(0,logspace(log10(smin),log10(smax),m + 1));
  end
end

# Compute the n x k conditional likelihood matrix, where n is the
# number of samples and k is the number of mixture components, for the
# case when the likelihood is univariate normal and prior is a mixture
# of univariate normals.
#
# Entry (i,j) of the conditional likelihood matrix is equal to the
# normal density with zero mean and variance s[i]^2 + sd[j]^2,
# evaluated at x[i].
#
# If normalizerows = true, each row of the likelihood matrix is
# divided by the largest value in the row. After normalization, the
# largest value in each row is 1.
#
# Inputs:
#   x              Vector of observations.
#   s              Vector of standard errors; same length as x, all
#                  entries strictly positive. Defaults to all ones.
#   sd             Vector of non-negative prior standard deviations,
#                  one per mixture component. Defaults to the grid
#                  returned by autoselectmixsd(x,s).
#   normalizerows  Whether to normalize the rows (default: true).
#
# An ArgumentError is thrown if the lengths of x and s differ, if any
# element of s is not positive, or if any element of sd is negative.
function normlikmatrix(x::Array{Float64,1},
                       s::Array{Float64,1} = ones(size(x));
                       sd::Array{Float64,1} = autoselectmixsd(x,s),
                       normalizerows::Bool = true)

  # Get the number of samples (n) and the number of prior mixture
  # components (k).
  n = length(x);
  k = length(sd);
    
  # Check input "s"---it should be the same length as "x", and all
  # its entries should be positive.
  if length(s) != n
    throw(ArgumentError("Arguments \"x\" and \"s\" should have the same " *
                        "length"))
  elseif any(s .<= 0)
    throw(ArgumentError("All elements of \"s\" should be positive"))
  end

  # Check input "sd".
  if any(sd .< 0)
    throw(ArgumentError("All elements of \"sd\" should be non-negative"))
  end
    
  # Compute the n x k matrix of standard deviations.
  S = sqrt.((s.^2) .+ (sd.^2)');

  # Compute the log-densities, and normalize the rows, if requested.
  # Working on the log scale means the row normalization is a
  # subtraction performed before exponentiating.
  L = -(x./S).^2/2 - log.(S) - log(2*pi)/2;
  if normalizerows

    # This is the same as
    #
    #   L = L - repmat(maximum(L,2),1,k);
    #
    # but uses memory more efficiently to complete the operation.
    L = broadcast(-,L,maximum(L,2));
  end
  return exp.(L)
end

# Estimate mixture weights from the n x k likelihood matrix L using
# sequential quadratic programming (SQP), where each quadratic
# subproblem is solved by an active-set method.
#
# The gradient g and Hessian H computed below are those of
# -sum(log.(L*x + eps))/n; the subproblem gradient "H*y + 2*g + 1"
# additionally carries a "+ 1" term, and the solution is normalized to
# sum to 1 only at the very end.
#
# Inputs:
#   L        n x k matrix; row i holds the likelihoods of sample i
#            under each of the k mixture components.
#   eps      Small constant added to L*x before taking reciprocals,
#            and to the diagonal of the Hessian.
#   tol      Tolerance used in the convergence checks of both the
#            active-set loop and the outer SQP loop.
#   pqrtol   Relative tolerance ("rtol") passed to pqrfact or psvdfact.
#   sptol    Entries of x at or below this threshold are left out of
#            the initial working set; entries below it are set to
#            zero in the final solution.
#   lowrank  "qr" to use a partial QR factorization of L, "svd" to use
#            a partial SVD; any other value uses L itself.
#
# Returns a dense vector of length k whose entries sum to 1.
function mixSQP_record_runtime(L; eps=1e-8, tol=1e-8, pqrtol = 1e-10, sptol=1e-3, lowrank = "svd")
  n = size(L,1); k = size(L,2);

  # Optionally compute a low-rank factorization of L. If "lowrank" is
  # neither "qr" nor "svd", no factorization is computed and L is used
  # directly in all the calculations below.
  if lowrank == "qr"
    F = pqrfact(L, rtol=pqrtol);
    P = sparse(F[:P]);
  elseif lowrank == "svd"
    F = psvdfact(L, rtol=pqrtol);
    S = Diagonal(F[:S]);
  end
    
  # Maximum number of SQP (outer loop) iterations.
  iter = 100;

  # Initialize the solution: uniform mixture weights.
  x = ones(k)/k;

  # QP subproblem start
  for i = 1:iter

    # Gradient and Hessian computation -- Rank reduction method. Here,
    # D is the vector of reciprocals 1./(L*x + eps).
    if lowrank == "qr"
      D = 1./(F[:Q]*(F[:R]*(P'*x)) + eps);
      g = -P * F[:R]' * (F[:Q]'*D)/n;
      H = P * F[:R]' * (F[:Q]'*Diagonal(D.^2)*F[:Q]) * F[:R] * P'/n + eps * eye(k);
    elseif lowrank == "svd"
      D = 1./(F[:U]*(S*(F[:Vt]*x)) + eps);
      g = -F[:Vt]'*(S * (F[:U]'*D))/n;
      H = (F[:V]*S*(F[:U]'*Diagonal(D.^2)*F[:U])* S*F[:Vt])/n + eps * eye(k);
    else
      D = 1./(L*x + eps);
      g = -L'*D/n;
      H = L'*Diagonal(D.^2)*L/n + eps * eye(k);
    end
        
    # Initialize the working set "ind" (the coordinates that are free
    # to be nonzero), and the subproblem iterate y, which is uniform
    # over the working set.
    ind = find(x .> sptol);
    y = sparse(zeros(k)); y[ind] = 1/length(ind);

    # Active set method start
    for j = 1:100

      # Define the smaller problem, restricted to the working set.
      H_s = H[ind,ind];
      d = H * y + 2 * g + 1;
      d_s = d[ind];

      # Solve the smaller problem; the search direction p is zero
      # outside the working set.
      p = sparse(zeros(k));
      p_s = -H_s\d_s; p[ind] = p_s;

      # Convergence check.
      if norm(p_s) < tol

        # Compute the Lagrange multipliers.
        lambda = d;
        if all(lambda .>= -tol)
          break;
        elseif length(ind) < k
            
          # Some multiplier is negative: among the coordinates not in
          # the working set ("notind"), find the one with the smallest
          # multiplier ("ind_min"), and add it to the working set.
          notind  = setdiff(1:k,ind);
          ind_min = notind[findmin(lambda[notind])[2]];
          ind     = sort([ind; ind_min]);
        end

      # Do update otherwise.
      else

        # Retain feasibility: find the largest step size alpha <= 1
        # that keeps all entries of y non-negative.
        alpha = 1;
        alpha_temp = -y[ind]./p_s;
        ind_block = find(p_s .< 0);
        alpha_temp = alpha_temp[ind_block];
        if ~isempty(ind_block)
          temp = findmin(alpha_temp);
          if temp[1] < 1
            ind_block = ind[ind_block[temp[2]]]; # blocking constraint
            alpha = temp[1];

            # Update working set -- if there is a blocking constraint.
            deleteat!(ind, find(ind - ind_block .== 0));
          end
        end

        # Update the subproblem iterate.
        y = y + alpha * p;
      end
    end
        
    # Perform backtracking line search: halve the step from x to y (at
    # most 10 times) until the decrease condition is satisfied. Note
    # that D_new must be evaluated at the candidate point y.
    for t = 1:10
      if lowrank == "qr"
        D_new = 1./(F[:Q]*(F[:R]*(P'*y)) + eps);
      elseif lowrank == "svd"
        D_new = 1./(F[:U]*(S*(F[:Vt]*y)) + eps);
      else
        D_new = 1./(L*y + eps);
      end
      if sum(log.(D)) - sum(log.(D_new)) > sum((x-y) .* g) / 2
        break;
      end
      y = (y-x)/2 + x;
    end
        
    # Update the solution to the original optimization problem.
    x = y;

    # Convergence check; this uses the gradient computed at the start
    # of the iteration.
    if minimum(g + 1) >= -tol
      break;
    end
  end

  # Set the small entries to zero, then normalize the solution so that
  # the entries sum to 1.
  x[x .< sptol] = 0;
    
  return full(x/sum(x))
end


# Build the likelihood matrix for the loaded data (x, s), with the prior
# standard deviations taken from an automatically chosen grid of nv = 800
# points.
L = normlikmatrix(x,s,sd = autoselectmixsd(x,s,nv = 800));

In [31]:
# Estimate mixture weights from the n x k likelihood matrix L using
# sequential quadratic programming (SQP), where each quadratic
# subproblem is solved by an active-set method.
#
# The gradient g and Hessian H computed below are those of
# -sum(log.(L*x + eps))/n; the subproblem gradient "H*y + 2*g + 1"
# additionally carries a "+ 1" term, and the solution is normalized to
# sum to 1 only at the very end.
#
# Inputs:
#   L        n x k matrix; row i holds the likelihoods of sample i
#            under each of the k mixture components.
#   eps      Small constant added to L*x before taking reciprocals,
#            and to the diagonal of the Hessian.
#   tol      Tolerance used in the convergence checks of both the
#            active-set loop and the outer SQP loop.
#   pqrtol   Relative tolerance ("rtol") passed to pqrfact or psvdfact.
#   sptol    Entries of x at or below this threshold are left out of
#            the initial working set; entries below it are set to
#            zero in the final solution.
#   lowrank  "qr" to use a partial QR factorization of L, "svd" to use
#            a partial SVD; any other value uses L itself.
#
# Returns a dense vector of length k whose entries sum to 1.
function mixSQP_record_runtime(L; eps=1e-8, tol=1e-8, pqrtol = 1e-10, sptol=1e-3, lowrank = "svd")
  n = size(L,1); k = size(L,2);

  # Optionally compute a low-rank factorization of L. If "lowrank" is
  # neither "qr" nor "svd", no factorization is computed and L is used
  # directly in all the calculations below.
  if lowrank == "qr"
    F = pqrfact(L, rtol=pqrtol);
    P = sparse(F[:P]);
  elseif lowrank == "svd"
    F = psvdfact(L, rtol=pqrtol);
    S = Diagonal(F[:S]);
  end
    
  # Maximum number of SQP (outer loop) iterations.
  iter = 100;

  # Initialize the solution: uniform mixture weights.
  x = ones(k)/k;

  # QP subproblem start
  for i = 1:iter

    # Gradient and Hessian computation -- Rank reduction method. Here,
    # D is the vector of reciprocals 1./(L*x + eps).
    if lowrank == "qr"
      D = 1./(F[:Q]*(F[:R]*(P'*x)) + eps);
      g = -P * F[:R]' * (F[:Q]'*D)/n;
      H = P * F[:R]' * (F[:Q]'*Diagonal(D.^2)*F[:Q]) * F[:R] * P'/n + eps * eye(k);
    elseif lowrank == "svd"
      D = 1./(F[:U]*(S*(F[:Vt]*x)) + eps);
      g = -F[:Vt]'*(S * (F[:U]'*D))/n;
      H = (F[:V]*S*(F[:U]'*Diagonal(D.^2)*F[:U])* S*F[:Vt])/n + eps * eye(k);
    else
      D = 1./(L*x + eps);
      g = -L'*D/n;
      H = L'*Diagonal(D.^2)*L/n + eps * eye(k);
    end
        
    # Initialize the working set "ind" (the coordinates that are free
    # to be nonzero), and the subproblem iterate y, which is uniform
    # over the working set.
    ind = find(x .> sptol);
    y = sparse(zeros(k)); y[ind] = 1/length(ind);

    # Active set method start
    for j = 1:100

      # Define the smaller problem, restricted to the working set.
      H_s = H[ind,ind];
      d = H * y + 2 * g + 1;
      d_s = d[ind];

      # Solve the smaller problem; the search direction p is zero
      # outside the working set.
      p = sparse(zeros(k));
      p_s = -H_s\d_s; p[ind] = p_s;

      # Convergence check.
      if norm(p_s) < tol

        # Compute the Lagrange multipliers.
        lambda = d;
        if all(lambda .>= -tol)
          break;
        elseif length(ind) < k
            
          # Some multiplier is negative: among the coordinates not in
          # the working set ("notind"), find the one with the smallest
          # multiplier ("ind_min"), and add it to the working set.
          notind  = setdiff(1:k,ind);
          ind_min = notind[findmin(lambda[notind])[2]];
          ind     = sort([ind; ind_min]);
        end

      # Do update otherwise.
      else

        # Retain feasibility: find the largest step size alpha <= 1
        # that keeps all entries of y non-negative.
        alpha = 1;
        alpha_temp = -y[ind]./p_s;
        ind_block = find(p_s .< 0);
        alpha_temp = alpha_temp[ind_block];
        if ~isempty(ind_block)
          temp = findmin(alpha_temp);
          if temp[1] < 1
            ind_block = ind[ind_block[temp[2]]]; # blocking constraint
            alpha = temp[1];

            # Update working set -- if there is a blocking constraint.
            deleteat!(ind, find(ind - ind_block .== 0));
          end
        end

        # Update the subproblem iterate.
        y = y + alpha * p;
      end
    end
        
    # Perform backtracking line search: halve the step from x to y (at
    # most 10 times) until the decrease condition is satisfied. Note
    # that D_new must be evaluated at the candidate point y.
    for t = 1:10
      if lowrank == "qr"
        D_new = 1./(F[:Q]*(F[:R]*(P'*y)) + eps);
      elseif lowrank == "svd"
        D_new = 1./(F[:U]*(S*(F[:Vt]*y)) + eps);
      else
        D_new = 1./(L*y + eps);
      end
      if sum(log.(D)) - sum(log.(D_new)) > sum((x-y) .* g) / 2
        break;
      end
      y = (y-x)/2 + x;
    end
        
    # Update the solution to the original optimization problem.
    x = y;

    # Convergence check; this uses the gradient computed at the start
    # of the iteration.
    if minimum(g + 1) >= -tol
      break;
    end
  end

  # Set the small entries to zero, then normalize the solution so that
  # the entries sum to 1.
  x[x .< sptol] = 0;
    
  return full(x/sum(x))
end

mixSQP_record_runtime (generic function with 1 method)

In [27]:
# Read a headerless, space-separated text file and convert the resulting
# table to a dense Float64 matrix. Judging by the file name, this is
# presumably a 100000 x 100 likelihood matrix -- TODO confirm.
L2 = Array{Float64,2}(readtable("/Users/yosikim/Desktop/sample100000x100.txt", separator  = ' ', header = false));

In [30]:
# Time the solver on L2 using the partial QR factorization, with all other
# settings left at their defaults. Note that the first call to a function
# also includes compilation time.
@time sol = mixSQP_record_runtime(L2, lowrank = "qr");

  0.944105 seconds (66.18 k allocations: 491.182 MiB, 45.94% gc time)


In [None]:
# Time the solver on the likelihood matrix L using the partial QR
# factorization, with looser settings than the defaults (eps = 1e-6 instead
# of 1e-8, pqrtol = 1e-8 instead of 1e-10, tol = 1e-6 instead of 1e-8).
@time sol = mixSQP_record_runtime(L, lowrank = "qr", eps = 1e-6, pqrtol = 1e-8, tol = 1e-6);

In [None]:
# Display the solution in sparse form, so that only the nonzero mixture
# weights are shown.
sparse(sol)