# Numerical Analysis Final Project: Lasso vs Ridge Regression

In [50]:
using Pkg
using Pandas
using LinearAlgebra

## Import data

In [51]:
# Read the tab-separated file into `df`; the trailing `;` keeps the notebook
# from echoing the returned table.
df = read_csv("median_housing_cost_data.tsv", sep="\t");

In [35]:
# Positional lookup on `df` through `iloc`; the result is kept in `x`.
# NOTE(review): the index is 0 although Julia indexing is normally 1-based —
# confirm which convention the Pandas wrapper's `iloc` expects here.
x = iloc(df)[0]

## Motivating factor

In [None]:
# TODO: discuss median household costs and explain why estimating cost by maximizing posterior probabilities is the goal.

## Regression tasks derived from maximizing posterior probabilities

## Solutions to regression tasks

## Code solutions for regression tasks

In [49]:
"""
    linear_regression_ridge(X, y, lambda)

Fit ridge-regularised least squares by solving the regularised normal
equations `(XᵀX + lambda*I) w = Xᵀy`.

# Arguments
- `X`: design matrix (rows are samples, columns are features).
- `y`: targets, with one entry (or row) per row of `X`.
- `lambda`: scalar multiplier of the identity added to `XᵀX`.

# Returns
The tuple `(w, pred, err)`: the fitted coefficients, the fitted values
`X * w`, and the residuals `y - pred`.
"""
function linear_regression_ridge(X, y, lambda)
    Xt = transpose(X)

    # XᵀX is (columns × columns). The uniform-scaling `I` adopts that size
    # automatically; an identity sized by the row count only matches when X
    # happens to be square and raises DimensionMismatch otherwise.
    gram = Xt * X + lambda * I

    # Solve the linear system rather than forming an explicit inverse.
    w = gram \ (Xt * y)

    pred = X * w
    err = y - pred

    return w, pred, err
end

linear_regression_ridge (generic function with 1 method)

In [None]:
"""
    linear_regression_lasso(X, y, lambda)

Placeholder for the lasso fit. The body is empty: the arguments are ignored
and every call returns `nothing`.
"""
function linear_regression_lasso(X, y, lambda)
    return nothing
end

In [52]:
"""
    linear_regression_test(X, w)

Return the predictions `X * w` obtained by applying the coefficients `w` to
the inputs `X`.
"""
linear_regression_test(X, w) = X * w

linear_regression_test (generic function with 1 method)

## Analysis of algorithms: conditioning, complexities, and flop counts

In [None]:
"""
    lasso_condition_num(X, y, lambda)

Step through the closed-form pipeline

    t5 = (XᵀX)⁻¹ Xᵀ y
    t8 = sign.(t5) .* (abs.(t5) .- lambda)
    t10 = y - X * t8

and report a condition estimate `κ(A) = ‖A‖ · ‖A⁺‖` for seven of its
intermediate results, where `A⁺` is the pseudoinverse.

# Arguments
- `X`: design matrix; `XᵀX` must be invertible because it is inverted explicitly.
- `y`: targets, with one entry per row of `X`.
- `lambda`: scalar subtracted from the coefficient magnitudes.

# Returns
The tuple `(k1, …, k7)` of estimates for `XᵀX`, `(XᵀX)⁻¹Xᵀ`, `t5`,
`abs.(t5) .- lambda`, `t8`, `X * t8` and `y - X * t8`. The same seven values
are printed on one line, separated by spaces.
"""
function lasso_condition_num(X, y, lambda)
    # ‖A‖·‖A⁺‖. The pseudoinverse is used because most intermediates are
    # vectors or non-square matrices, for which `inv` is not defined.
    # `norm` of a matrix is the Frobenius norm.
    # NOTE(review): for any non-zero vector this product is 1, so k3..k7 do
    # not capture cancellation in the subtractions — revisit if that matters.
    kappa(A) = norm(A) * norm(pinv(A))

    t1 = transpose(X)      # transposing does not change the conditioning
    t2 = t1 * X
    k1 = kappa(t2)

    t3 = inv(t2)           # inverse has the same condition number as t2
    t4 = t3 * t1
    k2 = kappa(t4)

    t5 = t4 * y
    k3 = kappa(t5)

    t6 = abs.(t5)
    t7 = t6 .- lambda
    k4 = kappa(t7)

    t8 = sign.(t5) .* t7   # re-apply the signs of the unshrunk coefficients
    k5 = kappa(t8)

    t9 = X * t8
    k6 = kappa(t9)

    t10 = y - t9
    k7 = kappa(t10)

    ks = (k1, k2, k3, k4, k5, k6, k7)
    # Space-separated so adjacent numbers cannot run together in the output.
    println(join(ks, " "))
    return ks
end

In [None]:
"""
    lasso_condition_fixed(X, y, lambda; learning_rate=0.025, iterations=8000)

Run gradient-descent updates for a linear model `X * w .+ b` with an L1
penalty term, and record a condition estimate for every arithmetic step.

Each iteration predicts every row, builds the weight gradient `dW` (the
penalty enters with `+lambda` when `w[j] > 0` and `-lambda` otherwise) and the
intercept gradient `db`, then steps `w` and `b` by `learning_rate`.

# Arguments
- `X`: design matrix (rows are samples, columns are features).
- `y`: targets, with one entry per row of `X`.
- `lambda`: L1 penalty weight.

# Keywords
- `learning_rate`: step size applied to both gradients.
- `iterations`: number of full update passes.

# Returns
A `Vector{Float64}` with one entry per recorded operation, in execution
order: `2m + 6n + 8` entries per iteration for an `m × n` matrix `X`.
"""
function lasso_condition_fixed(X, y, lambda; learning_rate=0.025, iterations=8000)
    l1_penalty = lambda
    m = size(X, 1)        # number of samples
    n = size(X, 2)        # number of features
    w = zeros(n)
    b = 0.0               # Float64 from the start so its type never changes
    conds = Float64[]     # one condition estimate per operation

    # |num| / |den|; Inf when the denominator is zero, because the relative
    # condition of an operation whose result is zero is unbounded (and 0/0
    # would otherwise put NaN into the log).
    ratio(num, den) = iszero(den) ? Inf : abs(num) / abs(den)
    # Dot product: ‖u‖‖v‖ / |u·v|, the inverse cosine of the angle between them.
    dotcond(u, v) = ratio(norm(u) * norm(v), dot(u, v))
    # ‖v‖·‖v⁺‖ for a scalar or vector result (`inv` is undefined for vectors).
    # NOTE(review): this is 1 for every non-zero input, so it does not
    # measure cancellation in the subtractions it is attached to.
    pinvcond(v) = norm(v) * norm(pinv(v))

    for _ in 1:iterations
        # Prediction for every sample: dot(X[r, :], w) + b.
        y_pred = zeros(m)
        for r in axes(X, 1)
            row = view(X, r, :)
            eq1 = dot(row, w)
            push!(conds, dotcond(row, w))

            eq2 = eq1 + b
            y_pred[r] = eq2
            push!(conds, ratio(eq1, eq2))
        end

        # The residual is the same for every feature, so it is formed once.
        residual = y - y_pred

        # Weight gradient, one feature at a time.
        dW = zeros(n)
        for j in axes(X, 2)
            push!(conds, pinvcond(residual))        # y - y_pred

            col = view(X, :, j)
            corr = dot(col, residual)
            push!(conds, dotcond(col, residual))    # X[:, j] · residual

            push!(conds, 1.0)                       # 2 * corr: scalar scaling
            push!(conds, 1.0)                       # -1 * (...): scalar scaling

            # The penalty sign follows the sign test on the current weight.
            shifted = w[j] > 0 ? -2 * corr + l1_penalty : -2 * corr - l1_penalty
            push!(conds, pinvcond(shifted))

            dW[j] = shifted / m
            push!(conds, 1.0)                       # division by the scalar m
        end

        # Intercept gradient.
        push!(conds, pinvcond(residual))            # y - y_pred
        # NOTE(review): summation is logged as perfectly conditioned, as in
        # the original analysis; it can in fact suffer cancellation.
        push!(conds, 1.0)                           # sum(residual)
        push!(conds, 1.0)                           # -2 * sum
        db = -2 * sum(residual) / m
        push!(conds, 1.0)                           # division by the scalar m

        # Parameter updates happen once per iteration, after the full gradient
        # is known; the estimate is taken on the result of each subtraction.
        push!(conds, 1.0)                           # learning_rate * dW
        w = w - learning_rate * dW
        push!(conds, pinvcond(w))

        push!(conds, 1.0)                           # learning_rate * db
        b = b - learning_rate * db
        push!(conds, pinvcond(b))
    end

    return conds
end

## Perform regression tasks on data

## Evaluation of algorithm performance

## Conclusion