In [1]:
#' Build one symbolic squared-error expression per observation.
#'
#' Expands `formula` against `data` with stats::model.matrix() and, for every
#' row of the resulting design matrix, writes the squared error as a string of
#' R code in the unknown coefficient vector `x`, e.g.
#' "(x[1]*1 + x[2]*4 - 1)^2".
#'
#' @param formula Model formula passed to stats::model.matrix().
#' @param data Data holding the variables used in `formula`.
#' @param dependent_var Name of the column of `data` that holds the observed
#'   response; its values are read directly from `data`.
#' @return A list with `data` (the design matrix as a data frame plus the
#'   character column `error_symbolic`) and `independent_var` (the design
#'   matrix column names, in the order matching `x[1]`, `x[2]`, ...).
xls.prep <- function(formula,data,dependent_var){

    matrix.baked <- stats::model.matrix(formula,data)

    df.baked <- base::as.data.frame(matrix.baked)

    varnames <- base::colnames(df.baked)

    varnum <- base::length(varnames)

    if(varnum == 0){
        base::stop('The model matrix has no columns; the formula must contain at least one term.')
    }

    response <- data[[dependent_var]]

    if(base::is.null(response)){
        base::stop(base::sprintf('Dependent variable "%s" was not found in data.',dependent_var))
    }

    # model.matrix() silently drops rows removed by the active na.action, so
    # the response has to lose the same rows to stay aligned with the design
    # matrix.
    dropped_rows <- base::attr(stats::model.frame(formula,data),'na.action')

    if(!base::is.null(dropped_rows)){
        response <- response[-base::as.integer(dropped_rows)]
    }

    if(base::length(response) != base::nrow(df.baked)){
        base::stop('The dependent variable and the model matrix have different numbers of rows.')
    }

    coefnames <- base::paste0('x[',base::seq_len(varnum),']')

    # One "%s*%s" slot pair (coefficient symbol, regressor value) per column.
    regformat <- base::paste(base::rep('%s*%s',varnum),collapse = ' + ')

    # Interleave coefficient symbols and regressor columns in the order the
    # format string expects. Calling sprintf through do.call avoids building
    # and parsing R source text, which broke on column names containing quotes.
    sprintf_args <- base::vector('list',2 * varnum)

    for(i in base::seq_len(varnum)){
        sprintf_args[[2 * i - 1]] <- coefnames[i]
        sprintf_args[[2 * i]] <- df.baked[[varnames[i]]]
    }

    linear_predictor <- base::do.call(base::sprintf,base::c(base::list(regformat),sprintf_args))

    df.baked[["error_symbolic"]] <- base::sprintf('(%s - %s)^2',linear_predictor,response)

    base::list(data=df.baked,independent_var=varnames)

}

In [2]:
#' Assemble the weighted look-ahead objective from symbolic error strings.
#'
#' Every run of `error_ahead_level` consecutive rows forms one window. Inside
#' a window each symbolic error is prefixed with its weight ("w*error") and the
#' terms are joined with " + "; all windows are then joined with " + " into a
#' single expression, which is parsed into an R function.
#'
#' @param data Data frame containing the symbolic error column.
#' @param error_column_name Name of the column holding the symbolic error
#'   expressions (strings of R code).
#' @param args String inserted verbatim as the argument list of the generated
#'   function, e.g. "x".
#' @param error_weights Weights applied to the errors inside each window, the
#'   first weight going to the first row of the window. paste() recycles them
#'   if their length differs from the window length.
#' @param error_ahead_level Window length (a whole number >= 1).
#' @return A list with `objective` (the generated function) and
#'   `sum_of_errors` (the expression text the function evaluates).
xls.objfun <- function(data,error_column_name,args,error_weights,error_ahead_level){

    errors <- data[[error_column_name]]

    if(base::is.null(errors)){
        base::stop(base::sprintf('Column "%s" was not found in data.',error_column_name))
    }

    errors <- base::as.character(errors)

    if(error_ahead_level < 1 || error_ahead_level != base::floor(error_ahead_level)){
        base::stop('Error ahead level must be a whole number greater than or equal to 1.')
    }

    # Only windows that fit completely inside the data are used, so the last
    # (error_ahead_level - 1) rows never start a window.
    window_count <- base::length(errors) - error_ahead_level + 1

    if(window_count < 1){
        # Without this guard the generated function body would be an empty
        # `return()`.
        base::stop('The number of observations must be greater than or equal to error ahead level.')
    }

    weighted_windows <- base::vapply(
        base::seq_len(window_count),
        function(start){
            window <- errors[start:(start + error_ahead_level - 1)]
            base::paste(error_weights,window,sep = "*",collapse = " + ")
        },
        base::character(1)
    )

    sum_of_errors <- base::paste(weighted_windows,collapse = ' + ')

    #sum_of_errors <- as.character(Ryacas::yac_expr(sprintf('Simplify(%s)',sum_of_errors)))

    # The objective only exists as text, so it has to be parsed into a function.
    objfun <- base::eval(base::parse(text = base::paste0('function(', args, ') { return(' , sum_of_errors , ')}')))

    #objfun <- Deriv::Simplify(objfun)

    base::list(objective = objfun,sum_of_errors = sum_of_errors)
}

In [3]:
#' Fit a linear model by minimising weighted look-ahead squared errors.
#'
#' Builds symbolic per-row squared errors with xls.prep(), combines them into
#' a weighted objective over windows of `error_ahead_level` consecutive rows
#' with xls.objfun(), minimises that objective with NlcOptim::solnl() starting
#' from an all-zero coefficient vector, and returns an `lm` object whose
#' coefficients are replaced by the optimised values.
#'
#' @param formula Model formula. The first name returned by all.vars() is
#'   used as the dependent variable.
#' @param data Data frame holding the variables in `formula`.
#' @param lag_level Currently unused; kept for interface compatibility.
#' @param lag_column Currently unused; kept for interface compatibility.
#' @param error_weights Numeric weights, one per step of the window, summing
#'   to 1. When NULL, linearly decreasing weights are generated so the first
#'   row of each window weighs the most.
#' @param error_ahead_level Number of consecutive rows in each error window.
#' @return The object returned by stats::lm(formula, data) with its
#'   `coefficients` element overwritten by the optimised coefficients. All
#'   other components (residuals, fitted values, QR, ...) still come from the
#'   ordinary least squares fit.
xls.fit <- function(formula,
                    data,
                    lag_level=1,
                    lag_column = NULL,
                    error_weights = NULL,
                    error_ahead_level=4){

    if(!base::is.numeric(error_ahead_level) ||
       base::length(error_ahead_level) != 1 ||
       base::is.na(error_ahead_level) ||
       error_ahead_level < 1 ||
       error_ahead_level != base::floor(error_ahead_level)){

        base::stop('Error ahead level must be a single whole number greater than or equal to 1.')

    }

    if(base::nrow(data) < error_ahead_level){

        base::stop('The number of observations must be greater than or equal to error ahead level.')

    }

    dependent_var <- base::all.vars(formula)[1]

    if(base::is.null(error_weights)){

        # Linearly increasing weights normalised to sum to 1, then reversed so
        # the nearest step of every window gets the largest weight.
        dummy_weights <- base::seq(from = 0,to = 1,length.out = error_ahead_level + 1)[-1]
        error_weights <- base::rev(dummy_weights/base::sum(dummy_weights))

    }else if(base::length(error_weights) != error_ahead_level){

        base::stop('Error weights should have same length with ahead level.')

    }else if(!base::isTRUE(base::all.equal(base::sum(error_weights),1))){

        # Compared with a tolerance: an exact `!= 1` test can reject valid
        # weights because of floating-point rounding.
        base::stop('The sum of the error weights must be 1.')

    }

    prepared_obj <- xls.prep(formula,data,dependent_var)

    df <- prepared_obj$data

    independent_var <- prepared_obj$independent_var

    # The design matrix can have fewer rows than `data` (rows dropped for
    # missing values), so the window length has to be re-checked here.
    if(base::nrow(df) < error_ahead_level){

        base::stop('The number of observations must be greater than or equal to error ahead level.')

    }

    initial_solution <- base::rep(0,base::length(independent_var))

    objfun_object <- xls.objfun(data = df,error_column_name = 'error_symbolic',
                                args = 'x',error_weights = error_weights, error_ahead_level = error_ahead_level)

    objfun <- objfun_object$objective

    base::suppressWarnings(optimizing_parameters <- NlcOptim::solnl(X = initial_solution,objfun = objfun))

    coefficients_vec <- base::as.numeric(optimizing_parameters$par)
    base::names(coefficients_vec) <- independent_var

    # An ordinary lm fit is used purely as a container so the result works
    # with the usual lm tooling.
    dummy_model <- stats::lm(formula = formula,data = data)

    dummy_model$coefficients <- coefficients_vec

    #dummy_model$call <- 'eXtreme Least Squares Model'

    dummy_model

}

In [42]:
# Open the help page for lm(); its coefficients/residuals/fitted.values
# components are documented there.
?lm

0,1
lm {stats},R Documentation

0,1
formula,"an object of class ""formula"" (or one that can be coerced to that class): a symbolic description of the model to be fitted. The details of model specification are given under ‘Details’."
data,"an optional data frame, list or environment (or object coercible by as.data.frame to a data frame) containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which lm is called."
subset,an optional vector specifying a subset of observations to be used in the fitting process.
weights,"an optional vector of weights to be used in the fitting process. Should be NULL or a numeric vector. If non-NULL, weighted least squares is used with weights weights (that is, minimizing sum(w*e^2)); otherwise ordinary least squares is used. See also ‘Details’,"
na.action,"a function which indicates what should happen when the data contain NAs. The default is set by the na.action setting of options, and is na.fail if that is unset. The ‘factory-fresh’ default is na.omit. Another possible value is NULL, no action. Value na.exclude can be useful."
method,"the method to be used; for fitting, currently only method = ""qr"" is supported; method = ""model.frame"" returns the model frame (the same as with model = TRUE, see below)."
"model, x, y, qr","logicals. If TRUE the corresponding components of the fit (the model frame, the model matrix, the response, the QR decomposition) are returned."
singular.ok,logical. If FALSE (the default in S but not in R) a singular fit is an error.
contrasts,an optional list. See the contrasts.arg of model.matrix.default.
offset,"this can be used to specify an a priori known component to be included in the linear predictor during fitting. This should be NULL or a numeric vector or matrix of extents matching those of the response. One or more offset terms can be included in the formula instead or as well, and if more than one are specified their sum is used. See model.offset."

0,1
coefficients,a named vector of coefficients
residuals,"the residuals, that is response minus fitted values."
fitted.values,the fitted mean values.
rank,the numeric rank of the fitted linear model.
weights,(only for weighted fits) the specified weights.
df.residual,the residual degrees of freedom.
call,the matched call.
terms,the terms object used.
contrasts,(only where relevant) the contrasts used.
xlevels,(only where relevant) a record of the levels of the factors used in fitting.
