In [1]:
(ns non-linear-regression
  (:require
      [clojure.set :as set]
      [clojure.string :as str]))

In [2]:
;;; Functions related to symbolic differentiation.

(defn variable? [e] (symbol? e))

;Should be a symbol first. And should be equal. 

(defn same-variable? [v1 v2] 
 (and (variable? v1) (variable? v2) (= v1 v2)))

;Equality check of two numbers

(defn =number? [x n] 
 (and (number? x) (= x n)))

(defn make-sum [a1 a2]
 (cond (=number? a1 0) a2
       (=number? a2 0) a1
       (and (number? a1) (number? a2)) (+ a1 a2)
       :else (list '+ a1 a2)))

(defn make-product [m1 m2]
 (cond (or (=number? m1 0) (=number? m2 0)) 0
       (=number? m1 1) m2
       (=number? m2 1) m1
       (and (number? m1) (number? m2)) (* m1 m2)
       :else (list '* m1 m2)))

(defn sum? [x]
 (and (seq? x) (= (first x) '+)))

;To get the first part of sum expression.

(defn addend [s] 
 (second s))

(defn augend [s]
 (last s))

(defn minus? [x]
 (and (seq? x) (= (first x) '-)))

(defn substract [a1 a2]
 (cond (=number? a1 0) (list '* '-1 a2)
       (=number? a2 0) a1
       (and (number? a1) (number? a2)) (- a1 a2)
       :else (list '- a1 a2)))

(defn product? [x]
 (and (seq? x) (= (first x) '*)))

(defn multiplier [p]
 (second p))

(defn multiplicand [p]
  (last p))

;Now to make exponential. 

(defn make-exponentiation [b e]
 (cond (=number? e 0) 1
       (=number? e 1) b
       (and (number? b) (number? e)) (Math/pow b e)
       :else (list '** b e)))

(defn exponentiation? [exp]
  (and (seq? exp) (= '** (first exp))))

(defn base [exp]
  (second exp))

(defn exponent [exp]
  (last exp))

(def trigonometry #{'sin 'cos 'tan 'exp 'log 'sig 'tanh})

(defn trigonometry? [exp]
    (contains? trigonometry (first exp)))

(defn deriv [exp var]  

 (cond (number? exp) 0 
       (variable? exp) (if (same-variable? exp var) 1 0)
     
       (sum? exp)
       (make-sum (deriv (addend exp) var)
          (deriv (augend exp) var))
       
       (minus? exp)
       (substract (deriv (addend exp) var)
          (list '- (deriv (augend exp) var)))
       
      (product? exp)
       (make-sum
        (make-product (multiplier exp)
             (deriv (multiplicand exp) var))
            (make-product (deriv (multiplier exp) var)
               (multiplicand exp)))
       
       (exponentiation? exp)
         (make-product 
          (make-product (deriv (base exp) var) (exponent exp))
          (make-exponentiation (base exp) (dec (exponent exp)))) 
       
       (trigonometry? exp)
          (let [rest-exp (if (= (count (rest exp)) 1) (first (rest exp)) (rest exp))]
                 (cond (= (first exp) 'sin) (list '* (list 'cos rest-exp) (deriv rest-exp var))
                       (= (first exp) 'cos) (list '* (- 1) (list 'sin rest-exp) (deriv rest-exp var))
                       (= (first exp) 'tan) (list '* (list '** (list 'sec rest-exp) 2) (deriv rest-exp var))
                       (= (first exp) 'exp) (list '* (list 'exp rest-exp) (deriv rest-exp var))
                       (= (first exp) 'log) (list '* (list '/ 1 rest-exp) (deriv rest-exp var))
                       (= (first exp) 'sig) (list '* (list 'sig rest-exp) (list '- 1 (list 'sig rest-exp)) (deriv rest-exp var))
                       (= (first exp) 'tanh) (list '* (list '- 1 (list '** (list 'tanh rest-exp) 2)) (deriv rest-exp var))
                       (= (first exp) 'relu) (prn "At least print something dude.")
                      
                       ))
      :else
     (prn "Invalid Operation")))

#'non-linear-regression/deriv

In [3]:
;;; My understand of the problem. (To be discussed with Suresh dai)

;;; We start with a vector of input features. (x1 x2 x3 x4 .... xn)

;;; We then choose a matrix of coefficients that we want to learn. It will be of the size n times k.
;;; k is the parameter we choose.

;;; We could think of one matrix m as one layer in Neural Network,
;;; and each columns as neurons with weights (m1 m2 m3 m4), (m5 m6 m7 m8), and so on.

;;; So, in a way, we are selecting the number of neurons in a layer with the value of k.

;;; The output of each neuron is a scalar (after the dot product of its weights with feature vector.)

;;; We then activate this scalar output depending on our activation function. 

;;; So, if we have 5 neurons in one layer, (that is k = 5), we will have 5 outputs after activation in each neurons.

;;; After a series of these operations in each layers (which is usually deep and hence the name deep learning),
;;; We finally multiply the output with a final-layer-vector to get our scalar output y. 
;;; Our final-layer will be a vector of the form (n1 n2 ......nk).

;;; The size of the final-layer depends on the size of the output of the preceding layer (that is penultimate layer).

;;; And hence, we obtain our scalar predicted y using feature vector (x1 x2 ...xn) with the help of matrix coefficients
;;; m and final-layer-vector n.

In [4]:
(def j 0.000001) ; Defining the learning rate. 

#'non-linear-regression/j

In [5]:
;;; Defining some mathematica operations that will be used later. 

(defn tanh [x]
    (Math/tanh x))
(defn sig [x]
    (/ 1 (+ 1 (Math/exp (- x)))))
(defn ** [x y]
    (Math/pow x y))

#'non-linear-regression/**

In [6]:
;Creating a min-max function which is used inside normalization function below. 
;num -min / max - min 

(defn min-max [v]
    (let [min-num (apply min v)
          max-num (apply max v)
          ]
        (map #(float (/ (- % min-num) (- max-num min-num))) v)))

#'non-linear-regression/min-max

In [7]:
;Creating a function that calculates the tranpose of a matrix. 

(defn transpose [m]
    (apply map vector m)
    )

#'non-linear-regression/transpose

In [8]:
;Creating a function that normalizes the data using minimum and maximum value.

(defn normalize [data]
    (transpose (map #(min-max %) (transpose data)))
    )

#'non-linear-regression/normalize

In [9]:
;Creating a function that cre different epochs of the data. 
;Concats data depending on the value of n.

(defn create-epochs [data n]
    (if (> n 1) (create-epochs (concat data data) (dec n))
        data))

#'non-linear-regression/create-epochs

In [10]:
;Creating a function that creates symbolic features depending on the value of size. 

(defn create-features [size]
    (map read-string (map #(str "x" %) (range 1 (+ size 1))))
    )

#'non-linear-regression/create-features

In [11]:
;For example, if the size of our input features is 4, then
(create-features 4) 

(x1 x2 x3 x4)

In [12]:
;Creating a function that creates a symbolic matrix depending on the value of row and column.

(defn create-symbolic-matrix [row column]
    (partition row (map read-string (map #(str "m" %) (range 1 (+ (* row column) 1)))))
    )

#'non-linear-regression/create-symbolic-matrix

In [13]:
; For example, if size of our matrix is 4 * 5
(create-symbolic-matrix 4 5) 

((m1 m2 m3 m4) (m5 m6 m7 m8) (m9 m10 m11 m12) (m13 m14 m15 m16) (m17 m18 m19 m20))

In [14]:
; Creating a function that creates a final layer depending on the value of size.
; Value of size is decided depending on the output of the preceding layer. 

(defn create-final-layer [size]
     (map read-string (map #(str "n" %) (range 1 (+ size 1))))
    )

#'non-linear-regression/create-final-layer

In [15]:
;For example, if our final layer is of size 5 * 1, 
;it means we are getting 5 outputs out of our final layer (after multiplication and activation). 

(create-final-layer 5)

(n1 n2 n3 n4 n5)

In [16]:
;# Creating three functions that assigns values to the features, coefficients matrix and final layer vector.

(defn assign-features [n features]
     (map #(str "(def x" %1 " " %2 ")") (range 1 (+ n 1)) features)
    )

(defn assign-coefficients [n coefficients]
     (map #(str "(def m" %1 " " %2 ")") (range 1 (+ n 1)) coefficients)
    )

(defn assign-final-layer [n final-layer]
     (map #(str "(def n" %1 " " %2 ")") (range 1 (+ n 1)) final-layer)
    )

#'non-linear-regression/assign-final-layer

In [17]:
;Creating a function that calculates symbolic dot product or numeric dot product depending on the arguments provided.

(defn dot-symbol [v1 v2]
    (if (or (symbol? (first v1)) (symbol? (first v2)))
        (conj (map #(list '* %1 %2)  v1 v2) '+)
        (reduce + (map * v1 v2))
        ))

#'non-linear-regression/dot-symbol

In [18]:
;This is an important operation. 
;We are calculating X times M here. 

(map #(dot-symbol (create-features 4) %)  (create-symbolic-matrix 4 5))

((+ (* x1 m1) (* x2 m2) (* x3 m3) (* x4 m4)) (+ (* x1 m5) (* x2 m6) (* x3 m7) (* x4 m8)) (+ (* x1 m9) (* x2 m10) (* x3 m11) (* x4 m12)) (+ (* x1 m13) (* x2 m14) (* x3 m15) (* x4 m16)) (+ (* x1 m17) (* x2 m18) (* x3 m19) (* x4 m20)))

In [19]:
;Creating a function that first calculates the dot product and then activates the output. 
;That is, X * M operation followed by activation. 

;Takes the input feature, coefficients matrix and activation-function as the arguments.

(defn multiply-and-activate [feature matrix activation-function]
    (let [multiply (map #(dot-symbol feature %) matrix)
          activate  (map #(read-string (str "(" activation-function, " ", %, ")")) multiply)]
        activate))

#'non-linear-regression/multiply-and-activate

In [20]:
;Creating a function that does final layer multiplication to give a scalar output y. 

(defn final-layer-multiplication [activated-output final-layer]
    (cons '+ (map #(read-string (str "( * ", %2, " ", %1, ")")) activated-output final-layer)))

#'non-linear-regression/final-layer-multiplication

In [21]:
;Creating a function that performs non-linear-regression on the given data. 
;Takes the data, hidden parameter k (column of matrix m that we want) and activation function as arguments.
;Sigmoid and Tanh activation functions are implemented inside this function for now. 

(defn non-linear-regression [data k activation-function]

    ;Initalizing m (which is a matrix) and n (which is a vector of coefficients in final layer) with 0.5
    ;Fixed value of 0.5 instead of randomly generated values is used, only so as to compare out results for different
    ;activation functions, different value of k, and different number of epochs. 
    
    (def m (vec (repeat (* k (count (rest (first data)) )) 0.5))) 
    (def n (vec (repeat k 0.5))) 
    ;Initializing error as 0 at the begining. 
    (def error 0)
    
    ;Running a loop across the whole data
    (loop [d  data]
        (when (> (count  d) 1)
            
            
            (do (def y (first (first d)));First element of the list is considered as y. 
                (def x (rest (first d)));Rest of the elements of the list is considered as x. (x is a vector)
                
                ;Creates x-features (x1 x2 ..xn) depending on the count of data.
                (def x-features (create-features (count (rest (first data)))) )
              
                ;Creates m-coefficients matrix ((m1 m2) (m3 m4) ...mn) depending on the count of data.
                (def m-matrix (create-symbolic-matrix (count (rest (first data))) k))
                
                ;Creates a final-layer vector depending on the value of k
                (def final-layer (create-final-layer k))
                
                ;Predicts y after performing 3 operations in series.
                ;First multiplication of x and m, followed by activation and again mulplication with final layer n.
                (def magical-predicted-y (final-layer-multiplication (multiply-and-activate x-features 
                                            m-matrix activation-function) final-layer))
                
                ;Calculates the loss function. 
                (def loss-function (read-string (str "(** (- y ", magical-predicted-y, ") 2)")))
                
                
                ;Calculates the derivate of loss function by both matrix of coefficients and final-layer-vector.
                ;I have used map operation and anonymous function to calculate derivate by vector and matrix.
                (def deriv-expressions-matrix (map #(deriv loss-function %) (flatten m-matrix)))
                (def deriv-expressions-n (map #(deriv loss-function %) final-layer))
                
                    
                ;Assigns the calculated value of x, m and n (vectors and matrix) to the symbols obtained above.
                ;Again using the map operation to loop over a vector and matrix.
                
                ;eval operation evaluates the value of the symbolic expression. 
                
                ;Also, we need to perform the side effect inside this map operation as map produces lazy sequence only.
                
                
                ;Therefore, we use dorun operation here, which performs side effect in clojure map operation. 
                (dorun (map eval (map read-string (assign-features (count x) x))))
                (dorun (map eval (map read-string (assign-coefficients (count m) m))))
                (dorun (map eval (map read-string (assign-final-layer (count final-layer) n))))
                
                
                
                ;Using eval operation again to evaluate the values
                (def ans-matrix (map eval deriv-expressions-matrix))
                (def ans-n (map eval deriv-expressions-n))
                
                
                
                
                ;Final calculation of m matrix, which is our coefficient matrix and final-layer vector n.
                ;This calculated value of m and n  is updated in every iteration. 
                (def m (map + (map #(* (- j) %) ans-matrix) m))
                (def n (map + (map #(* (- j) %) ans-n) n))
                
                ;Also calculating and updating our error value.
                (def error-evaluated (eval loss-function))
                (def error (+ error error-evaluated))
            )
            (recur (rest d))))
    (do
        (prn "The final calculated coefficient matrix is:" (partition (count (rest (first data))) m))
        (prn "The final calculated last layer vector is: " n)
        (prn "The final calculated error is: " error)
        )
     
         ; Returning the final value of m, n, and error. 
         ; m is a matrix of coefficients, n a vector of coefficients and error a scalar value. 
    
    )

#'non-linear-regression/non-linear-regression

In [22]:
;;; Use this in case you have problem loading csv data using fressian. 

;;; Using Motorcars dataset (popular dataset used in data analysis) to perform our non-linear-regression function.

;;; The first entry is mpg (miles per gallon) which we will be using as our output variable y.
;;; Rest of the entries (disp, hp, drat and wt) we will be using as our features. 

(def mtcars-data (drop 1 '([mpg disp hp drat wt] [21 160 110 3.9 2.62] [21 160 110 3.9 2.875] [22.8 108 93 3.85 2.32] [21.4 258 110 3.08 3.215] [18.7 360 175 3.15 3.44] [18.1 225 105 2.76 3.46] [14.3 360 245 3.21 3.57] [24.4 146.7 62 3.69 3.19] [22.8 140.8 95 3.92 3.15] [19.2 167.6 123 3.92 3.44] [17.8 167.6 123 3.92 3.44] [16.4 275.8 180 3.07 4.07] [17.3 275.8 180 3.07 3.73] [15.2 275.8 180 3.07 3.78] [10.4 472 205 2.93 5.25] [10.4 460 215 3 5.424] [14.7 440 230 3.23 5.345] [32.4 78.7 66 4.08 2.2] [30.4 75.7 52 4.93 1.615] [33.9 71.1 65 4.22 1.835] [21.5 120.1 97 3.7 2.465] [15.5 318 150 2.76 3.52] [15.2 304 150 3.15 3.435] [13.3 350 245 3.73 3.84] [19.2 400 175 3.08 3.845] [27.3 79 66 4.08 1.935] [26 120.3 91 4.43 2.14] [30.4 95.1 113 3.77 1.513] [15.8 351 264 4.22 3.17] [19.7 145 175 3.62 2.77] [15 301 335 3.54 3.57] [21.4 121 109 4.11 2.78])))



#'non-linear-regression/mtcars-data

In [23]:
mtcars-data

([21 160 110 3.9 2.62] [21 160 110 3.9 2.875] [22.8 108 93 3.85 2.32] [21.4 258 110 3.08 3.215] [18.7 360 175 3.15 3.44] [18.1 225 105 2.76 3.46] [14.3 360 245 3.21 3.57] [24.4 146.7 62 3.69 3.19] [22.8 140.8 95 3.92 3.15] [19.2 167.6 123 3.92 3.44] [17.8 167.6 123 3.92 3.44] [16.4 275.8 180 3.07 4.07] [17.3 275.8 180 3.07 3.73] [15.2 275.8 180 3.07 3.78] [10.4 472 205 2.93 5.25] [10.4 460 215 3 5.424] [14.7 440 230 3.23 5.345] [32.4 78.7 66 4.08 2.2] [30.4 75.7 52 4.93 1.615] [33.9 71.1 65 4.22 1.835] [21.5 120.1 97 3.7 2.465] [15.5 318 150 2.76 3.52] [15.2 304 150 3.15 3.435] [13.3 350 245 3.73 3.84] [19.2 400 175 3.08 3.845] [27.3 79 66 4.08 1.935] [26 120.3 91 4.43 2.14] [30.4 95.1 113 3.77 1.513] [15.8 351 264 4.22 3.17] [19.7 145 175 3.62 2.77] [15 301 335 3.54 3.57] [21.4 121 109 4.11 2.78])

In [24]:
;Peforming non-linear-regression on our data after normalizing.
; Activation function chosen is tanh.
;Value of k is given 10. 
(non-linear-regression (normalize mtcars-data) 10 'tanh)

"The final calculated coefficient matrix is:" ((0.5000179362045275 0.5 0.5 0.5000193728587182) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5000179362045275 0.5 0.5 0.5000193728587182))
"The final calculated last layer vector is: " (0.5001184163925638 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5001184163925638)
"The final calculated error is: " 258.64353062043114


In [25]:
;With sigmoid activation and rest of the arguments same for error comparison. 
(non-linear-regression (normalize mtcars-data) 10 'sig)

"The final calculated coefficient matrix is:" ((0.5000084754653766 0.5 0.5 0.5000089903688086) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5000084754653766 0.5 0.5 0.5000089903688086))
"The final calculated last layer vector is: " (0.5001310490002991 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5001310490002991)
"The final calculated error is: " 288.4312402356455


In [26]:
;With same parameters as earlier with tanh activation but with different number of epochs (chosen as 2 here).
(non-linear-regression (create-epochs (normalize mtcars-data) 2) 10 'tanh)

"The final calculated coefficient matrix is:" ((0.5000357631427715 0.5 0.5 0.500039110333436) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5000357631427715 0.5 0.5 0.500039110333436))
"The final calculated last layer vector is: " (0.5002352306963533 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5002352306963533)
"The final calculated error is: " 522.803984383881


In [27]:
;With same parameters as above for tanh but with sigmoid activation. (With number of epochs also 2).=
(non-linear-regression (create-epochs (normalize mtcars-data) 2) 10 'sig)

"The final calculated coefficient matrix is:" ((0.5000168049192577 0.5 0.5 0.5000180310082089) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5000168049192577 0.5 0.5 0.5000180310082089))
"The final calculated last layer vector is: " (0.5002637203345409 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5002637203345409)
"The final calculated error is: " 584.7443224139599


In [28]:
;;; As we can see here, tanh activation is giving us less error than sigmoid function in our dataset here,
;;; in both our cases (that is different number of epochs). 


In [29]:
;;; Now, let us compare out results with different number of k. 

In [30]:
;Peforming non-linear-regression on our data after normalizing.
; Activation function chosen is tanh.
;Value of k is given 5. 
(non-linear-regression (normalize mtcars-data) 5 'tanh)

"The final calculated coefficient matrix is:" ((0.500008144644854 0.5 0.5 0.500008554081008) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.500008144644854 0.5 0.5 0.500008554081008))
"The final calculated last layer vector is: " (0.5000524800675661 0.5 0.5 0.5 0.5000524800675661)
"The final calculated error is: " 53.07957886305005


In [31]:
;With sigmoid activation and rest of the arguments same for error comparison. 
(non-linear-regression (normalize mtcars-data) 5 'sig)

"The final calculated coefficient matrix is:" ((0.50000391434051 0.5 0.5 0.5000040835566656) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.5 0.5 0.5 0.5) (0.50000391434051 0.5 0.5 0.5000040835566656))
"The final calculated last layer vector is: " (0.5000573984471849 0.5 0.5 0.5 0.5000573984471849)
"The final calculated error is: " 56.88449765506259


In [32]:
;;; As we see, our error is hugely reduced in both tanh and sigmoid activation after we have reduce the size of k.

;;; So maybe, for our small dataset, less number of neurons in a layer (that is less value of k) actually 
;;; peforms better. (This to be discussed and confirmed with Suresh Sir.)

;;; And as above, even for less value of k, tanh activation is giving less error than sigmoid. 

In [33]:
;;; Now, we will compare the result that we will get for k = 1.
;;; k =1 is equivalent of using a single neuron (that is a vector instead of a matrix) in our first layer.

In [34]:
;Peforming non-linear-regression on our data after normalizing.
; Activation function chosen is tanh.
;Value of k is given 1. 
(non-linear-regression (normalize mtcars-data) 1 'tanh)

"The final calculated coefficient matrix is:" ((0.5000006241324676 0.5 0.5 0.49999979952074447))
"The final calculated last layer vector is: " (0.4999994626532884)
"The final calculated error is: " 3.645511191012884


In [35]:
;With sigmoid activation and rest of the arguments same for error comparison. 
(non-linear-regression (normalize mtcars-data) 1 'sig)

"The final calculated coefficient matrix is:" ((0.5000005317723432 0.5 0.5 0.5000003171125073))
"The final calculated last layer vector is: " (0.4999969561442015)
"The final calculated error is: " 2.6323224896648707


In [None]:
;(linear-regression-multiple (normalize (drop 1 mtcars-data))

;"The final calculated coefficient vector is: " (0.4999839215052779 0.4999867851580205 0.49999523565657705 0.49998443109115)
;"The final error is: " 13.053241688148475


In [None]:
;##### IMPORTANT STUFF ##### COMPARISION BETWEEN LINEAR AND NON LINEAR ######; 

; As we see, error is more in linear using the same normalized data for same number of epochs that is 1.

In [36]:
;For k = 1, the error is again hugely reduced. 

;Also, as a contrast from earlier comparisons, sigmoid activation is giving less error than tanh activation.

In [37]:
;Now, one last exploration.
;For k = 1 (that is a vector), we increase the number of epochs 5 times. 
(non-linear-regression (create-epochs (normalize mtcars-data) 5) 1 'tanh)

"The final calculated coefficient matrix is:" ((0.5000075610356552 0.5 0.5 0.49999346881499274))
"The final calculated last layer vector is: " (0.4999678721489914)
"The final calculated error is: " 58.85304053854204


In [38]:
(non-linear-regression (create-epochs (normalize mtcars-data) 5) 1 'sig)

"The final calculated coefficient matrix is:" ((0.5000076199201023 0.5 0.5 0.5000039890583096))
"The final calculated last layer vector is: " (0.49993193693385934)
"The final calculated error is: " 42.41720756353565


In [39]:
;Things to learn and ask Suresh dai

;How to know the saturation point of coefficients that we are learning. (That is when to stop the updating process.)
;Other activation functions too and their implementations in different cases. 

;After learning more about that, I will implement that in our non-linear-regression function here. 