Permalink
Browse files

Fixed: Linear-regression t-statistics were computed incorrectly.

  • Loading branch information...
1 parent f75969c; commit 379de83d76490e727ce10bcae84c30bda77f2a54; Florian Schoppmann committed May 18, 2011
@@ -29,13 +29,13 @@ COPY houses FROM STDIN DELIMITER '|';
select * from houses limit 5;
\qecho === Calculate Coefficients ======================================
-select madlib.mregr_coef(price, array[1, bedroom, bath, size])::REAL[] from houses;
+select madlib.linregr_coef(price, array[1, bedroom, bath, size])::REAL[] from houses;
\qecho === Calculate R square value ====================================
-select madlib.mregr_r2(price, array[1, bedroom, bath, size])::REAL from houses;
+select madlib.linregr_r2(price, array[1, bedroom, bath, size])::REAL from houses;
\qecho === Calculate t statistics ======================================
-select madlib.mregr_tstats(price, array[1, bedroom, bath, size])::REAL[] from houses;
+select madlib.linregr_tstats(price, array[1, bedroom, bath, size])::REAL[] from houses;
\qecho === Calculate p values ==========================================
-select madlib.mregr_pvalues(price, array[1, bedroom, bath, size])::REAL[] from houses;
+select madlib.linregr_pvalues(price, array[1, bedroom, bath, size])::REAL[] from houses;
@@ -214,18 +214,23 @@ AnyValue LinearRegression::final(AbstractDBInterface &db,
- ((state.y_sum * state.y_sum) / state.numRows)
);
+ // total sum of squares
+ double tss
+ = state.y_square_sum
+ - ((state.y_sum * state.y_sum) / state.numRows);
+
// coefficient of determination
- if (what == kRSquare) {
- // total sum of squares
- double tss
- = state.y_square_sum
- - ((state.y_sum * state.y_sum) / state.numRows);
-
+ if (what == kRSquare)
return ess / tss;
- }
+
+ // In the case of linear regression:
+ // residual sum of squares (rss) = total sum of squares (tss) - explained
+ // sum of squares (ess)
+ // Proof: http://en.wikipedia.org/wiki/Sum_of_squares
+ double rss = tss - ess;
// Variance is also called the mean square error
- double variance = ess / (state.numRows - state.widthOfX);
+ double variance = rss / (state.numRows - state.widthOfX);
// Precompute (X^T * X)^{-1}
mat inverse_of_X_transp_X = inv(state.X_transp_X);
@@ -255,8 +255,8 @@ RETURNS DOUBLE PRECISION[] AS $$
except:
sys.path.append("PLPYTHON_LIBDIR")
import logistic
+ global MADlibSchema
MADlibSchema = "MADLIB_SCHEMA"
-
return logistic.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;
@@ -272,8 +272,8 @@ RETURNS DOUBLE PRECISION[] AS $$
except:
sys.path.append("PLPYTHON_LIBDIR")
import logistic
+ global MADlibSchema
MADlibSchema = "MADLIB_SCHEMA"
-
return logistic.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;
@@ -290,8 +290,8 @@ RETURNS DOUBLE PRECISION[] AS $$
except:
sys.path.append("PLPYTHON_LIBDIR")
import logistic
+ global MADlibSchema
MADlibSchema = "MADLIB_SCHEMA"
-
return logistic.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;
@@ -339,7 +339,7 @@ RETURNS DOUBLE PRECISION[] AS $$
except:
sys.path.append("PLPYTHON_LIBDIR")
import logistic
+ global MADlibSchema
MADlibSchema = "MADLIB_SCHEMA"
-
return logistic.compute_logregr_coef(**globals())
$$ LANGUAGE plpythonu VOLATILE;

0 comments on commit 379de83

Please sign in to comment.