# Multiple linear regression
### Can we predict the calorie content of a cereal? 

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("cereals.csv")

In [7]:
y = df.calories.values
X = df[['sugars', 'carbo', 'protein', 'fat', 'sodium', 'fiber', 'potass']].values

In [22]:
X

array([[  6. ,   5. ,   4. ,   1. , 130. ,  10. , 280. ],
       [  8. ,   8. ,   3. ,   5. ,  15. ,   2. , 135. ],
       [  5. ,   7. ,   4. ,   1. , 260. ,   9. , 320. ],
       [  0. ,   8. ,   4. ,   0. , 140. ,  14. , 330. ],
       [  8. ,  14. ,   2. ,   2. , 200. ,   1. ,  -1. ],
       [ 10. ,  10.5,   2. ,   2. , 180. ,   1.5,  70. ],
       [ 14. ,  11. ,   2. ,   0. , 125. ,   1. ,  30. ],
       [  8. ,  18. ,   3. ,   2. , 210. ,   2. , 100. ],
       [  6. ,  15. ,   2. ,   1. , 200. ,   4. , 125. ],
       [  5. ,  13. ,   3. ,   0. , 210. ,   5. , 190. ],
       [ 12. ,  12. ,   1. ,   2. , 220. ,   0. ,  35. ],
       [  1. ,  17. ,   6. ,   2. , 290. ,   2. , 105. ],
       [  9. ,  13. ,   1. ,   3. , 210. ,   0. ,  45. ],
       [  7. ,  13. ,   3. ,   2. , 140. ,   2. , 105. ],
       [ 13. ,  12. ,   1. ,   1. , 180. ,   0. ,  55. ],
       [  3. ,  22. ,   2. ,   0. , 280. ,   0. ,  25. ],
       [  2. ,  21. ,   2. ,   0. , 290. ,   1. ,  35. ],
       [ 12. ,

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=0)

In [14]:
from sklearn.linear_model import LinearRegression

regressor = LinearRegression().fit(X_train, y_train)


In [15]:
y_pred = regressor.predict(X_test)

In [20]:
results = pd.DataFrame(y_test, columns= ['test'])
results['predicted'] = y_pred

In [21]:
results

Unnamed: 0,test,predicted
0,120,113.302739
1,100,101.700738
2,100,103.211016
3,50,56.666868
4,120,105.767878
5,100,96.31647
6,90,88.816987
7,110,93.165035
8,150,149.52254
9,110,113.712864


# Can we do it with SVR? 

In [54]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y_train.reshape((-1, 1)))
y_test_scaled = y_scaler.transform(y_test.reshape((-1, 1)))

rbf_regressor = SVR(kernel="rbf")
rbf_regressor.fit(X_scaled, y_scaled.reshape(-1))

linear_regressor = SVR(kernel = "linear")
linear_regressor.fit(X_scaled, y_scaled.reshape(-1))



SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [55]:
y_pred_rbf = rbf_regressor.predict(X_test_scaled)
y_pred_linear = linear_regressor.predict(X_test_scaled)

In [56]:
y_pred_unscaled_rbf = y_scaler.inverse_transform(y_pred_rbf)
y_pred_unscaled_linear = y_scaler.inverse_transform(y_pred_linear)

In [57]:
svr_results = pd.DataFrame(y_test, columns= ['test'])
svr_results['predicted RBF'] = y_pred_unscaled_rbf
svr_results['predicted linear'] = y_pred_unscaled_linear
svr_results

Unnamed: 0,test,predicted RBF,predicted linear
0,120,110.510515,112.769439
1,100,102.361341,101.894333
2,100,109.739232,103.302799
3,50,87.21047,57.196388
4,120,108.948746,106.377116
5,100,91.538518,95.993079
6,90,87.104084,89.161315
7,110,93.480743,93.191385
8,150,142.409231,148.656757
9,110,111.884691,113.194006


# What about Non linear regression?

In [58]:
from sklearn.tree import DecisionTreeRegressor

tree_regressor = DecisionTreeRegressor()
tree_regressor.fit(X_train, y_train)

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [60]:
y_pred_decisionTree = tree_regressor.predict(X_test)

In [61]:
decisionTree_results = pd.DataFrame(y_test, columns= ['test'])
decisionTree_results['predicted decisionTree'] = y_pred_decisionTree
decisionTree_results

Unnamed: 0,test,predicted decisionTree
0,120,110.0
1,100,100.0
2,100,120.0
3,50,100.0
4,120,100.0
5,100,100.0
6,90,80.0
7,110,90.0
8,150,150.0
9,110,100.0
