# Multiple linear regression
### Can we predict the calorie content of a cereal? 

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

df = pd.read_csv("cereals.csv")

In [7]:
y = df.calories.values
X = df[['sugars', 'carbo', 'protein', 'fat', 'sodium', 'fiber', 'potass']].values

In [22]:
X

array([[  6. ,   5. ,   4. ,   1. , 130. ,  10. , 280. ],
       [  8. ,   8. ,   3. ,   5. ,  15. ,   2. , 135. ],
       [  5. ,   7. ,   4. ,   1. , 260. ,   9. , 320. ],
       [  0. ,   8. ,   4. ,   0. , 140. ,  14. , 330. ],
       [  8. ,  14. ,   2. ,   2. , 200. ,   1. ,  -1. ],
       [ 10. ,  10.5,   2. ,   2. , 180. ,   1.5,  70. ],
       [ 14. ,  11. ,   2. ,   0. , 125. ,   1. ,  30. ],
       [  8. ,  18. ,   3. ,   2. , 210. ,   2. , 100. ],
       [  6. ,  15. ,   2. ,   1. , 200. ,   4. , 125. ],
       [  5. ,  13. ,   3. ,   0. , 210. ,   5. , 190. ],
       [ 12. ,  12. ,   1. ,   2. , 220. ,   0. ,  35. ],
       [  1. ,  17. ,   6. ,   2. , 290. ,   2. , 105. ],
       [  9. ,  13. ,   1. ,   3. , 210. ,   0. ,  45. ],
       [  7. ,  13. ,   3. ,   2. , 140. ,   2. , 105. ],
       [ 13. ,  12. ,   1. ,   1. , 180. ,   0. ,  55. ],
       [  3. ,  22. ,   2. ,   0. , 280. ,   0. ,  25. ],
       [  2. ,  21. ,   2. ,   0. , 290. ,   1. ,  35. ],
       [ 12. ,

In [10]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25, random_state=0)

In [14]:
from sklearn.linear_model import LinearRegression

regressor = LinearRegression().fit(X_train, y_train)


In [15]:
y_pred = regressor.predict(X_test)

In [20]:
results = pd.DataFrame(y_test, columns= ['test'])
results['predicted'] = y_pred

In [21]:
results

Unnamed: 0,test,predicted
0,120,113.302739
1,100,101.700738
2,100,103.211016
3,50,56.666868
4,120,105.767878
5,100,96.31647
6,90,88.816987
7,110,93.165035
8,150,149.52254
9,110,113.712864


# Can we do it with SVR? 

In [47]:
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
X_scaler = StandardScaler()
X_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

y_scaler = StandardScaler()
y_scaled = y_scaler.fit_transform(y_train.reshape((-1, 1)))
y_test_scaled = y_scaler.transform(y_test.reshape((-1, 1)))

regressor = SVR()
regressor.fit(X_scaled, y_scaled.reshape(-1))



SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [48]:
y_pred = regressor.predict(X_test_scaled)

In [50]:
y_pred_unscaled = y_scaler.inverse_transform(y_pred)

In [53]:
svr_results = pd.DataFrame(y_test, columns= ['test'])
svr_results['predicted'] = y_pred_unscaled
svr_results

Unnamed: 0,test,predicted
0,120,110.510515
1,100,102.361341
2,100,109.739232
3,50,87.21047
4,120,108.948746
5,100,91.538518
6,90,87.104084
7,110,93.480743
8,150,142.409231
9,110,111.884691
