# Choosing the right Machine Learning Model

Machine Learning Model | Use Case
-----------------------|---------
Classification | Predicting a category (a choice)
Regression | Predicting a number

In [21]:
# Numerical / tabular stack used throughout the notebook.
import numpy as np
import pandas as pd
# `plt` is the conventional alias for the pyplot plotting interface; binding it
# to the top-level `matplotlib` package would make calls such as plt.plot() fail.
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

## Machine Learning Models for a Regression Problem

In [8]:
# Build `boston_df`: the 13 Boston-housing feature columns plus a "target" column.
#
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where the
# import itself raises ImportError. When that happens, rebuild the same arrays
# from the original StatLib file (this fallback needs network access).
try:
    from sklearn.datasets import load_boston
    boston = load_boston()
except ImportError:
    from sklearn.utils import Bunch

    raw_boston = pd.read_csv(
        "http://lib.stat.cmu.edu/datasets/boston",
        sep=r"\s+",
        skiprows=22,   # skip the free-text header that precedes the data
        header=None,
    )
    # Each record is wrapped over two physical lines in the file: the even rows
    # carry the first 11 values, the odd rows carry 2 more features + the target.
    boston = Bunch(
        data=np.hstack([raw_boston.values[::2, :], raw_boston.values[1::2, :2]]),
        target=raw_boston.values[1::2, 2],
        feature_names=np.array([
            "CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
            "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT",
        ]),
    )

boston_df = pd.DataFrame(boston["data"], columns=boston["feature_names"])
# The target array has one entry per row, so it can be assigned positionally.
boston_df["target"] = boston["target"]

In [9]:
# Preview the first rows to sanity-check the column names and values.
boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [10]:
# List each column's dtype; the estimators below need purely numeric input.
boston_df.dtypes

CRIM       float64
ZN         float64
INDUS      float64
CHAS       float64
NOX        float64
RM         float64
AGE        float64
DIS        float64
RAD        float64
TAX        float64
PTRATIO    float64
B          float64
LSTAT      float64
target     float64
dtype: object

In [13]:
# Count missing values per column (all zeros means no imputation is needed).
boston_df.isna().sum()

CRIM       0
ZN         0
INDUS      0
CHAS       0
NOX        0
RM         0
AGE        0
DIS        0
RAD        0
TAX        0
PTRATIO    0
B          0
LSTAT      0
target     0
dtype: int64

In [15]:
# (rows, columns) of the frame; the column count includes "target".
boston_df.shape

(506, 14)

In [25]:
# Fix NumPy's global seed so the shuffle performed by the split is repeatable.
np.random.seed(22)

# Separate the label ("target") from the feature matrix (every other column).
Y = boston_df["target"]
X = boston_df.drop(columns="target")

# Reserve 20% of the rows as the held-out test set.
X_TRAIN, X_TEST, Y_TRAIN, Y_TEST = train_test_split(
    X,
    Y,
    test_size=0.2,
)

In [26]:
from sklearn.linear_model import Ridge

# Train a ridge regression with its default settings on the training split;
# fit() returns the estimator itself, so construction and fitting can be chained.
model = Ridge().fit(X_TRAIN, Y_TRAIN)

# For regressors, score() reports R^2 — evaluated here on the held-out rows.
model.score(X_TEST, Y_TEST)

0.7630850497410883

In [34]:
from sklearn.ensemble import RandomForestRegressor

# A random forest is a stochastic learner, so pin the estimator's own seed.
# Without it the fitted model (and the score below) depends on whatever state
# the global NumPy RNG happens to be in when this cell runs, and changes
# whenever the cell is re-executed.
model = RandomForestRegressor(n_estimators=100, random_state=22)
model.fit(X_TRAIN, Y_TRAIN)

# For regressors, score() reports R^2 — evaluated here on the held-out rows.
model.score(X_TEST, Y_TEST)

0.8199000697237587

## Machine Learning Models for a Classification Problem

In [39]:
# Read the heart-disease table; the path is relative, so the CSV must be
# reachable from the kernel's working directory.
heart_disease = pd.read_csv("heart-disease.csv")
# Preview the first rows to sanity-check the column names and values.
heart_disease.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [40]:
# List each column's dtype; the estimators below need purely numeric input.
heart_disease.dtypes

age           int64
sex           int64
cp            int64
trestbps      int64
chol          int64
fbs           int64
restecg       int64
thalach       int64
exang         int64
oldpeak     float64
slope         int64
ca            int64
thal          int64
target        int64
dtype: object

In [38]:
# Count missing values per column (all zeros means no imputation is needed).
heart_disease.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [41]:
# Separate the label ("target") from the feature matrix (every other column).
Y = heart_disease["target"]
X = heart_disease.drop(columns="target")

In [42]:
# Hold out 20% of the rows for evaluation. The seed is passed explicitly so the
# split does not depend on the state of the global NumPy RNG at the moment this
# cell runs; an unseeded split changes on every out-of-order re-run, and every
# score computed from it changes with it.
X_TRAIN, X_TEST, Y_TRAIN, Y_TEST = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=22,
)

In [45]:
from sklearn import svm

# Train a support-vector classifier with its default settings on the training
# split; fit() returns the estimator itself, so the two steps can be chained.
# NOTE(review): the features are passed in unscaled — SVMs are usually
# scale-sensitive, which may explain the low score; confirm before comparing.
model = svm.SVC().fit(X_TRAIN, Y_TRAIN)

# For classifiers, score() reports mean accuracy — here on the held-out rows.
model.score(X_TEST, Y_TEST)

0.6065573770491803

In [47]:
from sklearn.ensemble import RandomForestClassifier

# A random forest is a stochastic learner, so pin the estimator's own seed;
# otherwise the accuracy below changes each time this cell is re-executed.
# n_estimators is spelled out to match the regressor cell, rather than relying
# on a library default that has differed between scikit-learn releases.
model = RandomForestClassifier(n_estimators=100, random_state=22)
model.fit(X_TRAIN, Y_TRAIN)

# For classifiers, score() reports mean accuracy — here on the held-out rows.
model.score(X_TEST, Y_TEST)

0.8688524590163934