<a href="https://colab.research.google.com/github/kgpark88/ems/blob/master/ml_summary.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 머신러닝 Summary

In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

### 선형 회귀(Linear Regression)

In [2]:
from sklearn.linear_model import LinearRegression

In [3]:
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ]).reshape(-1,1)
y = np.array([13, 25, 34, 47, 59, 62, 79, 88, 90, 100])

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.3, 
                                                    random_state=42)

In [5]:
model = LinearRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

In [6]:
predictions

array([95.58168317, 25.84158416, 65.69306931])

In [7]:
y_test

array([90, 25, 62])

### 로지스틱 회귀(Logistic Regression)

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [9]:
X = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10 ]).reshape(-1,1)
y = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1])

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.3, 
                                                    random_state=42)

In [11]:
model =  LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

In [12]:
print(classification_report(y_test,predictions))

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3



### 결정 트리(Decision Tree)

In [13]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

In [14]:
iris = load_iris()
X = iris.data[:, 2:] # petal length and width
y = iris.target

In [15]:
dtree_clf = DecisionTreeClassifier(max_depth=2)
dtree_clf.fit(X, y)

DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',
                       max_depth=2, max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort='deprecated',
                       random_state=None, splitter='best')

In [16]:
dtree_clf.predict_proba([[5, 1.5]])

array([[0.        , 0.90740741, 0.09259259]])

In [17]:
dtree_clf.predict([[5, 1.5]])

array([1])

### 랜덤 포레스트(Random Forest)

In [18]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [19]:
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)

In [20]:
X_train, X_test, y_train, y_test = train_test_split(
    df[iris.feature_names], iris.target, test_size=0.25, 	stratify=iris.target, random_state=42)

In [21]:
rf = RandomForestClassifier(n_estimators=50, 
                            max_depth=20, 
                            random_state=42)

In [22]:
rf.fit(X_train, y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=20, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=50,
                       n_jobs=None, oob_score=False, random_state=42, verbose=0,
                       warm_start=False)

In [23]:
predicted = rf.predict(X_test)

In [24]:
accuracy_score(y_test, predicted)

0.9210526315789473