# 머신러닝 Summary

In [5]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

### 선형 회귀(Linear Regression)

In [6]:
from sklearn.linear_model import LinearRegression

In [7]:
# Single feature column holding 1..10; reshape(-1, 1) produces the 2-D
# (n_samples, n_features) layout that scikit-learn estimators expect.
X = np.arange(1, 11).reshape(-1, 1)
# One roughly linear target value per sample.
y = np.array(
    [13, 25, 34, 47, 59, 62, 79, 88, 90, 100]
)

In [8]:
# Hold out 30% of the samples for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [9]:
# fit() returns the fitted estimator, so construction and training can be
# chained into a single statement.
model = LinearRegression().fit(X_train, y_train)
# Predict targets for the held-out samples.
predictions = model.predict(X_test)

In [10]:
# Display the values predicted for the held-out samples.
predictions

array([95.58168317, 25.84158416, 65.69306931])

In [11]:
# Display the true targets of the held-out samples, for comparison with
# the predictions.
y_test

array([90, 25, 62])

### 로지스틱 회귀(Logistic Regression)

In [12]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

In [13]:
# Single feature column holding 1..10 in (n_samples, n_features) layout.
X = np.arange(1, 11).reshape(-1, 1)
# Binary labels: the first five samples are class 0, the last five class 1.
y = np.repeat([0, 1], 5)

In [14]:
# Hold out 30% of the samples for testing; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [15]:
# fit() returns the fitted estimator, so construction and training can be
# chained into a single statement.
model = LogisticRegression().fit(X_train, y_train)
# Predict class labels for the held-out samples.
predictions = model.predict(X_test)

In [16]:
# Print per-class precision, recall, F1 and support for the held-out samples.
print(classification_report(y_true=y_test, y_pred=predictions))

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.50      0.67         2

    accuracy                           0.67         3
   macro avg       0.75      0.75      0.67         3
weighted avg       0.83      0.67      0.67         3



### 결정 트리(Decision Tree)

In [17]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

In [18]:
iris = load_iris()
# Keep only the last two feature columns (petal length and petal width)
# as inputs; the class labels are the targets.
X, y = iris.data[:, 2:], iris.target

In [19]:
# Cap the tree at two levels of splits.
dtree_clf = DecisionTreeClassifier(max_depth=2)
# Kept as a bare trailing expression so the notebook echoes the fitted
# estimator as the cell output.
dtree_clf.fit(X, y)

DecisionTreeClassifier(max_depth=2)

In [20]:
# Per-class probabilities for a single sample with petal length 5 and
# petal width 1.5 (one row, two features).
dtree_clf.predict_proba([[5, 1.5]])

array([[0.        , 0.90740741, 0.09259259]])

In [21]:
# Predicted class label for a single sample with petal length 5 and
# petal width 1.5.
dtree_clf.predict([[5, 1.5]])

array([1])

### 랜덤 포레스트(Random Forest)

In [22]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [23]:
iris = datasets.load_iris()
# Wrap the feature matrix in a DataFrame whose columns carry the dataset's
# feature names.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

In [24]:
# Hold out 25% of the samples for testing. Stratifying on the labels keeps
# the class proportions the same in both partitions; the seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    df[iris.feature_names],
    iris.target,
    test_size=0.25,
    stratify=iris.target,
    random_state=42,
)

In [25]:
# Ensemble of 50 trees, each limited to depth 20; seeded for repeatable results.
rf = RandomForestClassifier(n_estimators=50, max_depth=20, random_state=42)

In [26]:
# Train the forest on the training partition. Kept as a bare trailing
# expression so the notebook echoes the fitted estimator.
rf.fit(X_train, y_train)

RandomForestClassifier(max_depth=20, n_estimators=50, random_state=42)

In [27]:
# Predict class labels for the held-out samples.
predicted = rf.predict(X_test)

In [28]:
# Fraction of held-out samples whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=predicted)

0.9210526315789473