# 의사결정나무(decision trees)를 활용한 비율 데이터 기반 투자 종목 발굴


**2019-2024 FinanceData.KR**

#  데이터셋 - 비율 데이터

http://nbviewer.jupyter.org/gist/FinanceData/0236c759eac4cbe2765b1d421aa8a797/readme.ipynb



In [None]:
import pandas as pd

# Load the ratio dataset from the shared CSV link.
# '종목코드' (stock code) is read as str — presumably so codes keep their
# leading zeros; confirm against the data.
url = 'https://goo.gl/sAMygH'
df = pd.read_csv(url, dtype={'종목코드': str})
# Discard every row that has at least one missing value.
df = df.dropna()
df

In [None]:
# Order rows by '시가총액' (market cap) from largest to smallest, then use
# '종목명' (stock name) as the row index.
df = df.sort_values('시가총액', ascending=False).set_index('종목명')
df.head()

In [None]:
import numpy as np

# Binary label: 1 when '수익률_6' is strictly above 0.1, otherwise 0.
# NOTE(review): '수익률_6' is presumably the 6-month return as a fraction
# (0.1 == 10%) — confirm against the dataset description.
beat_threshold = df['수익률_6'].gt(0.1)
df["WIN"] = np.where(beat_threshold, 1, 0)
# Show how many rows landed in each class.
df["WIN"].value_counts()

In [None]:
# Ratio columns used as model inputs.
feature_names = [
    "PER(배)",
    "PBR(배)",
    "ROE(%)",
]
# Feature matrix: all rows, only the selected ratio columns.
X = df.loc[:, feature_names]
X

In [None]:
# Target vector: the 'WIN' label column of df.
y = df.loc[:, 'WIN']
y

In [None]:
# Count how many rows fall into each value of the target.
y.value_counts()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
    test_size=0.2,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow Gini tree: depth capped at 3 and at least 5 samples per leaf.
# random_state is fixed (matching the seeded train/test split) because
# scikit-learn permutes the features at every split, so ties between equally
# good splits could otherwise produce a different tree from run to run.
model = DecisionTreeClassifier(
    criterion='gini',
    max_depth=3,
    min_samples_leaf=5,
    random_state=0,
)
model.fit(X_train, y_train)

In [None]:
import matplotlib.pyplot as plt
from sklearn import tree

# Draw the fitted tree on a wide canvas.
plt.figure(figsize=(18, 6))
# The short display labels are listed in the same order as the columns in
# feature_names; class_names follow the ascending class labels
# (0 -> 'loss', 1 -> 'profit').
# Binding the result to a name keeps the notebook from echoing the list of
# annotation objects.
annots = tree.plot_tree(
    model,
    feature_names=['PER','PBR','ROE'],
    class_names=['loss','profit'],
    rounded=True,
    filled=True,
)

In [None]:
# Mean accuracy of the fitted tree on the held-out test rows.
model.score(X_test, y_test)

In [None]:
from sklearn.metrics import classification_report

# Text report of per-class metrics for the held-out rows.
test_predictions = model.predict(X_test)
report_text = classification_report(y_test, test_predictions)
print(report_text)

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix on the held-out rows: rows are true classes, columns are
# predicted classes.
confusion_matrix(y_true=y_test, y_pred=model.predict(X_test))

**2019-2024 FinanceData.KR**