In [1]:
from connector.pg_connector import get_data
from util.util import save_model, load_model
from conf.conf import logging, settings
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
def split(df, target_col='target', random_state=3):
    """Separate features from the label and split both into train/test sets.

    Parameters
    ----------
    df : DataFrame-like
        Table holding the feature columns and the label column.
    target_col : str, default 'target'
        Name of the label column.
    random_state : int, default 3
        Seed forwarded to ``train_test_split`` so the split is repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as returned by
        ``train_test_split``.

    Raises
    ------
    KeyError
        If ``target_col`` is not a column of ``df``.
    """
    logging.info('Defining X and Y')
    # Variables
    y = df[target_col]
    # Select features by *name*, not by position: the previous
    # ``df.iloc[:, :-1]`` silently leaked the label into X (and dropped a real
    # feature) whenever the label was not the last column.
    X = df.drop(columns=target_col)

    logging.info('Splitting dataset begins...')

    # Split variables into train and test
    X_train, X_test, y_train, y_test = train_test_split(X,  # independent variables
                                                        y,  # dependent variable
                                                        random_state=random_state
                                                        )
    logging.info('Splitting dataset is over...')
    return X_train, X_test, y_train, y_test

In [3]:
def train_decision_tree(X_train, y_train, max_depth=3, random_state=3,
                        model_path='model/conf/decision_tree.pkl'):
    """Fit a decision tree classifier, persist it, and return it.

    Parameters
    ----------
    X_train : array-like
        Training features, passed unchanged to ``clf.fit``.
    y_train : array-like
        Training labels, passed unchanged to ``clf.fit``.
    max_depth : int, default 3
        Depth limit handed to ``DecisionTreeClassifier``.
    random_state : int, default 3
        Seed handed to ``DecisionTreeClassifier``.
    model_path : str, default 'model/conf/decision_tree.pkl'
        Location handed to ``save_model``; pass the same value to ``predict``
        to reload the model.

    Returns
    -------
    DecisionTreeClassifier
        The fitted classifier.
    """
    # Initialize the model
    clf = DecisionTreeClassifier(max_depth=max_depth,
                                 random_state=random_state
                                 )
    # Train the model
    logging.info('Training model begins...')
    clf.fit(X_train, y_train)
    logging.info('Training model is over.')

    # Persist the fitted model; ``dir`` is the keyword name the project's
    # ``save_model`` helper expects for the destination.
    logging.info('Saving model...')
    save_model(dir=model_path, model=clf)
    logging.info('Model is saved.')
    return clf

In [4]:
def predict(values, path_to_model):
    """Load a stored model and return its predictions for ``values``.

    ``values`` is handed to the model's ``predict`` method as-is; no
    preprocessing happens here, so callers must pass features prepared the
    same way as the training data.

    Parameters
    ----------
    values : array-like
        Feature rows to classify.
    path_to_model : str
        Location passed to ``load_model``.

    Returns
    -------
    Whatever the loaded model's ``predict`` returns.
    """
    # NOTE(review): if load_model unpickles the file, only load trusted paths.
    model = load_model(path_to_model)
    predictions = model.predict(values)
    return predictions

In [5]:
# Show which dataset location the settings point at, then load it.
dataset_location = settings.data.data_set
print(f"parameter {dataset_location}")
df = get_data(dataset_location)

# Hold out a test partition before any preprocessing is fitted.
X_train, X_test, y_train, y_test = split(df)

# Fit the scaler on the training partition only, then apply that same
# transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

INFO:root:Extracting df


parameter https://raw.githubusercontent.com/5x12/ml-cookbook/master/supplements/data/heart.csv


INFO:root:Df is extracted
INFO:root:Defining X and Y
INFO:root:Splitting dataset begins...
INFO:root:Splitting dataset is over...


In [6]:
# Train (and persist) the tree, then report its accuracy on the held-out rows.
clf = train_decision_tree(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
logging.info(f'Accuracy is {test_accuracy}')

INFO:root:Training model begins...
INFO:root:Training model is over...
INFO:root:Saving model...
INFO:root:Model is saved...
INFO:root:Accuracy is 0.8287937743190662


In [7]:
# Reload the persisted tree from disk and classify the held-out rows.
saved_model_path = 'model/conf/decision_tree.pkl'
responce = predict(X_test, saved_model_path)
logging.info(f'Prediction is {responce}')

INFO:root:Prediction is [1 0 1 0 0 1 0 1 1 1 0 1 0 1 0 1 0 1 1 0 0 0 1 1 1 1 0 1 1 1 1 1 1 0 0 1 1
 1 1 0 1 1 0 1 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 0 0 0 0 0 0 0 1 0 1
 1 0 0 0 1 1 1 1 1 1 0 1 0 1 1 1 1 0 1 0 1 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1
 1 1 1 1 1 1 1 1 1 0 0 0 1 0 0 1 0 1 0 1 0 0 0 0 1 1 0 1 1 1 0 1 1 1 1 0 0
 1 1 0 0 0 1 0 1 0 1 1 1 0 0 0 1 1 1 0 1 1 1 0 1 0 1 0 1 1 1 0 0 0 0 1 1 1
 1 1 1 1 0 1 1 0 1 1 0 0 1 0 0 0 0 1 1 1 0 1 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1
 1 1 0 0 0 1 1 0 0 1 1 1 0 1 0 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 0 1 0 1 1]
