# Huff, Puff and Classify

### Example Notebook: Random Forest Classifier

In [None]:
# Import standard-library and third-party modules
import os, os.path
import sys
from sklearn.metrics import accuracy_score, classification_report


# Import the project's custom modules.
# '../hp_classify' is a relative sys.path entry, so it is resolved against the
# current working directory when the imports below run.
sys.path.append('../hp_classify')
import prep.prep_data as prep
import model.rfc_build as rfc

# IPython magic: render matplotlib figures inline in the notebook
%matplotlib inline

### User Globals

In [None]:
# Set up notebook-wide globals.
# Directories: the project home is taken to be the parent of the current
# working directory, and the data folder sits directly under it.
CWD = os.getcwd()
HOME_DIR = os.path.abspath(os.path.join(CWD, os.pardir))
# Join path components with os.path.join instead of "+" so the separator is
# correct on every platform and no doubled slash appears when HOME_DIR is a root.
DATA_DIR = os.path.join(HOME_DIR, "data")
DATA_FILENAME = "housing_data.csv"

# Target column for the classifier, and the columns handed to the ranking step
LABEL = 'roof'
STR_VAR = ['roof', 'wall', 'floor']

### Read in and prep data for modeling

In [None]:
# Load the dataset (per the original note, as a pandas DataFrame).
# The file path is assembled with os.path.join rather than string concatenation
# so it does not depend on a hard-coded "/" separator.
df = prep.load_data(os.path.join(DATA_DIR, DATA_FILENAME))

In [None]:
# Rank the columns listed in STR_VAR; per the original note, values outside
# 10 ~ 39 are truncated by this step.
df = prep.ranking(
    df,
    STR_VAR,
)

In [None]:
# Shuffle the rows and redistribute them with respect to LABEL; only the first
# of the two returned values (the dataframe) is kept.
df, _ = prep.shuffle_redistribute(
    df,
    LABEL,
    Redistribute=True,
)

In [None]:
# Derive the model's input features from the dataframe, given the label column
FEATURES = prep.extract_features(
    df,
    LABEL,
)

In [None]:
# Show the extracted features as this cell's output
FEATURES

### Start Model training

In [None]:
# Split the data into training and test sets (features and labels for each)
x_train, x_test, y_train, y_test = prep.train_test_split(
    df,
    FEATURES,
    LABEL,
)

In [None]:
# Build the random forest classifier from the training split; per the original
# note, the helper also saves the model to a directory.
RFC = rfc.rfc_model(
    x_train,
    y_train,
    LABEL,
)

### Results

In [None]:
# Test split: predict with the fitted classifier, then measure accuracy
pred_test = RFC.predict(x_test)
test_score = accuracy_score(y_test, pred_test)

# Training split: same two steps, for comparison against the test accuracy
pred_train = RFC.predict(x_train)
train_score = accuracy_score(y_train, pred_train)

# Report both accuracies as percentages with two decimals
print('Train set Accuracy score:{:.2f}%'.format(train_score * 100))
print('Test set Accuracy score:{:.2f}%'.format(test_score * 100))

In [None]:
# Print the scikit-learn classification report for the test split,
# labelling the three classes by rank
print(
    classification_report(
        y_test,
        pred_test,
        target_names=['rank 1', 'rank 2', 'rank 3'],
    )
)

### Confusion Matrix 

In [None]:
# Confusion matrix for the test split; the diagonal holds the accurately
# predicted ranks. plot=True asks the helper to draw it as well.
conf_matrix = rfc.confusion_matrix(
    y_test,
    pred_test,
    plot=True,
)

# DEMO

In [None]:
# Run the project's demo helper with the data directory, the extracted
# features and the trained classifier
rfc.demo_rfc(
    DATA_DIR,
    FEATURES,
    RFC,
)