In [1]:
import pandas as pd
import numpy as np
import requests
import pickle as pkl
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

%matplotlib inline

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))
plt.style.use("seaborn")



### Defining Features and Classifier

In [2]:
df_common = pd.read_csv("Trees_common.csv", index_col=False)

In [3]:
df_common.head(1)

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points,Cover_Type,Soil_Type
0,2596,51,3,258,0,510,221,232,148,6279,0,29


In [4]:
X = df_common.drop("Cover_Type",axis=1)
y = df_common["Cover_Type"]

In [5]:
#Splitting the set (test = 0.3) in a stratified manner
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.30,stratify=y, random_state=4444)

In [6]:
#scaling the features
sca = StandardScaler()
sca.fit_transform(X_train)
sca.transform(X_test);

In [7]:
def alg_class(estimator, X_train, y_train, X_test, y_test):
    estimator.fit(X_train, y_train)
    y_pred = estimator.predict(X_test)
    acc_train = estimator.score(X_train, y_train)
    acc = accuracy_score(y_pred,y_test)
    rec = recall_score(y_pred,y_test)
    prec = precision_score(y_pred,y_test)
    f1 = f1_score(y_pred,y_test)
    
    return acc_train, acc, rec, prec, f1

In [8]:
# Trying with logistic regression different regularization constants
acc_train, acc, rec, prec, f1 = alg_class(GaussianNB(), X_train, y_train, X_test, y_test)
print(("Accuracy train: " + str(acc_train)), ("Accuracy: " + str(acc)), ("Recall: " + str(rec)), ("Precision: " + str(prec)), ("F1: " + str(f1)))

Accuracy train: 0.6465449413338317 Accuracy: 0.6461584358362401 Recall: 0.6202421966868154 Precision: 0.7074949994116955 F1: 0.6610016709172457
