# Forest Cover Type

# Import Statements

In [None]:
import pandas as pd, numpy as np
import joblib

# Load Data

In [None]:
# Show every column when a DataFrame is displayed. The fully qualified option
# name is required: the short pattern 'max_columns' is ambiguous on
# pandas >= 1.4 (it also matches 'styler.render.max_columns') and raises
# OptionError there.
pd.set_option('display.max_columns', None)
# Load the test data set
test = pd.read_csv('../input/forest-cover-type-prediction/test.csv')
# Print the shape of the test set as a quick check that the load succeeded
print(test.shape)

In [None]:
# Preview the first rows of the raw test set. As the last expression in the
# cell, the returned frame is what the notebook displays.
test.head()

In [None]:
# Remove the ID column from the set. The drop is positional: every column
# except the first one is kept, so this assumes the ID is column 0 of
# test.csv (TODO confirm against the file header).
test = test.iloc[:, 1:]
# Preview again to confirm the first column is gone
test.head()

In [None]:
# Append the engineered features to the test frame.
# NOTE: the columns are appended in this exact order on purpose; the
# preprocessing cell further down selects columns by position.
# Presumably these mirror the features built at training time; verify
# against the training notebook.

# Elevation offset by a scaled horizontal distance to hydrology
test['Elev_to_Horizontal_Hyd'] = (
    test['Elevation'] - 0.2 * test['Horizontal_Distance_To_Hydrology']
)
# Elevation offset by a scaled horizontal distance to roadways
test['Elev_to_Horizontal_Road'] = (
    test['Elevation'] - 0.05 * test['Horizontal_Distance_To_Roadways']
)
# Elevation offset by the (unscaled) vertical distance to hydrology
test['Elev_to_Verticle_Hyd'] = (
    test['Elevation'] - test['Vertical_Distance_To_Hydrology']
)
# Average of the three horizontal distances (fire points, hydrology, roadways)
test['Mean_Horizontal_Dist'] = (
    test['Horizontal_Distance_To_Fire_Points']
    + test['Horizontal_Distance_To_Hydrology']
    + test['Horizontal_Distance_To_Roadways']
) / 3
# Average of the fire-point and hydrology horizontal distances
test['Mean_Fire_Hydro'] = (
    test['Horizontal_Distance_To_Fire_Points']
    + test['Horizontal_Distance_To_Hydrology']
) / 2

# Check for Missing Values

In [None]:
# Check for missing values: evaluates to True if at least one cell of the
# frame is null and False otherwise. As the last expression in the cell,
# the result is displayed; nothing is modified.
test.isnull().values.any()

# Preprocessing

In [None]:
# Dimensions of the data set, printed as (rows, columns)
print(test.shape) # 59 columns expected
# Column names in positional order; useful for checking the position-based
# slices used in the cell that splits categorical and numeric columns
print(test.columns)

In [None]:
# Split the frame by column position into a categorical and a numeric part.

# Categorical part: positions 10 through 53, taken as a plain array.
# (presumably the one-hot indicator columns; confirm against test.columns)
X_cat = test.iloc[:, 10:54].to_numpy()

# Numeric part: the first ten columns (A) followed by positions 54 through 58
# (B), placed side by side in that order.
A = test.iloc[:, :10]
B = test.iloc[:, 54:59]
X_num = pd.concat((A, B), axis=1).to_numpy()

In [None]:
# Location of the persisted scaler artifact
scaler_path = '../input/forest-cover-models/forest_cover_scaler2.joblib'
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted source.
scaler = joblib.load(scaler_path)
# Rescale the numeric block with the loaded scaler. Only transform() is
# called here; nothing is re-fit on the test data.
X_num = scaler.transform(X_num)

In [None]:
# Join the scaled numeric block and the categorical block column-wise,
# numeric columns first, into the final feature matrix.
X = np.concatenate((X_num, X_cat), axis=1)
# Report the (rows, columns) of the assembled matrix
print(X.shape)

# Model Predictions

In [None]:
# Location of the persisted model artifact
model_path = '../input/g2-forestcovertype-finalmodels/tree_model_final.joblib'
# NOTE(review): joblib.load unpickles the file, so only load artifacts that
# come from a trusted source.
model = joblib.load(model_path)

In [None]:
# Make predictions using the loaded model on the assembled feature matrix X.
# NOTE(review): X's column order (scaled numeric columns first, then the
# categorical columns) presumably has to match the layout the model was
# trained on; confirm against the training notebook.
y_pred = model.predict(X)

# Submission

In [None]:
# Read the sample submission file; its frame is reused as the template that
# the predictions are written into.
sample_path = '../input/forest-cover-type-prediction/sampleSubmission.csv'
submission = pd.read_csv(sample_path)
# Show the first few rows of the template
submission.head()

In [None]:
# Add the predictions to the submission frame by assigning them to the
# Cover_Type column.
# NOTE(review): assumes y_pred is an array with one entry per row of the
# sample submission, in the same row order as test.csv; verify the lengths
# match.
submission['Cover_Type'] = y_pred
# view first few rows to confirm the column now holds the predictions
submission.head()

In [None]:
# Write the finished submission to disk: header row included, row index
# left out.
submission.to_csv('submission.csv', header=True, index=False)