In [49]:
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from sklearn.base import BaseEstimator,TransformerMixin, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.linear_model import ElasticNetCV, LassoLarsCV
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline, make_union
from sklearn.utils import check_array
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.random_projection import GaussianRandomProjection
from sklearn.random_projection import SparseRandomProjection
from sklearn.decomposition import PCA, FastICA
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import r2_score



## Define a stacking estimator class ##

In [2]:
class StackingEstimator(BaseEstimator, TransformerMixin):
    """Transformer that turns a wrapped estimator's predictions into extra features.

    ``transform`` returns the input matrix with the estimator's prediction
    prepended as the first column. When the estimator is a classifier that
    exposes ``predict_proba``, its class probabilities are inserted between
    the prediction column and the original features.
    """

    def __init__(self, estimator):
        # Stored under the same name as the constructor argument.
        self.estimator = estimator

    def fit(self, X, y=None, **fit_params):
        """Fit the wrapped estimator on ``(X, y)`` and return ``self``.

        Any extra keyword arguments are forwarded unchanged to the wrapped
        estimator's ``fit``.
        """
        self.estimator.fit(X, y, **fit_params)
        return self

    def transform(self, X):
        """Return ``X`` augmented with the wrapped estimator's outputs.

        Column layout of the result, left to right:
        prediction, class probabilities (classifiers with ``predict_proba``
        only), then the original columns of ``X``.
        """
        X = check_array(X)
        wrapped = self.estimator

        exposes_probabilities = (
            isinstance(wrapped, ClassifierMixin) and hasattr(wrapped, 'predict_proba')
        )
        if exposes_probabilities:
            # Class probabilities go directly in front of the raw features.
            features = np.hstack((wrapped.predict_proba(X), X))
        else:
            features = np.copy(X)

        # predict() output is reshaped into a single column so it can be stacked.
        prediction_column = np.reshape(wrapped.predict(X), (-1, 1))
        return np.hstack((prediction_column, features))
    


## Read train and test dataset ##

In [25]:
# Locations of the raw train and test CSV files, relative to the notebook.
TRAIN_CSV = '../input/train.csv'
TEST_CSV = '../input/test.csv'

# Load both files unchanged; later cells derive their frames from copies of these.
train_org = pd.read_csv(TRAIN_CSV)
test_org = pd.read_csv(TEST_CSV)

In [26]:
# Preview the first rows of the raw training frame as the cell output.
train_org.head()

Unnamed: 0,ID,y,X0,X1,X2,X3,X4,X5,X6,X8,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
0,0,130.81,k,v,at,a,d,u,j,o,...,0,0,1,0,0,0,0,0,0,0
1,6,88.53,k,t,av,e,d,y,l,o,...,1,0,0,0,0,0,0,0,0,0
2,7,76.26,az,w,n,c,d,x,j,x,...,0,0,0,0,0,0,1,0,0,0
3,9,80.62,az,t,n,f,d,x,l,e,...,0,0,0,0,0,0,0,0,0,0
4,13,78.02,az,v,n,f,d,h,d,n,...,0,0,0,0,0,0,0,0,0,0


## Data preprocessing 1 ##

In [27]:
# Label-encode the categorical features (the object-typed columns, X0..X8).
# LabelEncoder maps each distinct category to an integer in 0 .. n_classes - 1.
# Each encoder is fitted on the train and test values together, so a category
# that occurs in only one of the two files still receives a code.

train_label, test_label = train_org.copy(), test_org.copy()
for c in train_label.columns:
    # Columns that are not object-typed are left untouched.
    if train_label[c].dtypes != 'object':
        continue
    train_values = list(train_label[c].values)
    test_values = list(test_label[c].values)
    lbe = LabelEncoder().fit(train_values + test_values)
    train_label[c] = lbe.transform(train_values)
    test_label[c] = lbe.transform(test_values)
train_label.head()

Unnamed: 0,ID,y,X0,X1,X2,X3,X4,X5,X6,X8,...,X375,X376,X377,X378,X379,X380,X382,X383,X384,X385
0,0,130.81,37,23,20,0,3,27,9,14,...,0,0,1,0,0,0,0,0,0,0
1,6,88.53,37,21,22,4,3,31,11,14,...,1,0,0,0,0,0,0,0,0,0
2,7,76.26,24,24,38,2,3,30,9,23,...,0,0,0,0,0,0,1,0,0,0
3,9,80.62,24,21,38,5,3,30,11,4,...,0,0,0,0,0,0,0,0,0,0
4,13,78.02,24,23,38,5,3,14,3,13,...,0,0,0,0,0,0,0,0,0,0


## Data preprocessing 2 ##

In [54]:
# One-hot encode the categorical features (columns X0..X8) into dummy
# variables with pd.get_dummies. Train and test are stacked first so the
# encoding is applied to both parts in a single call.

# Number of training rows, taken from the data rather than hard-coded, so
# the positional split below stays correct if the training file changes.
n_train = len(train_org)

temp = pd.concat([train_org, test_org])
temp = pd.get_dummies(temp)

# Training rows come first in the stacked frame. .copy() makes the result an
# independent frame instead of a slice of `temp`.
train_dummy = temp.iloc[:n_train].copy()

# Only the training file has a 'y' column, so concatenation leaves 'y'
# filled with NaN for every test row. Remove that placeholder column when
# rebuilding the test frame. A non-inplace drop returns a new frame and
# avoids mutating a slice of `temp`.
test_dummy = temp.iloc[n_train:].drop('y', axis=1)

len(train_dummy.columns), len(test_dummy.columns)

(581, 580)

In [52]:
# Preview the first rows of the one-hot encoded training frame as the cell output.
train_dummy.head()

Unnamed: 0,ID,X10,X100,X101,X102,X103,X104,X105,X106,X107,...,X8_p,X8_q,X8_r,X8_s,X8_t,X8_u,X8_v,X8_w,X8_x,X8_y
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,6,0,1,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,7,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,9,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,13,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Magic features ##

## Principal Component Analysis ##

## Cross Validation and Grid Search ##