In [73]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools
import csv
import pandas as pd
import tensorflow as tf
import random

tf.logging.set_verbosity(tf.logging.INFO)

In [51]:
# Column groups.  Ordinal grade columns such as "ExterQual" are listed with the
# continuous features because input_clean() recodes their labels as integers.
CONTINUOUS_COLUMNS = [
    "LotFrontage", "LotArea", "OverallQual", "OverallCond", "YearBuilt",
    "YearRemodAdd", "MasVnrArea", "ExterQual", "ExterCond",
    "BsmtQual", "BsmtCond", "BsmtExposure", "BsmtFinType1", "BsmtFinSF1",
    "BsmtFinType2", "BsmtFinSF2", "BsmtUnfSF", "TotalBsmtSF", "HeatingQC",
    "1stFlrSF", "2ndFlrSF", "LowQualFinSF", "GrLivArea", "BsmtFullBath",
    "BsmtHalfBath", "FullBath", "HalfBath", "BedroomAbvGr", "KitchenAbvGr",
    "KitchenQual", "TotRmsAbvGrd", "Functional", "Fireplaces", "FireplaceQu",
    "GarageYrBlt", "GarageFinish", "GarageCars",
    "GarageArea", "GarageQual", "GarageCond", "WoodDeckSF", "OpenPorchSF",
    "EnclosedPorch", "3SsnPorch", "ScreenPorch", "PoolArea", "PoolQC",
    "MiscVal", "MoSold", "YrSold",
]
CATEGORICAL_COLUMNS = [
    "MSSubClass", "MSZoning", "Street", "Alley", "LotShape",
    "LandContour", "Utilities", "LotConfig", "LandSlope", "Neighborhood",
    "Condition1", "Condition2", "BldgType", "HouseStyle", "RoofStyle",
    "RoofMatl", "Exterior1st", "Exterior2nd", "MasVnrType",
    "Foundation", "Heating", "CentralAir", "Electrical", "GarageType",
    "PavedDrive", "Fence", "MiscFeature", "SaleType", "SaleCondition",
]
LABEL_COLUMN = "SalePrice"
COLUMNS = CONTINUOUS_COLUMNS + CATEGORICAL_COLUMNS + [LABEL_COLUMN]

# Report the size of each group.
for _caption, _group in (
        ("Continuous columns {0:2d}", CONTINUOUS_COLUMNS),
        ("Categorical columns {0:2d}", CATEGORICAL_COLUMNS),
        ("Total useful columns {0:2d}", COLUMNS)):
    print(_caption.format(len(_group)))

# Label scaling constants: hard-coded SalePrice bounds plus the offset/range
# that are applied when scaled predictions are converted back to prices.
scale = 1.0
min_price = 34900.0
max_price = 755000.0
price_offset = min_price
price_range = (max_price - min_price) / scale


Continuous columns 50
Categorical columns 29
Total useful columns 80


In [52]:
def input_clean(raw_data):
    """Return a cleaned copy of a raw house-price frame.

    Steps, in order:
      1. drop the ``Id`` column;
      2. recode ``MSSubClass`` integers as ``'class<N>'`` strings, shorten the
         ``'C (all)'`` zoning label to ``'C'`` and map the ordinal grade
         labels (quality, exposure, finish, functionality, ...) to integers;
      3. fill missing values column by column (0 for numeric and ordinal
         columns, 1899 for ``GarageYrBlt``, a sentinel string for nominal
         columns).

    Parameters
    ----------
    raw_data : pandas.DataFrame
        Must contain ``Id`` and every column that is filled below.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``raw_data`` itself is not modified.
    """
    # Shared grade scales; per-column variants only add a "feature absent" key.
    quality = {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1}
    basement_finish = {'GLQ': 6, 'ALQ': 5, 'BLQ': 4, 'Rec': 3,
                       'LwQ': 2, 'Unf': 1, 'NoBsmt': 0}
    subclass_codes = (20, 30, 40, 45, 50, 60, 70, 75, 80, 85, 90,
                      120, 150, 160, 180, 190)

    replacements = {
        'MSSubClass': {code: 'class%d' % code for code in subclass_codes},
        'MSZoning': {'C (all)': 'C'},
        'ExterQual': quality,
        'ExterCond': quality,
        'BsmtQual': dict(quality, NoBsmt=0),
        'BsmtCond': dict(quality, NoBsmt=0),
        'BsmtExposure': {'Gd': 4, 'Av': 3, 'Mn': 2, 'No': 1, 'NoBsmt': 0},
        'BsmtFinType1': basement_finish,
        'BsmtFinType2': basement_finish,
        'HeatingQC': quality,
        'KitchenQual': dict(quality, NoKit=0),
        'Functional': {'Typ': 8, 'Min1': 7, 'Min2': 6, 'Mod': 5, 'Maj1': 4,
                       'Maj2': 3, 'Sev': 2, 'Sal': 1, 'NoFunc': 0},
        'FireplaceQu': dict(quality, NoFireplace=0),
        'GarageFinish': {'Fin': 3, 'RFn': 2, 'Unf': 1, 'NoGarage': 0},
        'GarageQual': dict(quality, NoGarage=0),
        'GarageCond': dict(quality, NoGarage=0),
        # PoolQC uses its own, shorter scale.
        'PoolQC': {'Ex': 4, 'Gd': 3, 'TA': 2, 'Fa': 1, 'NoPool': 0},
    }

    # Keyword ``axis`` works on old and new pandas; the positional form
    # ``drop('Id', 1)`` was removed in pandas 2.0.
    data = raw_data.drop('Id', axis=1).copy()
    data = data.replace(replacements)

    # Value used for missing entries, per column.
    fill_values = {'GarageYrBlt': 1899}
    # Numeric measurements: a missing value becomes 0.
    for column in ('LotFrontage', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2',
                   'BsmtUnfSF', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath',
                   'GarageCars', 'GarageArea'):
        fill_values[column] = 0
    # Ordinal grades: a missing value becomes the lowest grade, 0.
    for column in ('BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1',
                   'BsmtFinType2', 'FireplaceQu', 'GarageFinish', 'GarageQual',
                   'GarageCond', 'PoolQC', 'KitchenQual', 'Functional'):
        fill_values[column] = 0
    # Nominal columns: a missing value becomes an explicit category.
    for column in ('MSZoning', 'Utilities', 'Exterior1st', 'Exterior2nd',
                   'SaleType'):
        fill_values[column] = 'None'
    fill_values.update({
        'Alley': 'NoAlley',
        'MasVnrType': 'NoMasVnr',
        'GarageType': 'NoGarage',
        'Electrical': 'NoElec',
        'Fence': 'NoFc',
        'MiscFeature': 'NoFtr',
    })

    # Every column is filled from itself.  The previous code filled
    # BsmtFinType2 from BsmtFinType1, which overwrote the second column.
    for column, value in fill_values.items():
        data[column] = data[column].fillna(value)

    return data

In [53]:
# Sanity check: compare COLUMNS with the header of train.csv.
# After the loop check_dict holds  1 for a file column that COLUMNS does not
# list and -1 for a COLUMNS entry that the file does not contain.
folder = '/Users/lanxing/Desktop/Machine Learning/kaggle/'
check_columns = pd.read_csv(folder+"train.csv").columns.tolist()
check_dict = dict.fromkeys(check_columns, 1)
for name in COLUMNS:
    # pop() removes a matched name; a miss (None) marks the name as absent.
    if check_dict.pop(name, None) is None:
        check_dict[name] = -1
print(check_dict)

{'Id': 1}


In [54]:
# Load the raw training data from the data folder.
folder = '/Users/lanxing/Desktop/Machine Learning/kaggle/'
train_csv_path = folder + "train.csv"
raw_train = pd.read_csv(train_csv_path)
# Inspection tip: raw_train.describe() is wide; view it a few columns at a
# time, e.g. raw_train.describe().iloc[:, 0:9].


In [40]:
# Clean the raw training frame with input_clean(); the result is a new frame
# without the Id column, with grade labels recoded and missing values filled.
train = input_clean(raw_train)
# Optional inspection (disabled): column count of describe(), a sample slice,
# and a paged view of train.describe().
#print(train.describe().shape[1])
#print(train.iloc[36:45,21:40])
#i=0
#while i<train.describe().shape[1]:
#    print(train.describe().iloc[:,i:min(i+9,train.describe().shape[1])])
#    i=i+10

In [41]:
# Verify the cleaned data.
# 1) Compare the columns reported by describe() with CONTINUOUS_COLUMNS:
#    1 = reported by describe() but not listed, -1 = listed but not reported.
check_columns = train.describe().columns.tolist()
check_dict = dict.fromkeys(check_columns, 1)
for name in CONTINUOUS_COLUMNS:
    if check_dict.pop(name, None) is None:
        check_dict[name] = -1
print(check_dict)
# 2) Print the name of every column that still contains a missing value.
for name in train.columns.tolist():
    has_missing = train[name].isnull().values.any()
    if has_missing:
        print(name)

{'SalePrice': 1}


In [65]:
# Store the per-column min and max of the cleaned training data in
# input_stat.csv; input_normalize() reads that file back.  This cell must run
# whenever the file does not exist yet.
stat_rows = ['min', 'max']
input_stat = train.describe().loc[stat_rows]
input_stat.to_csv(folder + "input_stat.csv")
# The SalePrice bounds could be taken from input_stat as well
# (input_stat.loc['max', 'SalePrice'] / input_stat.loc['min', 'SalePrice']).

In [66]:
#Define function to normalize data
def input_normalize(data, istrain, stats=None, value_scale=None):
    """Min-max scale the columns of ``data`` that have saved statistics.

    Each column ``k`` in the statistics table is transformed to
    ``(data[k] - min_k) / ((max_k - min_k) / value_scale)``; columns without
    statistics are copied unchanged.

    Parameters
    ----------
    data : pandas.DataFrame
        Cleaned frame to scale; it is not modified.
    istrain : bool
        When false, the ``SalePrice`` column is excluded from scaling, so the
        frame does not need to contain it.
    stats : pandas.DataFrame, optional
        Table with index labels ``'min'`` and ``'max'`` and one column per
        feature.  Defaults to ``input_stat.csv`` in the global ``folder``.
    value_scale : float, optional
        Upper bound of the scaled range.  Defaults to the global ``scale``.

    Returns
    -------
    pandas.DataFrame
        Scaled copy of ``data``.
    """
    if stats is None:
        # read_csv replaces DataFrame.from_csv, which newer pandas removed.
        stats = pd.read_csv(folder + "input_stat.csv", index_col=0)
    if value_scale is None:
        value_scale = scale

    data_normal = data.copy()
    columns = stats.columns
    if not istrain:
        columns = columns.drop(u'SalePrice')
    for k in columns:
        col_offset = stats.loc['min', k]
        col_range = (stats.loc['max', k] - col_offset) / value_scale
        if col_range == 0:
            # Constant column in the statistics: avoid 0/0 (NaN) and x/0 (inf).
            col_range = 1.0
        data_normal[k] = (data_normal[k] - col_offset) / col_range
    # The earlier version also assigned max_price / min_price / price_offset /
    # price_range here, but only as unused locals (no ``global`` statement),
    # so they never reached the module-level constants.  They were removed.
    return data_normal

In [67]:
# Normalize the cleaned training data.  istrain=True means the SalePrice
# label column is scaled together with the feature columns.
train_normal = input_normalize(train,True)
#print(train_normal.iloc[0:20,0:20])

In [68]:
def input_pdtotf(df,istrain):
    """Convert a pandas frame into the tensors expected by an ``input_fn``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every name in CONTINUOUS_COLUMNS and
        CATEGORICAL_COLUMNS, plus LABEL_COLUMN when ``istrain`` is true.
    istrain : bool
        Whether the label tensor should be returned as well.

    Returns
    -------
    (dict, Tensor) when ``istrain`` is true, otherwise dict
        The dict maps each column name to a constant Tensor (continuous
        columns) or to a SparseTensor with one value per row (categorical
        columns).
    """
    feature_cols = {}
    # Continuous features: the column values stored in a constant Tensor.
    for k in CONTINUOUS_COLUMNS:
        feature_cols[k] = tf.constant(df[k].values)
    # Categorical features: an [n_rows, 1] SparseTensor with a value at [i, 0]
    # for every row i.  The dense shape is passed positionally because its
    # keyword name differs between TensorFlow releases (shape / dense_shape).
    for k in CATEGORICAL_COLUMNS:
        n_rows = df[k].size
        feature_cols[k] = tf.SparseTensor(
            [[i, 0] for i in range(n_rows)],
            df[k].values,
            [n_rows, 1])
    # The dict is filled in place; the earlier
    # dict(a.items() + b.items()) merge raises TypeError on Python 3.
    if istrain:
        # Label column as a constant Tensor.
        label = tf.constant(df[LABEL_COLUMN].values)
        return feature_cols, label
    return feature_cols

In [69]:
# Vocabulary of every categorical column, listed in CATEGORICAL_COLUMNS order.
# The lists can be regenerated from the cleaned training data with
#   {l: train.loc[:, l].value_counts().index.values.tolist() for l in CATEGORICAL_COLUMNS}
# Some lists also carry the 'None' fill value that input_clean() inserts.
CATEGORICAL_OPTIONS = dict(
    MSSubClass=['class20', 'class60', 'class50', 'class120', 'class30',
                'class160', 'class70', 'class80', 'class90', 'class190',
                'class85', 'class75', 'class45', 'class180', 'class40'],
    MSZoning=['RL', 'RM', 'FV', 'RH', 'C', 'None'],
    Street=['Pave', 'Grvl'],
    Alley=['NoAlley', 'Grvl', 'Pave'],
    LotShape=['Reg', 'IR1', 'IR2', 'IR3'],
    LandContour=['Lvl', 'Bnk', 'HLS', 'Low'],
    Utilities=['AllPub', 'NoSeWa', 'None'],
    LotConfig=['Inside', 'Corner', 'CulDSac', 'FR2', 'FR3'],
    LandSlope=['Gtl', 'Mod', 'Sev'],
    Neighborhood=['NAmes', 'CollgCr', 'OldTown', 'Edwards', 'Somerst',
                  'Gilbert', 'NridgHt', 'Sawyer', 'NWAmes', 'SawyerW',
                  'BrkSide', 'Crawfor', 'Mitchel', 'NoRidge', 'Timber',
                  'IDOTRR', 'ClearCr', 'SWISU', 'StoneBr', 'MeadowV',
                  'Blmngtn', 'BrDale', 'Veenker', 'NPkVill', 'Blueste'],
    Condition1=['Norm', 'Feedr', 'Artery', 'RRAn', 'PosN', 'RRAe', 'PosA',
                'RRNn', 'RRNe'],
    Condition2=['Norm', 'Feedr', 'Artery', 'RRNn', 'PosN', 'RRAn', 'RRAe',
                'PosA'],
    BldgType=['1Fam', 'TwnhsE', 'Duplex', 'Twnhs', '2fmCon'],
    HouseStyle=['1Story', '2Story', '1.5Fin', 'SLvl', 'SFoyer', '1.5Unf',
                '2.5Unf', '2.5Fin'],
    RoofStyle=['Gable', 'Hip', 'Flat', 'Gambrel', 'Mansard', 'Shed'],
    RoofMatl=['CompShg', 'Tar&Grv', 'WdShngl', 'WdShake', 'Membran', 'Metal',
              'ClyTile', 'Roll'],
    Exterior1st=['VinylSd', 'HdBoard', 'MetalSd', 'Wd Sdng', 'Plywood',
                 'CemntBd', 'BrkFace', 'WdShing', 'Stucco', 'AsbShng',
                 'Stone', 'BrkComm', 'AsphShn', 'ImStucc', 'CBlock', 'None'],
    Exterior2nd=['VinylSd', 'MetalSd', 'HdBoard', 'Wd Sdng', 'Plywood',
                 'CmentBd', 'Wd Shng', 'Stucco', 'BrkFace', 'AsbShng',
                 'ImStucc', 'Brk Cmn', 'Stone', 'AsphShn', 'Other', 'CBlock',
                 'None'],
    MasVnrType=['None', 'BrkFace', 'Stone', 'BrkCmn', 'NoMasVnr'],
    Foundation=['PConc', 'CBlock', 'BrkTil', 'Slab', 'Stone', 'Wood'],
    Heating=['GasA', 'GasW', 'Grav', 'Wall', 'OthW', 'Floor'],
    CentralAir=['Y', 'N'],
    Electrical=['SBrkr', 'FuseA', 'FuseF', 'FuseP', 'Mix', 'NoElec'],
    GarageType=['Attchd', 'Detchd', 'BuiltIn', 'NoGarage', 'Basment',
                'CarPort', '2Types'],
    PavedDrive=['Y', 'N', 'P'],
    Fence=['NoFc', 'MnPrv', 'GdPrv', 'GdWo', 'MnWw'],
    MiscFeature=['NoFtr', 'Shed', 'Othr', 'Gar2', 'TenC'],
    SaleType=['WD', 'New', 'COD', 'ConLD', 'ConLw', 'ConLI', 'CWD', 'Oth',
              'Con', 'None'],
    SaleCondition=['Normal', 'Partial', 'Abnorml', 'Family', 'Alloca',
                   'AdjLand'],
)

In [70]:
# Define the feature columns and create the regressor.

# One real-valued column per continuous feature.
continuous_feature = []
for k in CONTINUOUS_COLUMNS:
    continuous_feature.append(tf.contrib.layers.real_valued_column(k))

# One one-hot column per categorical feature, keyed by that feature's
# vocabulary in CATEGORICAL_OPTIONS.
# Alternatives that were tried: embedding_column(..., dimension=8) over the same
# keyed column, and embedding_column over sparse_column_with_hash_bucket(k,
# hash_bucket_size=30) with dimension=30.
categorical_feature = []
for k in CATEGORICAL_COLUMNS:
    keyed_column = tf.contrib.layers.sparse_column_with_keys(
        column_name=k, keys=CATEGORICAL_OPTIONS[k])
    categorical_feature.append(tf.contrib.layers.one_hot_column(keyed_column))

feature_cols = continuous_feature + categorical_feature

# Fully connected regressor with four hidden layers of 100 units each.
regressor = tf.contrib.learn.DNNRegressor(
    feature_columns=feature_cols,
    hidden_units=[100, 100, 100, 100])


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'save_summary_steps': 100, '_num_ps_replicas': 0, '_task_type': None, '_environment': 'local', '_is_chief': True, 'save_checkpoints_secs': 600, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1231b1d90>, 'tf_config': gpu_options {
  per_process_gpu_memory_fraction: 1
}
, '_task_id': 0, 'tf_random_seed': None, 'keep_checkpoint_every_n_hours': 10000, '_evaluation_master': '', 'save_checkpoints_steps': None, '_master': '', 'keep_checkpoint_max': 5}


In [71]:
# Train the regressor for 10000 steps.  The input_fn converts train_normal
# into feature tensors plus the SalePrice label tensor (istrain=True).
regressor.fit(input_fn=lambda: input_pdtotf(train_normal,True), steps=10000)

INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using dnn/hiddenlayer_0_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_0:activation is illegal; using dnn/hiddenlayer_0_activation instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_1:fraction_of_zero_values is illegal; using dnn/hiddenlayer_1_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_1:activation is illegal; using dnn/hiddenlayer_1_activation instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_2:fraction_of_zero_values is illegal; using dnn/hiddenlayer_2_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_2:activation is illegal; using dnn/hiddenlayer_2_activation instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_3:fraction_of_zero_values is illegal; using dnn/hiddenlayer_3_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_3:activation is illegal; using dnn/hiddenlayer_3_acti

DNNRegressor(hidden_units=[100, 100, 100, 100], dropout=None, optimizer=None, feature_columns=[_RealValuedColumn(column_name='LotFrontage', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='LotArea', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='OverallQual', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='OverallCond', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='YearBuilt', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='YearRemodAdd', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='MasVnrArea', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _RealValuedColumn(column_name='ExterQual', dimension=1, default_value=None, dtype=tf.float32, normalizer=None), _

In [72]:
#Evaluating the Model
# Load the held-out split under its own name only.  The earlier chained
# assignment (raw_cv = raw_test = ...) also bound raw_test to the validation
# data, which silently replaces the test frame when cells run out of order.
raw_cv = pd.read_csv(folder+"train_cv2.csv")
# Same cleaning and scaling as the training data; istrain=True keeps the
# SalePrice label, which evaluate() needs.
cv = input_clean(raw_cv)
cv_normal = input_normalize(cv,True)
ev = regressor.evaluate(input_fn=lambda: input_pdtotf(cv_normal ,True), steps=1)

# Report the loss on the validation split (in scaled label units).
loss_score = ev["loss"]
print("Loss: {0:f}".format(loss_score))

INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using dnn/hiddenlayer_0_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_0:activation is illegal; using dnn/hiddenlayer_0_activation instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_1:fraction_of_zero_values is illegal; using dnn/hiddenlayer_1_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_1:activation is illegal; using dnn/hiddenlayer_1_activation instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_2:fraction_of_zero_values is illegal; using dnn/hiddenlayer_2_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_2:activation is illegal; using dnn/hiddenlayer_2_activation instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_3:fraction_of_zero_values is illegal; using dnn/hiddenlayer_3_fraction_of_zero_values instead.
INFO:tensorflow:Summary name dnn/hiddenlayer_3:activation is illegal; using dnn/hiddenlayer_3_acti

In [60]:
# Predict sale prices for the test file and undo the label scaling.
raw_test = pd.read_csv(folder+"test.csv")
test_size = raw_test.shape[0]
test = input_clean(raw_test)
if "SalePrice" in test.columns:
    # Labelled file: scale it like training data, then remove the label.
    # Keyword ``axis`` is required; pandas 2.0 removed the positional form.
    test_normal = input_normalize(test,True).drop("SalePrice", axis=1)
else:
    test_normal = input_normalize(test,False)
#for k in test_normal.columns:
#    print(k,test_normal[k].isnull().sum())
#test_normal.isnull().values.any()
y = regressor.predict(input_fn=lambda: input_pdtotf(test_normal,False))
# Take exactly one prediction per test row from the returned iterable.
predictions_scaled = list(itertools.islice(y, test_size))
#print ("Predictions: {}".format(str(predictions_scaled)))
# Map scaled outputs back to prices with the module-level scaling constants.
predictions = pd.DataFrame(predictions_scaled) * price_range + price_offset
print(predictions)

Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
Instructions for updating:
Estimator is decoupled from Scikit Learn interface by moving into
separate class SKCompat. Arguments x, y and batch_size are only
available in the SKCompat class, Estimator will only accept input_fn.
Example conversion:
  est = Estimator(...) -> est = SKCompat(Estimator(...))
INFO:tensorflow:Summary name dnn/hiddenlayer_0:fraction_of_zero_values is illegal; using

In [64]:
# Write the submission file with the columns Id and SalePrice.
result_list = predictions[0].tolist()
filename = "result/lx_result3.csv"
# Ids are taken from the test file instead of counting up from a hard-coded
# 1461, so each row stays paired with its own Id.  DataFrame.to_csv also avoids
# the csv-module file mode difference between Python 2 ('wb') and Python 3
# (text mode).
submission = pd.DataFrame(
    {'Id': raw_test['Id'].values, 'SalePrice': result_list},
    columns=['Id', 'SalePrice'])
submission.to_csv(folder+filename, index=False)



In [85]:
# Randomly split the cleaned training data: about (1 - use_rate) of the rows go
# to train_del, the remaining rows to train_rest.
# NOTE(review): no random seed is set, so the split differs between runs.
raw_train_size = raw_train.shape[0]
use_rate = 0.8
# random.sample needs a real sequence (Python 3.11+ rejects other containers),
# so the index labels are materialized as a list first.
rows = random.sample(list(train.index), int(raw_train_size*(1-use_rate)))

# Label-based selection with .loc; the .ix indexer was removed from pandas.
train_del = train.loc[rows]

train_rest = train.drop(rows)
print (train_del.iloc[:,0:5])

     MSSubClass MSZoning  LotFrontage  LotArea Street
447     class60       RL          0.0    11214   Pave
716     class70       RM         60.0    10800   Pave
170     class50       RM          0.0    12358   Pave
984     class90       RL         75.0    10125   Pave
321     class60       RL         99.0    12099   Pave
214     class60       RL          0.0    10900   Pave
467     class70       RL         79.0     9480   Pave
416     class60       RL         74.0     7844   Pave
998     class30       RM         60.0     9786   Pave
536     class60       RL         57.0     8924   Pave
337     class20       RL         70.0     9135   Pave
874     class50       RM         52.0     5720   Pave
186     class80       RL          0.0     9947   Pave
334     class60       RL         59.0     9042   Pave
824     class20       FV         81.0    11216   Pave
725     class20       RL         60.0     6960   Pave
1135    class30       RM         60.0     6180   Pave
397     class60       RL    

In [86]:
# Preview the first 20 rows and first 5 columns of the rows kept by the split.
print(train_rest.iloc[0:20,0:5])

   MSSubClass MSZoning  LotFrontage  LotArea Street
1     class20       RL         80.0     9600   Pave
2     class60       RL         68.0    11250   Pave
3     class70       RL         60.0     9550   Pave
4     class60       RL         84.0    14260   Pave
5     class50       RL         85.0    14115   Pave
6     class20       RL         75.0    10084   Pave
7     class60       RL          0.0    10382   Pave
8     class50       RM         51.0     6120   Pave
9    class190       RL         50.0     7420   Pave
11    class60       RL         85.0    11924   Pave
12    class20       RL          0.0    12968   Pave
13    class20       RL         91.0    10652   Pave
14    class20       RL          0.0    10920   Pave
15    class45       RM         51.0     6120   Pave
16    class20       RL          0.0    11241   Pave
19    class20       RL         70.0     7560   Pave
20    class60       RL        101.0    14215   Pave
21    class45       RM         57.0     7449   Pave
22    class2