In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor


In [2]:
# Load the training data into a DataFrame.
# The CSV location is a relative path, so it is resolved against the
# notebook's current working directory.
iowa_file_path = 'dataset/train.csv'
home_data = pd.read_csv(filepath_or_buffer=iowa_file_path)



In [3]:
# Display the column labels of the loaded DataFrame.
home_data.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

In [4]:
# Preview the first rows of the loaded DataFrame.
home_data.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [32]:
# Predictor columns fed to the model.
features = [
    'LotArea',
    'YearBuilt',
    '1stFlrSF',
    '2ndFlrSF',
    'FullBath',
    'BedroomAbvGr',
    'TotRmsAbvGrd',
]

# Feature matrix: only the selected columns of the dataset.
X = home_data[features]

# Prediction target: the sale price column.
y = home_data['SalePrice']

def different_random_states(random_states_lst, model_random_state=1):
    """Measure random-forest validation MAE across several train/validation splits.

    For each seed in ``random_states_lst`` the notebook-level ``X`` and ``y``
    (read as globals, not passed in) are split with ``train_test_split`` using
    that seed, a ``RandomForestRegressor`` is fit on the training portion, and
    the mean absolute error on the validation portion is recorded.

    Parameters
    ----------
    random_states_lst : iterable
        Seeds forwarded to ``train_test_split(random_state=...)``; one split
        and one fitted model per seed.
    model_random_state : default 1
        Seed forwarded to ``RandomForestRegressor(random_state=...)``. It is
        the same for every split, so only the data split varies between runs.

    Returns
    -------
    list
        One ``[mae, random_state]`` two-element list per seed, in input order.
        An empty input yields an empty list.
    """
    mae_lst = []
    for random_state in random_states_lst:
        # Split into validation and training data using the current seed.
        train_X, val_X, train_y, val_y = train_test_split(X, y, random_state=random_state)

        # Fit a fresh model for this split; the model seed stays fixed.
        rf_model = RandomForestRegressor(random_state=model_random_state)
        rf_model.fit(train_X, train_y)
        rf_val_predictions = rf_model.predict(val_X)

        # scikit-learn's documented argument order is (y_true, y_pred).
        rf_val_mae = mean_absolute_error(val_y, rf_val_predictions)
        mae_lst.append([rf_val_mae, random_state])
    return mae_lst


In [37]:
# Evaluate the model on 31 different splits, using seeds 0 through 30.
random_states = [seed for seed in range(31)]
values = different_random_states(random_states_lst=random_states)

In [43]:
# Results in the order the seeds were evaluated.
print("Different random states [mae, random state value] :")
for mae_value, seed_value in values:
    print(mae_value, "\t", seed_value)

# Blank separator, then the same results ordered from lowest to highest MAE.
print("\n\n\n")
lst = sorted(values, key=lambda pair: pair[0])
for x in lst:
    print(x[0], "\t", x[1])

Different random states [mae, random state value] :
23009.206570906717 	 0
21857.15912981083 	 1
25050.85186692759 	 2
23845.218011741686 	 3
21395.41274063927 	 4
20346.30346692759 	 5
24480.964785388132 	 6
23855.328972602736 	 7
24366.80298630137 	 8
20889.93352276582 	 9
23820.532693150686 	 10
21112.480996216567 	 11
24701.39672433138 	 12
26201.151516634054 	 13
23164.297744292235 	 14
22792.569819178083 	 15
25236.450893933463 	 16
23861.031711676453 	 17
20834.293629223743 	 18
23145.04830332681 	 19
21360.10439791259 	 20
22467.34711937378 	 21
22100.08936073059 	 22
21256.730616379056 	 23
22352.22969080235 	 24
24329.570554990216 	 25
22867.75659634703 	 26
21460.910977168947 	 27
22448.818997390736 	 28
23354.34214181344 	 29
24411.75777364645 	 30




20346.30346692759 	 5
20834.293629223743 	 18
20889.93352276582 	 9
21112.480996216567 	 11
21256.730616379056 	 23
21360.10439791259 	 20
21395.41274063927 	 4
21460.910977168947 	 27
21857.15912981083 	 1
22100.08936073059 