In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import Ridge
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import BayesianRidge
from sklearn.linear_model import ARDRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LassoLars
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV
from scipy.stats import skew

## Zillow - Linear Models
We'll see how far we can get with some simple linear models here. First things first, load up and manipulate the data.

In [2]:
properties = pd.read_csv('../data/propertiesV2.csv')
transactions = pd.read_csv('../data/train_2016_v2.csv')

  interactivity=interactivity, compiler=compiler, result=result)


Let's do some additional preprocessing. We have a few categorical features that can probably be improved.

In [3]:
id_cols = [x for x in properties.columns if 'id' in x]
for c in id_cols:
    properties[c] = properties[c].astype('category')

In [4]:
properties['fips'] = properties['fips'].astype('category')
properties['rawcensustractandblock'] = properties['rawcensustractandblock'].astype('category')
properties['censustractandblock'] = properties['censustractandblock'].astype('category')

In [5]:
def cut_bad(series, count_value = 1000):
    bad_ids = (series.value_counts() < count_value)
    bad_ids = bad_ids[bad_ids == True]
    series = series.apply(lambda x: 32767 if x in bad_ids else x)
    return series

Here we're just creating dummy categories that contain categories with low value counts

In [6]:
%matplotlib inline
properties['airconditioningtypeid'] = cut_bad(properties['airconditioningtypeid'], 1000)
properties['architecturalstyletypeid'] = cut_bad(properties['architecturalstyletypeid'], 200)
properties['buildingclasstypeid'] = cut_bad(properties['buildingclasstypeid'], 3000)
properties['buildingqualitytypeid'] = cut_bad(properties['buildingqualitytypeid'], 3500)
properties['heatingorsystemtypeid'] = cut_bad(properties['heatingorsystemtypeid'], 1000)
properties['propertylandusetypeid'] = cut_bad(properties['propertylandusetypeid'])
properties['rawcensustractandblock'] = cut_bad(properties['rawcensustractandblock'], 400)
properties['regionidcity'] = cut_bad(properties['regionidcity'], 1500)
properties['regionidneighborhood'] = cut_bad(properties['regionidneighborhood'], 1500)
properties['regionidzip'] = cut_bad(properties['regionidzip'], 1500)
properties['typeconstructiontypeid'] = cut_bad(properties['typeconstructiontypeid'])
properties['censustractandblock'] = cut_bad(properties['censustractandblock'], 400)
properties['propertyzoningdesc'] = cut_bad(properties['propertyzoningdesc'], 1000)

In [7]:
num_ids = [c for c in properties.columns if str(properties[c].dtype) in ['float64','int64']]
bad_cols = (properties[num_ids].isnull().sum()/properties.shape[0] > 0.75)
bad_cols = bad_cols[bad_cols == True]
properties = properties.drop(bad_cols.index, axis=1)

In [8]:
from sklearn.linear_model import LinearRegression
train = transactions.merge(properties, how='left', on='parcelid')

In [17]:
def impute_nulls(series_name):

    lin = LinearRegression()
    avg_logerror = train[train[series_name].isnull()]['logerror'].abs().mean()
    df = train[train[series_name].notnull()][[series_name,'logerror']]
    df['logerror_abs'] = df['logerror'].abs()
    df['logerror_squared'] = df['logerror']**2
    lin.fit(df[['logerror_abs','logerror_squared']], df[series_name])
    impute_value = lin.predict([avg_logerror, avg_logerror**2])
    return impute_value
 
print (int(impute_nulls('yearbuilt')))
#print (impute_nulls('bathroom_num'))
print (int(impute_nulls('garage_num')))
print (impute_nulls('garage_area'))
print (int(impute_nulls('unitcnt')))
print (impute_nulls('fullbathcnt'))
print (int(impute_nulls('bedroom_num')))

train[train['garage_num'].isnull()].shape




1965
1
[ 348.48356738]
1
[ 2.25878704]




ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [31]:
properties['yearbuilt'] = properties['yearbuilt'].fillna(int(impute_nulls('yearbuilt')))
properties['garage_num'] = properties['garage_num'].fillna(int(impute_nulls('garage_num')))
properties['garage_area'] = properties['garage_area'].fillna(impute_nulls('garage_area'))
properties['unitcnt'] = properties['unitcnt'].fillna(int(impute_nulls('unitcnt')))
properties['fullbathcnt'] = properties['fullbathcnt'].fillna(impute_nulls('fullbathcnt'))
properties['bathroom_num'] = properties['bathroom_num'].fillna(int(impute_nulls('bathroom_num')))
properties['bedroom_num'] = properties['bedroom_num'].fillna(int(impute_nulls'bedroom_num'))
properties['hashottuborspa'] = properties['hashottuborspa'].fillna(False).astype(int)

In [21]:
num_ids = [c for c in properties.columns if str(properties[c].dtype) in ['float64','int64']]
properties[num_ids] = properties[num_ids].fillna(properties[num_ids].mean())

In [22]:
categories = properties.select_dtypes(include=['category']).columns

In [23]:
for c in categories:
    properties[c] = properties[c].cat.add_categories([32767]).fillna(32767)

In [24]:
properties['avg_garage_area'] = properties['avg_garage_area'].apply(lambda x: 0 if x == np.inf else x)
properties = properties.fillna(32767)

In [25]:
def is_numeric(value):
    try:
        float(value)
        return True
    except:
        return False

In [26]:
for c in properties.select_dtypes(include=['object','category']).columns:
    properties[c] = properties[c].apply(lambda x: int(float(x)) if is_numeric(x) else x)
    properties[c] = properties[c].astype('category')

In [27]:
properties.to_csv('../data/propertiesV3.csv', index=False)

## Load Here

In [3]:
properties = pd.read_csv('../data/propertiesV3.csv')
transactions = pd.read_csv('../data/train_2016_v2.csv')

KeyboardInterrupt: 

In [3]:
def fix_dtypes(df):
    id_cols = [x for x in df.columns if 'id' in x]
    for c in id_cols:
        df[c] = df[c].astype('category')

    df['fips'] = df['fips'].astype('category')
    df['rawcensustractandblock'] = df['rawcensustractandblock'].astype('category')
    df['censustractandblock'] = df['censustractandblock'].astype('category')

In [4]:
def ohe_chunk(df, chunks=10):
    current_index = 0
    count = 0
    chunk_size = df.shape[0]//chunks
    ohe = pd.DataFrame()
    
    while current_index < df.shape[0]:
        count += 1
        print ("Encoding Chunk {}".format(count))
        
        chunk_ohe = pd.get_dummies(df.iloc[current_index:current_index+chunk_size])
        
        ohe = pd.concat([ohe, chunk_ohe], ignore_index=True).fillna(0)
        
        current_index += chunk_size
        
    return ohe
        
        

In [5]:
def is_numeric(value):
    try:
        float(value)
        return True
    except:
        return False

In [6]:
fix_dtypes(properties)
for c in properties.select_dtypes(include=['object','category']).columns:
    properties[c] = properties[c].apply(lambda x: int(float(x)) if is_numeric(x) else x)
    properties[c] = properties[c].astype('category')
    
properties = properties.drop(['propertycountylandusecode','propertyzoningdesc'], axis=1)

In [7]:
properties_float = properties.select_dtypes(include=['float64']).drop(['latitude','longitude'], axis=1).columns
import seaborn as sns
i = 1

for c in properties_float:
    skewness = skew(properties[c])
    print ("Skewness of {}: {}".format(c, skewness))
    
    log_skewness = skew(np.log(properties[c] + 1))
    print ("Log Skewness of {}: {}".format(c, log_skewness))
    
    if not np.isnan(log_skewness):
        if abs(log_skewness) < abs(skewness):
            print ("Log Transforming {}".format(c))
            properties[c] = np.log(properties[c] + 1)
    

Skewness of bathroom_num: 1.1753124213630821
Log Skewness of bathroom_num: -0.8463518856676301
Log Transforming bathroom_num
Skewness of bedroom_num: 0.5022837557299848
Log Skewness of bedroom_num: -1.6878738002131608
Skewness of bathroom_area: 1.7277062581044489
Log Skewness of bathroom_area: 0.18272412099740729
Log Transforming bathroom_area
Skewness of calculated_home_area: 161.1725089827191
Log Skewness of calculated_home_area: 0.43536873177108926
Log Transforming calculated_home_area
Skewness of finished_area: 20.57865885985437
Log Skewness of finished_area: 0.2701517259854964
Log Transforming finished_area
Skewness of fullbathcnt: 1.2008703502933518
Log Skewness of fullbathcnt: -0.8181401434503617
Log Transforming fullbathcnt
Skewness of garage_num: 1.523623738532251
Log Skewness of garage_num: 1.0626358207685795
Log Transforming garage_num
Skewness of garage_area: 2.1193873994235997
Log Skewness of garage_area: 1.2911730267259134
Log Transforming garage_area
Skewness of lotsize_

  if __name__ == '__main__':
  if __name__ == '__main__':


Log Skewness of unfinished_area_pct: nan
Skewness of unfinished_area: 873.4643683661061
Log Skewness of unfinished_area: nan
Skewness of avg_garage_area: 1.4771700370915435
Log Skewness of avg_garage_area: 1.2888289148494283
Log Transforming avg_garage_area
Skewness of null_pct: 4.230849885543345
Log Skewness of null_pct: 3.61786632035335
Log Transforming null_pct


In [12]:
train = transactions.merge(properties, how='left', on='parcelid')
fix_dtypes(train)

In [11]:
del train

In [9]:
predictors = [c for c in train.columns if c not in ['parcelid','transactiondate','logerror']]
label = 'logerror'

In [None]:
ohe_properties = ohe_chunk(properties[predictors], chunks=20)

Encoding Chunk 1
Encoding Chunk 2
Encoding Chunk 3
Encoding Chunk 4
Encoding Chunk 5
Encoding Chunk 6
Encoding Chunk 7
Encoding Chunk 8
Encoding Chunk 9
Encoding Chunk 10
Encoding Chunk 11
Encoding Chunk 12
Encoding Chunk 13
Encoding Chunk 14
Encoding Chunk 15
Encoding Chunk 16
Encoding Chunk 17
Encoding Chunk 18
Encoding Chunk 19
Encoding Chunk 20
Encoding Chunk 21


In [None]:
ohe_properties.to_csv('../data/ohe_propertiesV1.csv', index=False)

### Load Here

In [4]:
transactions = pd.read_csv('../data/train_2016_v2.csv')
properties = pd.read_csv('../data/propertiesV3.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
ohe_properties = pd.read_csv('../data/ohe_propertiesV1.csv', chunksize=10000)

In [6]:
train_indices = properties[properties['parcelid'].isin(transactions['parcelid'])].index

In [7]:
i = 0
train_dfs = []
for chunk in ohe_properties:
    i += 1
    print('Extracting from chunk {}'.format(i))
    current_indices = train_indices[(train_indices <= max(chunk.index)) & (train_indices >= min(chunk.index))]
    current = chunk.ix[current_indices, :]
    current['parcelid'] = properties.iloc[current_indices]['parcelid']

    train_dfs.append(current)  

Extracting from chunk 1
Extracting from chunk 2
Extracting from chunk 3
Extracting from chunk 4
Extracting from chunk 5
Extracting from chunk 6
Extracting from chunk 7
Extracting from chunk 8
Extracting from chunk 9
Extracting from chunk 10
Extracting from chunk 11
Extracting from chunk 12
Extracting from chunk 13
Extracting from chunk 14
Extracting from chunk 15
Extracting from chunk 16
Extracting from chunk 17
Extracting from chunk 18
Extracting from chunk 19
Extracting from chunk 20
Extracting from chunk 21
Extracting from chunk 22
Extracting from chunk 23
Extracting from chunk 24
Extracting from chunk 25
Extracting from chunk 26
Extracting from chunk 27
Extracting from chunk 28
Extracting from chunk 29
Extracting from chunk 30
Extracting from chunk 31
Extracting from chunk 32
Extracting from chunk 33
Extracting from chunk 34
Extracting from chunk 35
Extracting from chunk 36
Extracting from chunk 37
Extracting from chunk 38
Extracting from chunk 39
Extracting from chunk 40
Extractin

In [8]:
ohe_train = pd.concat(train_dfs)
del train_dfs

In [9]:
ohe_train = ohe_train.merge(transactions, how='left', on='parcelid')

In [10]:
ohe_predictors = [c for c in ohe_train.columns if c not in ['parcelid','transactiondate','logerror']]

### Testing Linear Models
We'll test some linear models and linear transformations on the data

In [56]:
np.mean(cross_val_score(Ridge(), ohe_train[ohe_predictors], ohe_train['logerror'], scoring='neg_mean_absolute_error', cv=10))

-0.070289791140407495

In [57]:
pca = PCA(random_state=42)
non_outliers = ohe_train[ohe_train['logerror'].abs() < np.percentile(ohe_train['logerror'].abs(),98)].index
pca_reduced = pca.fit(ohe_train[ohe_predictors].iloc[non_outliers]).transform(ohe_train[ohe_predictors])

In [46]:
def find_optimal_component_num(clf, reduced, label, early_stopping=None):
    pf = PolynomialFeatures()
    count = 0
    last_best_score = -1.
    best_degree = 1
    last_best_n = 0
    for i in range(len(reduced)):
        for degree in [1,2]:
            print ("{} Components".format(i+1))
            print ("Basis Expansion (Degree 2):{}".format(degree))
            data = reduced[:, :i+1]
            if degree == 2:
                data = pf.fit_transform(data)

            score = np.mean(cross_val_score(clf, data, label, cv=5, scoring='neg_mean_absolute_error'))

            print ("Score: {}".format(score))
            if score > last_best_score:
                last_best_score = score
                last_best_n = i+1
                last_best_degree = best_degree
                count = 0
            else:
                count += 1

            if early_stopping:
                if count > early_stopping:
                    break
    
    return {'Components':last_best_n,'Score':last_best_score}

In [68]:
def linear_estimator_finder(clfs, pca_transformed, label, max_components=200,step=10):
    best_estimator = clfs[0]
    best_c_num = step
    best_score = -1
    
    for i in range(step, max_components+1, step):
        print ("Running Tests on {} Components".format(i))
        for clf in clfs:
            score = np.mean(cross_val_score(clf, pca_reduced[:,:i], label, scoring='neg_mean_absolute_error',cv=7))
            if score > best_score:
                best_score = score
                best_c_num = i
                best_estimator = clf
    
    return {'best_estimator':best_estimator, 'optimal_component_number':best_c_num, 'best_score':best_score}
    
    

In [90]:
classifiers = [Lasso(),
              Ridge(),
              ElasticNet(),
              HuberRegressor(),
              BayesianRidge(),
              LassoLars()]

optimal_results = linear_estimator_finder(classifiers, pca_reduced, ohe_train['logerror'])

Running Tests on 10 Components
Running Tests on 20 Components


KeyboardInterrupt: 

In [58]:
np.mean(cross_val_score(HuberRegressor(), pca_reduced[:,:2], ohe_train['logerror'], scoring='neg_mean_absolute_error',cv=7))

-0.068239718251451129

In [91]:
classifiers = [Lasso(),
              Ridge(),
              ElasticNet(),
              HuberRegressor(),
              BayesianRidge(),
              LassoLars()]

optimal_results = linear_estimator_finder(classifiers, pca_reduced, ohe_train['logerror'], max_components=10,step=1)

Running Tests on 1 Components
Running Tests on 2 Components
Running Tests on 3 Components
Running Tests on 4 Components
Running Tests on 5 Components
Running Tests on 6 Components
Running Tests on 7 Components
Running Tests on 8 Components
Running Tests on 9 Components
Running Tests on 10 Components


In [92]:
print (optimal_results)

{'best_score': -0.068164570338328428, 'optimal_component_number': 2, 'best_estimator': HuberRegressor(alpha=0.0001, epsilon=1.35, fit_intercept=True, max_iter=100,
        tol=1e-05, warm_start=False)}


Based on our preliminary tests, it looks like PCA reduced data with 2 components in use provided the best score.

* Best Score: 0.68205
* Best Algo: Huber
* Optimal Components: 2

We also tried it with PCA that had been fit on the data that represented 98% of the absolute logerror:

* Best Score: 0.68165
* Best Algo: Huber
* Optimal Components: 2

It looks as though eliminating those outliers helps PCA find more reasonable solutions here.

Now we'll tune the Huber Regressor to see if we can get the score even lower

In [107]:
N_COMPONENTS = 2
param_grid = {
    'alpha':[0.01,0.05,0.1,0.5,1.0,10.0],
    'epsilon':[1 + i/10 for i in range(0,30)]
}

gsearch = GridSearchCV(HuberRegressor(), param_grid, scoring='neg_mean_absolute_error', cv=10, verbose=2)

In [108]:
gsearch.fit(pca_reduced[:,:N_COMPONENTS], ohe_train['logerror'])

Fitting 10 folds for each of 180 candidates, totalling 1800 fits
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.3s remaining:    0.0s


[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] ................................ alpha=0.01, epsilon=1.0 -   0.0s
[CV] alpha=0.01, epsilon=1.0 .........................................
[CV] .

[CV] ................................ alpha=0.01, epsilon=1.5 -   0.0s
[CV] alpha=0.01, epsilon=1.6 .........................................
[CV] ................................ alpha=0.01, epsilon=1.6 -   0.0s
[CV] alpha=0.01, epsilon=1.6 .........................................
[CV] ................................ alpha=0.01, epsilon=1.6 -   0.0s
[CV] alpha=0.01, epsilon=1.6 .........................................
[CV] ................................ alpha=0.01, epsilon=1.6 -   0.0s
[CV] alpha=0.01, epsilon=1.6 .........................................
[CV] ................................ alpha=0.01, epsilon=1.6 -   0.0s
[CV] alpha=0.01, epsilon=1.6 .........................................
[CV] ................................ alpha=0.01, epsilon=1.6 -   0.0s
[CV] alpha=0.01, epsilon=1.6 .........................................
[CV] ................................ alpha=0.01, epsilon=1.6 -   0.0s
[CV] alpha=0.01, epsilon=1.6 .........................................
[CV] .

[CV] ................................ alpha=0.01, epsilon=2.1 -   0.0s
[CV] alpha=0.01, epsilon=2.1 .........................................
[CV] ................................ alpha=0.01, epsilon=2.1 -   0.0s
[CV] alpha=0.01, epsilon=2.1 .........................................
[CV] ................................ alpha=0.01, epsilon=2.1 -   0.0s
[CV] alpha=0.01, epsilon=2.2 .........................................
[CV] ................................ alpha=0.01, epsilon=2.2 -   0.0s
[CV] alpha=0.01, epsilon=2.2 .........................................
[CV] ................................ alpha=0.01, epsilon=2.2 -   0.0s
[CV] alpha=0.01, epsilon=2.2 .........................................
[CV] ................................ alpha=0.01, epsilon=2.2 -   0.0s
[CV] alpha=0.01, epsilon=2.2 .........................................
[CV] ................................ alpha=0.01, epsilon=2.2 -   0.0s
[CV] alpha=0.01, epsilon=2.2 .........................................
[CV] .

[CV] ................................ alpha=0.01, epsilon=2.7 -   0.0s
[CV] alpha=0.01, epsilon=2.7 .........................................
[CV] ................................ alpha=0.01, epsilon=2.7 -   0.0s
[CV] alpha=0.01, epsilon=2.7 .........................................
[CV] ................................ alpha=0.01, epsilon=2.7 -   0.0s
[CV] alpha=0.01, epsilon=2.7 .........................................
[CV] ................................ alpha=0.01, epsilon=2.7 -   0.0s
[CV] alpha=0.01, epsilon=2.7 .........................................
[CV] ................................ alpha=0.01, epsilon=2.7 -   0.0s
[CV] alpha=0.01, epsilon=2.8 .........................................
[CV] ................................ alpha=0.01, epsilon=2.8 -   0.0s
[CV] alpha=0.01, epsilon=2.8 .........................................
[CV] ................................ alpha=0.01, epsilon=2.8 -   0.0s
[CV] alpha=0.01, epsilon=2.8 .........................................
[CV] .

[CV] ................................ alpha=0.01, epsilon=3.3 -   0.0s
[CV] alpha=0.01, epsilon=3.3 .........................................
[CV] ................................ alpha=0.01, epsilon=3.3 -   0.0s
[CV] alpha=0.01, epsilon=3.3 .........................................
[CV] ................................ alpha=0.01, epsilon=3.3 -   0.0s
[CV] alpha=0.01, epsilon=3.3 .........................................
[CV] ................................ alpha=0.01, epsilon=3.3 -   0.0s
[CV] alpha=0.01, epsilon=3.3 .........................................
[CV] ................................ alpha=0.01, epsilon=3.3 -   0.0s
[CV] alpha=0.01, epsilon=3.3 .........................................
[CV] ................................ alpha=0.01, epsilon=3.3 -   0.0s
[CV] alpha=0.01, epsilon=3.3 .........................................
[CV] ................................ alpha=0.01, epsilon=3.3 -   0.0s
[CV] alpha=0.01, epsilon=3.4 .........................................
[CV] .

[CV] ................................ alpha=0.01, epsilon=3.9 -   0.0s
[CV] alpha=0.01, epsilon=3.9 .........................................
[CV] ................................ alpha=0.01, epsilon=3.9 -   0.0s
[CV] alpha=0.01, epsilon=3.9 .........................................
[CV] ................................ alpha=0.01, epsilon=3.9 -   0.0s
[CV] alpha=0.01, epsilon=3.9 .........................................
[CV] ................................ alpha=0.01, epsilon=3.9 -   0.0s
[CV] alpha=0.01, epsilon=3.9 .........................................
[CV] ................................ alpha=0.01, epsilon=3.9 -   0.0s
[CV] alpha=0.01, epsilon=3.9 .........................................
[CV] ................................ alpha=0.01, epsilon=3.9 -   0.0s
[CV] alpha=0.01, epsilon=3.9 .........................................
[CV] ................................ alpha=0.01, epsilon=3.9 -   0.0s
[CV] alpha=0.01, epsilon=3.9 .........................................
[CV] .

[CV] ................................ alpha=0.05, epsilon=1.4 -   0.0s
[CV] alpha=0.05, epsilon=1.5 .........................................
[CV] ................................ alpha=0.05, epsilon=1.5 -   0.0s
[CV] alpha=0.05, epsilon=1.5 .........................................
[CV] ................................ alpha=0.05, epsilon=1.5 -   0.0s
[CV] alpha=0.05, epsilon=1.5 .........................................
[CV] ................................ alpha=0.05, epsilon=1.5 -   0.0s
[CV] alpha=0.05, epsilon=1.5 .........................................
[CV] ................................ alpha=0.05, epsilon=1.5 -   0.0s
[CV] alpha=0.05, epsilon=1.5 .........................................
[CV] ................................ alpha=0.05, epsilon=1.5 -   0.0s
[CV] alpha=0.05, epsilon=1.5 .........................................
[CV] ................................ alpha=0.05, epsilon=1.5 -   0.0s
[CV] alpha=0.05, epsilon=1.5 .........................................
[CV] .

[CV] ................................ alpha=0.05, epsilon=2.0 -   0.0s
[CV] alpha=0.05, epsilon=2.0 .........................................
[CV] ................................ alpha=0.05, epsilon=2.0 -   0.0s
[CV] alpha=0.05, epsilon=2.0 .........................................
[CV] ................................ alpha=0.05, epsilon=2.0 -   0.0s
[CV] alpha=0.05, epsilon=2.1 .........................................
[CV] ................................ alpha=0.05, epsilon=2.1 -   0.0s
[CV] alpha=0.05, epsilon=2.1 .........................................
[CV] ................................ alpha=0.05, epsilon=2.1 -   0.0s
[CV] alpha=0.05, epsilon=2.1 .........................................
[CV] ................................ alpha=0.05, epsilon=2.1 -   0.0s
[CV] alpha=0.05, epsilon=2.1 .........................................
[CV] ................................ alpha=0.05, epsilon=2.1 -   0.0s
[CV] alpha=0.05, epsilon=2.1 .........................................
[CV] .

[CV] ................................ alpha=0.05, epsilon=2.6 -   0.0s
[CV] alpha=0.05, epsilon=2.6 .........................................
[CV] ................................ alpha=0.05, epsilon=2.6 -   0.0s
[CV] alpha=0.05, epsilon=2.6 .........................................
[CV] ................................ alpha=0.05, epsilon=2.6 -   0.0s
[CV] alpha=0.05, epsilon=2.6 .........................................
[CV] ................................ alpha=0.05, epsilon=2.6 -   0.0s
[CV] alpha=0.05, epsilon=2.6 .........................................
[CV] ................................ alpha=0.05, epsilon=2.6 -   0.0s
[CV] alpha=0.05, epsilon=2.7 .........................................
[CV] ................................ alpha=0.05, epsilon=2.7 -   0.0s
[CV] alpha=0.05, epsilon=2.7 .........................................
[CV] ................................ alpha=0.05, epsilon=2.7 -   0.0s
[CV] alpha=0.05, epsilon=2.7 .........................................
[CV] .

[CV] ................................ alpha=0.05, epsilon=3.2 -   0.0s
[CV] alpha=0.05, epsilon=3.2 .........................................
[CV] ................................ alpha=0.05, epsilon=3.2 -   0.0s
[CV] alpha=0.05, epsilon=3.2 .........................................
[CV] ................................ alpha=0.05, epsilon=3.2 -   0.0s
[CV] alpha=0.05, epsilon=3.2 .........................................
[CV] ................................ alpha=0.05, epsilon=3.2 -   0.0s
[CV] alpha=0.05, epsilon=3.2 .........................................
[CV] ................................ alpha=0.05, epsilon=3.2 -   0.0s
[CV] alpha=0.05, epsilon=3.2 .........................................
[CV] ................................ alpha=0.05, epsilon=3.2 -   0.0s
[CV] alpha=0.05, epsilon=3.2 .........................................
[CV] ................................ alpha=0.05, epsilon=3.2 -   0.0s
[CV] alpha=0.05, epsilon=3.3 .........................................
[CV] .

[CV] ................................ alpha=0.05, epsilon=3.8 -   0.0s
[CV] alpha=0.05, epsilon=3.8 .........................................
[CV] ................................ alpha=0.05, epsilon=3.8 -   0.0s
[CV] alpha=0.05, epsilon=3.8 .........................................
[CV] ................................ alpha=0.05, epsilon=3.8 -   0.0s
[CV] alpha=0.05, epsilon=3.8 .........................................
[CV] ................................ alpha=0.05, epsilon=3.8 -   0.0s
[CV] alpha=0.05, epsilon=3.8 .........................................
[CV] ................................ alpha=0.05, epsilon=3.8 -   0.0s
[CV] alpha=0.05, epsilon=3.8 .........................................
[CV] ................................ alpha=0.05, epsilon=3.8 -   0.0s
[CV] alpha=0.05, epsilon=3.8 .........................................
[CV] ................................ alpha=0.05, epsilon=3.8 -   0.0s
[CV] alpha=0.05, epsilon=3.8 .........................................
[CV] .

[CV] ................................. alpha=0.1, epsilon=1.3 -   0.0s
[CV] alpha=0.1, epsilon=1.4 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.4 -   0.0s
[CV] alpha=0.1, epsilon=1.4 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.4 -   0.0s
[CV] alpha=0.1, epsilon=1.4 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.4 -   0.0s
[CV] alpha=0.1, epsilon=1.4 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.4 -   0.0s
[CV] alpha=0.1, epsilon=1.4 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.4 -   0.0s
[CV] alpha=0.1, epsilon=1.4 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.4 -   0.0s
[CV] alpha=0.1, epsilon=1.4 ..........................................
[CV] .

[CV] ................................. alpha=0.1, epsilon=1.9 -   0.0s
[CV] alpha=0.1, epsilon=1.9 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.9 -   0.0s
[CV] alpha=0.1, epsilon=1.9 ..........................................
[CV] ................................. alpha=0.1, epsilon=1.9 -   0.0s
[CV] alpha=0.1, epsilon=2.0 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.0 -   0.0s
[CV] alpha=0.1, epsilon=2.0 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.0 -   0.0s
[CV] alpha=0.1, epsilon=2.0 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.0 -   0.0s
[CV] alpha=0.1, epsilon=2.0 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.0 -   0.0s
[CV] alpha=0.1, epsilon=2.0 ..........................................
[CV] .

[CV] ................................. alpha=0.1, epsilon=2.5 -   0.0s
[CV] alpha=0.1, epsilon=2.5 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.5 -   0.0s
[CV] alpha=0.1, epsilon=2.5 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.5 -   0.0s
[CV] alpha=0.1, epsilon=2.5 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.5 -   0.0s
[CV] alpha=0.1, epsilon=2.5 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.5 -   0.0s
[CV] alpha=0.1, epsilon=2.6 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.6 -   0.0s
[CV] alpha=0.1, epsilon=2.6 ..........................................
[CV] ................................. alpha=0.1, epsilon=2.6 -   0.0s
[CV] alpha=0.1, epsilon=2.6 ..........................................
[CV] .

[CV] ................................. alpha=0.1, epsilon=3.1 -   0.0s
[CV] alpha=0.1, epsilon=3.1 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.1 -   0.0s
[CV] alpha=0.1, epsilon=3.1 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.1 -   0.0s
[CV] alpha=0.1, epsilon=3.1 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.1 -   0.0s
[CV] alpha=0.1, epsilon=3.1 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.1 -   0.0s
[CV] alpha=0.1, epsilon=3.1 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.1 -   0.0s
[CV] alpha=0.1, epsilon=3.1 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.1 -   0.0s
[CV] alpha=0.1, epsilon=3.2 ..........................................
[CV] .

[CV] ................................. alpha=0.1, epsilon=3.7 -   0.0s
[CV] alpha=0.1, epsilon=3.7 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.7 -   0.0s
[CV] alpha=0.1, epsilon=3.7 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.7 -   0.0s
[CV] alpha=0.1, epsilon=3.7 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.7 -   0.0s
[CV] alpha=0.1, epsilon=3.7 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.7 -   0.0s
[CV] alpha=0.1, epsilon=3.7 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.7 -   0.0s
[CV] alpha=0.1, epsilon=3.7 ..........................................
[CV] ................................. alpha=0.1, epsilon=3.7 -   0.0s
[CV] alpha=0.1, epsilon=3.7 ..........................................
[CV] .

[CV] ................................. alpha=0.5, epsilon=1.2 -   0.0s
[CV] alpha=0.5, epsilon=1.3 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.3 -   0.0s
[CV] alpha=0.5, epsilon=1.3 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.3 -   0.0s
[CV] alpha=0.5, epsilon=1.3 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.3 -   0.0s
[CV] alpha=0.5, epsilon=1.3 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.3 -   0.0s
[CV] alpha=0.5, epsilon=1.3 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.3 -   0.0s
[CV] alpha=0.5, epsilon=1.3 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.3 -   0.0s
[CV] alpha=0.5, epsilon=1.3 ..........................................
[CV] .

[CV] ................................. alpha=0.5, epsilon=1.8 -   0.0s
[CV] alpha=0.5, epsilon=1.8 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.8 -   0.0s
[CV] alpha=0.5, epsilon=1.8 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.8 -   0.0s
[CV] alpha=0.5, epsilon=1.9 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.9 -   0.0s
[CV] alpha=0.5, epsilon=1.9 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.9 -   0.0s
[CV] alpha=0.5, epsilon=1.9 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.9 -   0.0s
[CV] alpha=0.5, epsilon=1.9 ..........................................
[CV] ................................. alpha=0.5, epsilon=1.9 -   0.0s
[CV] alpha=0.5, epsilon=1.9 ..........................................
[CV] .

[CV] ................................. alpha=0.5, epsilon=2.4 -   0.0s
[CV] alpha=0.5, epsilon=2.4 ..........................................
[CV] ................................. alpha=0.5, epsilon=2.4 -   0.0s
[CV] alpha=0.5, epsilon=2.4 ..........................................
[CV] ................................. alpha=0.5, epsilon=2.4 -   0.0s
[CV] alpha=0.5, epsilon=2.4 ..........................................
[CV] ................................. alpha=0.5, epsilon=2.4 -   0.0s
[CV] alpha=0.5, epsilon=2.4 ..........................................
[CV] ................................. alpha=0.5, epsilon=2.4 -   0.0s
[CV] alpha=0.5, epsilon=2.5 ..........................................
[CV] ................................. alpha=0.5, epsilon=2.5 -   0.0s
[CV] alpha=0.5, epsilon=2.5 ..........................................
[CV] ................................. alpha=0.5, epsilon=2.5 -   0.0s
[CV] alpha=0.5, epsilon=2.5 ..........................................
[CV] .

[CV] ................................. alpha=0.5, epsilon=3.0 -   0.0s
[CV] alpha=0.5, epsilon=3.0 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.0 -   0.0s
[CV] alpha=0.5, epsilon=3.0 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.0 -   0.0s
[CV] alpha=0.5, epsilon=3.0 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.0 -   0.0s
[CV] alpha=0.5, epsilon=3.0 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.0 -   0.0s
[CV] alpha=0.5, epsilon=3.0 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.0 -   0.0s
[CV] alpha=0.5, epsilon=3.0 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.0 -   0.0s
[CV] alpha=0.5, epsilon=3.1 ..........................................
[CV] .

[CV] ................................. alpha=0.5, epsilon=3.6 -   0.0s
[CV] alpha=0.5, epsilon=3.6 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.6 -   0.0s
[CV] alpha=0.5, epsilon=3.6 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.6 -   0.0s
[CV] alpha=0.5, epsilon=3.6 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.6 -   0.0s
[CV] alpha=0.5, epsilon=3.6 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.6 -   0.0s
[CV] alpha=0.5, epsilon=3.6 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.6 -   0.0s
[CV] alpha=0.5, epsilon=3.6 ..........................................
[CV] ................................. alpha=0.5, epsilon=3.6 -   0.0s
[CV] alpha=0.5, epsilon=3.6 ..........................................
[CV] .

[CV] ................................. alpha=1.0, epsilon=1.1 -   0.0s
[CV] alpha=1.0, epsilon=1.2 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.2 -   0.0s
[CV] alpha=1.0, epsilon=1.2 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.2 -   0.0s
[CV] alpha=1.0, epsilon=1.2 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.2 -   0.0s
[CV] alpha=1.0, epsilon=1.2 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.2 -   0.0s
[CV] alpha=1.0, epsilon=1.2 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.2 -   0.0s
[CV] alpha=1.0, epsilon=1.2 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.2 -   0.0s
[CV] alpha=1.0, epsilon=1.2 ..........................................
[CV] .

[CV] ................................. alpha=1.0, epsilon=1.7 -   0.0s
[CV] alpha=1.0, epsilon=1.7 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.7 -   0.0s
[CV] alpha=1.0, epsilon=1.7 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.7 -   0.0s
[CV] alpha=1.0, epsilon=1.8 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.8 -   0.0s
[CV] alpha=1.0, epsilon=1.8 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.8 -   0.0s
[CV] alpha=1.0, epsilon=1.8 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.8 -   0.0s
[CV] alpha=1.0, epsilon=1.8 ..........................................
[CV] ................................. alpha=1.0, epsilon=1.8 -   0.0s
[CV] alpha=1.0, epsilon=1.8 ..........................................
[CV] .

[CV] ................................. alpha=1.0, epsilon=2.3 -   0.0s
[CV] alpha=1.0, epsilon=2.3 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.3 -   0.0s
[CV] alpha=1.0, epsilon=2.3 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.3 -   0.0s
[CV] alpha=1.0, epsilon=2.3 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.3 -   0.0s
[CV] alpha=1.0, epsilon=2.3 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.3 -   0.0s
[CV] alpha=1.0, epsilon=2.4 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.4 -   0.0s
[CV] alpha=1.0, epsilon=2.4 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.4 -   0.0s
[CV] alpha=1.0, epsilon=2.4 ..........................................
[CV] .

[CV] ................................. alpha=1.0, epsilon=2.9 -   0.0s
[CV] alpha=1.0, epsilon=2.9 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.9 -   0.0s
[CV] alpha=1.0, epsilon=2.9 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.9 -   0.0s
[CV] alpha=1.0, epsilon=2.9 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.9 -   0.0s
[CV] alpha=1.0, epsilon=2.9 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.9 -   0.0s
[CV] alpha=1.0, epsilon=2.9 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.9 -   0.0s
[CV] alpha=1.0, epsilon=2.9 ..........................................
[CV] ................................. alpha=1.0, epsilon=2.9 -   0.0s
[CV] alpha=1.0, epsilon=3.0 ..........................................
[CV] .

[CV] ................................. alpha=1.0, epsilon=3.5 -   0.0s
[CV] alpha=1.0, epsilon=3.5 ..........................................
[CV] ................................. alpha=1.0, epsilon=3.5 -   0.0s
[CV] alpha=1.0, epsilon=3.5 ..........................................
[CV] ................................. alpha=1.0, epsilon=3.5 -   0.0s
[CV] alpha=1.0, epsilon=3.5 ..........................................
[CV] ................................. alpha=1.0, epsilon=3.5 -   0.0s
[CV] alpha=1.0, epsilon=3.5 ..........................................
[CV] ................................. alpha=1.0, epsilon=3.5 -   0.0s
[CV] alpha=1.0, epsilon=3.5 ..........................................
[CV] ................................. alpha=1.0, epsilon=3.5 -   0.0s
[CV] alpha=1.0, epsilon=3.5 ..........................................
[CV] ................................. alpha=1.0, epsilon=3.5 -   0.0s
[CV] alpha=1.0, epsilon=3.5 ..........................................
[CV] .

[CV] ................................ alpha=10.0, epsilon=1.0 -   0.0s
[CV] alpha=10.0, epsilon=1.1 .........................................
[CV] ................................ alpha=10.0, epsilon=1.1 -   0.0s
[CV] alpha=10.0, epsilon=1.1 .........................................
[CV] ................................ alpha=10.0, epsilon=1.1 -   0.0s
[CV] alpha=10.0, epsilon=1.1 .........................................
[CV] ................................ alpha=10.0, epsilon=1.1 -   0.0s
[CV] alpha=10.0, epsilon=1.1 .........................................
[CV] ................................ alpha=10.0, epsilon=1.1 -   0.0s
[CV] alpha=10.0, epsilon=1.1 .........................................
[CV] ................................ alpha=10.0, epsilon=1.1 -   0.0s
[CV] alpha=10.0, epsilon=1.1 .........................................
[CV] ................................ alpha=10.0, epsilon=1.1 -   0.0s
[CV] alpha=10.0, epsilon=1.1 .........................................
[CV] .

[CV] ................................ alpha=10.0, epsilon=1.6 -   0.0s
[CV] alpha=10.0, epsilon=1.6 .........................................
[CV] ................................ alpha=10.0, epsilon=1.6 -   0.0s
[CV] alpha=10.0, epsilon=1.6 .........................................
[CV] ................................ alpha=10.0, epsilon=1.6 -   0.0s
[CV] alpha=10.0, epsilon=1.7 .........................................
[CV] ................................ alpha=10.0, epsilon=1.7 -   0.0s
[CV] alpha=10.0, epsilon=1.7 .........................................
[CV] ................................ alpha=10.0, epsilon=1.7 -   0.0s
[CV] alpha=10.0, epsilon=1.7 .........................................
[CV] ................................ alpha=10.0, epsilon=1.7 -   0.0s
[CV] alpha=10.0, epsilon=1.7 .........................................
[CV] ................................ alpha=10.0, epsilon=1.7 -   0.0s
[CV] alpha=10.0, epsilon=1.7 .........................................
[CV] .

[CV] ................................ alpha=10.0, epsilon=2.2 -   0.0s
[CV] alpha=10.0, epsilon=2.2 .........................................
[CV] ................................ alpha=10.0, epsilon=2.2 -   0.0s
[CV] alpha=10.0, epsilon=2.2 .........................................
[CV] ................................ alpha=10.0, epsilon=2.2 -   0.0s
[CV] alpha=10.0, epsilon=2.2 .........................................
[CV] ................................ alpha=10.0, epsilon=2.2 -   0.0s
[CV] alpha=10.0, epsilon=2.2 .........................................
[CV] ................................ alpha=10.0, epsilon=2.2 -   0.0s
[CV] alpha=10.0, epsilon=2.3 .........................................
[CV] ................................ alpha=10.0, epsilon=2.3 -   0.0s
[CV] alpha=10.0, epsilon=2.3 .........................................
[CV] ................................ alpha=10.0, epsilon=2.3 -   0.0s
[CV] alpha=10.0, epsilon=2.3 .........................................
[CV] .

[CV] ................................ alpha=10.0, epsilon=2.8 -   0.0s
[CV] alpha=10.0, epsilon=2.8 .........................................
[CV] ................................ alpha=10.0, epsilon=2.8 -   0.0s
[CV] alpha=10.0, epsilon=2.8 .........................................
[CV] ................................ alpha=10.0, epsilon=2.8 -   0.0s
[CV] alpha=10.0, epsilon=2.8 .........................................
[CV] ................................ alpha=10.0, epsilon=2.8 -   0.0s
[CV] alpha=10.0, epsilon=2.8 .........................................
[CV] ................................ alpha=10.0, epsilon=2.8 -   0.0s
[CV] alpha=10.0, epsilon=2.8 .........................................
[CV] ................................ alpha=10.0, epsilon=2.8 -   0.0s
[CV] alpha=10.0, epsilon=2.8 .........................................
[CV] ................................ alpha=10.0, epsilon=2.8 -   0.0s
[CV] alpha=10.0, epsilon=2.9 .........................................
[CV] .

[CV] ................................ alpha=10.0, epsilon=3.4 -   0.0s
[CV] alpha=10.0, epsilon=3.4 .........................................
[CV] ................................ alpha=10.0, epsilon=3.4 -   0.0s
[CV] alpha=10.0, epsilon=3.4 .........................................
[CV] ................................ alpha=10.0, epsilon=3.4 -   0.0s
[CV] alpha=10.0, epsilon=3.4 .........................................
[CV] ................................ alpha=10.0, epsilon=3.4 -   0.0s
[CV] alpha=10.0, epsilon=3.4 .........................................
[CV] ................................ alpha=10.0, epsilon=3.4 -   0.0s
[CV] alpha=10.0, epsilon=3.4 .........................................
[CV] ................................ alpha=10.0, epsilon=3.4 -   0.0s
[CV] alpha=10.0, epsilon=3.4 .........................................
[CV] ................................ alpha=10.0, epsilon=3.4 -   0.0s
[CV] alpha=10.0, epsilon=3.4 .........................................
[CV] .

[CV] ................................ alpha=10.0, epsilon=3.9 -   0.0s


[Parallel(n_jobs=1)]: Done 1800 out of 1800 | elapsed: 10.5min finished


GridSearchCV(cv=10, error_score='raise',
       estimator=HuberRegressor(alpha=0.0001, epsilon=1.35, fit_intercept=True, max_iter=100,
        tol=1e-05, warm_start=False),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'alpha': [0.01, 0.05, 0.1, 0.5, 1.0, 10.0], 'epsilon': [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 2.9, 3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='neg_mean_absolute_error', verbose=2)

In [109]:
print (gsearch.best_score_, gsearch.best_params_)

-0.0681396502468 {'alpha': 1.0, 'epsilon': 1.6}


Our chosen linear model is a huber regressor with an alpha of 1.0 and an epsilon of 1.6.

In [67]:
clf = HuberRegressor(alpha=1.0, epsilon=1.6)
clf.fit(pca_reduced[:, :2], ohe_train['logerror'])

HuberRegressor(alpha=1.0, epsilon=1.6, fit_intercept=True, max_iter=100,
        tol=1e-05, warm_start=False)

In [60]:
sample_submission = pd.read_csv('../data/sample_submission.csv')

In [71]:
sample_submission.head()

Unnamed: 0,ParcelId,201610,201611,201612,201710,201711,201712
0,10754147,0,0,0,0,0,0
1,10759547,0,0,0,0,0,0
2,10843547,0,0,0,0,0,0
3,10859147,0,0,0,0,0,0
4,10879947,0,0,0,0,0,0


In [75]:
ohe_properties = pd.read_csv('../data/ohe_propertiesV1.csv', chunksize=10000)

In [76]:
submission_dfs = []
i = 0
for chunk in ohe_properties:
    i += 1
    print ("Running Chunk {}".format(i))
    reduced = pca.transform(chunk[ohe_predictors])
    prediction = clf.predict(reduced[:, :2])
    
    pred_df = pd.DataFrame({'ParcelId': properties.ix[chunk.index, :]['parcelid'],
                           '201610':prediction, '201611': prediction, '201612':prediction,
                           '201710':prediction, '201711': prediction, '201712':prediction})
    submission_dfs.append(pred_df)
    

Running Chunk 1
Running Chunk 2
Running Chunk 3
Running Chunk 4
Running Chunk 5
Running Chunk 6
Running Chunk 7
Running Chunk 8
Running Chunk 9
Running Chunk 10
Running Chunk 11
Running Chunk 12
Running Chunk 13
Running Chunk 14
Running Chunk 15
Running Chunk 16
Running Chunk 17
Running Chunk 18
Running Chunk 19
Running Chunk 20
Running Chunk 21
Running Chunk 22
Running Chunk 23
Running Chunk 24
Running Chunk 25
Running Chunk 26
Running Chunk 27
Running Chunk 28
Running Chunk 29
Running Chunk 30
Running Chunk 31
Running Chunk 32
Running Chunk 33
Running Chunk 34
Running Chunk 35
Running Chunk 36
Running Chunk 37
Running Chunk 38
Running Chunk 39
Running Chunk 40
Running Chunk 41
Running Chunk 42
Running Chunk 43
Running Chunk 44
Running Chunk 45
Running Chunk 46
Running Chunk 47
Running Chunk 48
Running Chunk 49
Running Chunk 50
Running Chunk 51
Running Chunk 52
Running Chunk 53
Running Chunk 54
Running Chunk 55
Running Chunk 56
Running Chunk 57
Running Chunk 58
Running Chunk 59
Runnin

In [77]:
submissions = pd.concat(submission_dfs)
del submission_dfs

In [83]:
cols = submissions.columns.tolist()
cols = cols[-1:] + cols[:-1]
submissions = submissions[cols]
submissions.to_csv('../predictions/pca_huber.csv', index=False)

### Leaderboard Score
The score of this model on the public leaderboard was 0.0652306 :(

## Other Feature Reduction Techniques
Let's try a few other ones, like KernelPCA and maybe some manifold techniques.

In [15]:
from sklearn.decomposition import FastICA
ica = KernelPCA(n_components=10)

In [16]:
non_outliers = ohe_train[ohe_train['logerror'].abs() < np.percentile(ohe_train['logerror'].abs(),98)].index
ica_reduced = kpca.fit(ohe_train[ohe_predictors].iloc[non_outliers]).transform(ohe_train[ohe_predictors])

MemoryError: 