In [1]:
import pandas as pd

# Define years and file pattern
years = range(2009, 2023)
file_pattern = "../Data/{}_census_data.csv"

# List to store processed DataFrames
df_list = []

for year in years:
    file_name = file_pattern.format(year)

    try:
        # Read CSV file
        census_data = pd.read_csv(file_name)

        # Remove 'County' from county_name
        census_data['county_name'] = census_data['county_name'].str.rsplit(' ', n=1).str[0]

        # Save modified file back
        census_data.to_csv(file_name, index=False)
        print(f"Changes saved to: {file_name}")

        # Add 'Year' column and store in list
        census_data['Year'] = year
        df_list.append(census_data)

    except FileNotFoundError:
        print(f"Warning: Could not find {file_name}")
    except Exception as e:
        print(f"Error processing {file_name}: {str(e)}")


Changes saved to: ../Data/2009_census_data.csv
Changes saved to: ../Data/2010_census_data.csv
Changes saved to: ../Data/2011_census_data.csv
Changes saved to: ../Data/2012_census_data.csv
Changes saved to: ../Data/2013_census_data.csv
Changes saved to: ../Data/2014_census_data.csv
Changes saved to: ../Data/2015_census_data.csv
Changes saved to: ../Data/2016_census_data.csv
Changes saved to: ../Data/2017_census_data.csv
Changes saved to: ../Data/2018_census_data.csv
Changes saved to: ../Data/2019_census_data.csv
Changes saved to: ../Data/2020_census_data.csv
Changes saved to: ../Data/2021_census_data.csv
Changes saved to: ../Data/2022_census_data.csv


In [2]:
#Data preprocessing ensuring no - or NaN values
for i, df in enumerate(df_list):
    old = len(df)

    df = df[(df["unemployment %"] != "(X)") & (df["% below poverty line"] != "(X)")]
    df = df[df['median house value']>=0]
    df = df[df['median income']>=0]
    df = df.dropna()

    df_list[i] = df
    
    new = len(df)

    print('\n')
    print(df.min())
    print(df.max())
    if new == 0:
        print('all data dropped')
    else:
        print('rows reduced by: ', 1-new/old)




mil pop                 NaN
unemployment %          NaN
mean income             NaN
median income           NaN
vacant housing units    NaN
rental vacany rate      NaN
% below poverty line    NaN
median house value      NaN
NAME                    NaN
state                   NaN
county                  NaN
county_name             NaN
state_name              NaN
Year                    NaN
dtype: object
mil pop                 NaN
unemployment %          NaN
mean income             NaN
median income           NaN
vacant housing units    NaN
rental vacany rate      NaN
% below poverty line    NaN
median house value      NaN
NAME                    NaN
state                   NaN
county                  NaN
county_name             NaN
state_name              NaN
Year                    NaN
dtype: object
all data dropped


mil pop                                              0.0
unemployment %                                       0.0
mean income                                      1529

In [3]:
#Combine all years dataframes into one
df_full = pd.concat(df_list)

  df_full = pd.concat(df_list)


In [4]:
#Drop any counties that do not exist for every year of data
df_filter = df_full[df_full.groupby('NAME')['NAME'].transform('size').gt(12)]

In [5]:
#Group counties together by using a multiindex
df_filter = df_filter.set_index(['NAME', 'Year'])
df_filter = df_filter.sort_index(level=0)

In [21]:
df_filter[['unemployment %', '% below poverty line', 'rental vacany rate']] = df_filter[['unemployment %', '% below poverty line', 'rental vacany rate']]/100

In [22]:
df_filter['change in mil'] = df_filter['mil pop'].diff(1)
df_filter['change in mil'] = df_filter.groupby(level = 0)['change in mil'].shift(-1)

In [23]:
X = df_filter.reset_index()
X = X.loc[:, [*X.columns[0:10], 'change in mil']].dropna()
X = X.sort_values(by=['Year','NAME'])
display(X)

Unnamed: 0,NAME,Year,mil pop,unemployment %,mean income,median income,vacant housing units,rental vacany rate,% below poverty line,median house value,change in mil
0,"Abbeville County, South Carolina",2010,13.0,0.144,41816.0,33143.0,2201.0,0.059,0.163,85900.0,8.0
13,"Acadia Parish, Louisiana",2010,33.0,0.067,48844.0,37261.0,3080.0,0.078,0.172,86700.0,-20.0
26,"Accomack County, Virginia",2010,37.0,0.092,53240.0,41372.0,6843.0,0.036,0.097,149800.0,27.0
39,"Ada County, Idaho",2010,1204.0,0.066,72123.0,55835.0,10170.0,0.062,0.069,214500.0,-214.0
52,"Adair County, Iowa",2010,0.0,0.045,53911.0,45202.0,358.0,0.043,0.057,93800.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
41533,"Yuma County, Arizona",2021,4001.0,0.084,68613.0,52563.0,18974.0,0.059,0.154,148900.0,-79.0
41546,"Yuma County, Colorado",2021,0.0,0.060,74972.0,56327.0,427.0,0.009,0.104,184400.0,0.0
41559,"Zapata County, Texas",2021,0.0,0.083,60557.0,34406.0,1781.0,0.043,0.276,82800.0,0.0
41572,"Zavala County, Texas",2021,13.0,0.037,54300.0,44573.0,574.0,0.060,0.206,63300.0,6.0


In [24]:
y = df_filter.groupby(level = 0).shift(-1).reset_index()
y = y.loc[:, ['NAME', 'Year',*X.columns[3:10]]].dropna()
y = y.sort_values(by=['Year','NAME'])
display(y)
y = y.loc[:, [*X.columns[3:10]]]

Unnamed: 0,NAME,Year,unemployment %,mean income,median income,vacant housing units,rental vacany rate,% below poverty line,median house value
0,"Abbeville County, South Carolina",2010,0.142,43780.0,34670.0,2271.0,0.050,0.149,89100.0
13,"Acadia Parish, Louisiana",2010,0.083,51108.0,37970.0,3178.0,0.087,0.169,90900.0
26,"Accomack County, Virginia",2010,0.076,55531.0,41595.0,7208.0,0.043,0.105,153400.0
39,"Ada County, Idaho",2010,0.078,72128.0,55304.0,10328.0,0.057,0.077,206200.0
52,"Adair County, Iowa",2010,0.044,55100.0,47623.0,450.0,0.059,0.069,93300.0
...,...,...,...,...,...,...,...,...,...
41533,"Yuma County, Arizona",2021,0.083,74377.0,56439.0,18130.0,0.058,0.148,173500.0
41546,"Yuma County, Colorado",2021,0.057,81329.0,60118.0,360.0,0.005,0.107,201500.0
41559,"Zapata County, Texas",2021,0.081,56419.0,35061.0,1603.0,0.016,0.315,84800.0
41572,"Zavala County, Texas",2021,0.058,60004.0,49243.0,529.0,0.048,0.210,77300.0


In [10]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

tscv = TimeSeriesSplit(5)

all_splits = list(tscv.split(X, y))
train_0, test_0 = all_splits[0]

In [11]:
#Chekc to ensure all counties have 2 entries in first split
X.iloc[test_0].groupby('NAME').count().Year.unique()

array([2])

In [37]:
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

mae_list = []

n_features = ['mean income', 'median income', 'vacant housing units', 'median house value', 'mil pop', 'change in mil']
n_pipeline = Pipeline([
    ('scaler', MinMaxScaler())
])

c_features = ['NAME']
c_pipeline = Pipeline([
    ('encoder',  OneHotEncoder())
])

preprocessor = ColumnTransformer(
    transformers = [
        ("num", n_pipeline, n_features),
        ("cat", c_pipeline, c_features),
    ]
)

model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

for train_index, test_index in all_splits:
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    mae = mean_absolute_percentage_error(y_test, y_pred, multioutput = 'raw_values')
    mae_list.append(mae)

pd.DataFrame(mae_list, columns = y.columns)



Unnamed: 0,unemployment %,mean income,median income,vacant housing units,rental vacany rate,% below poverty line,median house value
0,29858590000.0,0.033342,0.035455,0.075086,2207949000000.0,4824460000.0,0.033155
1,81555090000.0,0.0314,0.033204,0.090011,2775567000000.0,38677280000.0,0.035333
2,405391600000.0,0.044462,0.04519,0.109276,3826272000000.0,89668790000.0,0.041622
3,407632900000.0,0.033224,0.037342,0.127716,6004314000000.0,162498500000.0,0.044161
4,462411500000.0,0.050201,0.052224,0.136592,8381150000000.0,170547600000.0,0.060192


In [36]:
pd.DataFrame(model.predict(X_test), columns = y.columns)

Unnamed: 0,unemployment %,mean income,median income,vacant housing units,rental vacany rate,% below poverty line,median house value
0,0.071169,63769.029821,45243.548798,2460.301321,0.014221,0.140730,110978.170475
1,0.076753,64651.335090,45781.596623,4056.450333,0.077448,0.161358,135775.086659
2,0.044147,64663.563942,47106.544053,7701.500814,0.033043,0.116186,179048.835112
3,0.031535,97221.467309,72750.225064,7377.681281,0.036718,0.055281,310150.618023
4,0.022750,69056.222940,56369.839878,465.463090,0.088160,0.043228,116398.305343
...,...,...,...,...,...,...,...
6393,0.088902,70118.402901,53497.761852,19083.502796,0.080142,0.152165,154335.386614
6394,-0.001484,77096.252151,58224.385718,379.757995,0.045406,0.058987,198615.091875
6395,0.079761,61858.360184,36848.688964,1741.473499,0.092448,0.281499,91126.617366
6396,0.059097,56930.358613,45050.968874,376.366404,0.055981,0.253451,76499.185776


In [38]:
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

mae_list = []

n_features = ['mean income', 'median income', 'vacant housing units', 'median house value', 'mil pop', 'change in mil']
n_pipeline = Pipeline([
    ('scaler', MinMaxScaler())
])

c_features = ['NAME']
c_pipeline = Pipeline([
    ('encoder',  OneHotEncoder())
])

preprocessor = ColumnTransformer(
    transformers = [
        ("num", n_pipeline, n_features),
        ("cat", c_pipeline, c_features),
    ]
)

model = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(verbose = True))
])

for train_index, test_index in all_splits:
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    mae = mean_absolute_percentage_error(y_test, y_pred, multioutput = 'raw_values')
    mae_list.append(mae)

pd.DataFrame(mae_list, columns = y.columns)




[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   23.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   38.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:  1.1min
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:  1.5min
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.4s


Unnamed: 0,unemployment %,mean income,median income,vacant housing units,rental vacany rate,% below poverty line,median house value
0,107367000000.0,0.030016,0.03431,0.485301,5915905000000.0,36392010000.0,0.031349
1,251583500000.0,0.029905,0.032409,0.442693,4512793000000.0,13310890000.0,0.032352
2,714656100000.0,0.037652,0.037025,0.435211,5153060000000.0,181636300000.0,0.037764
3,878335700000.0,0.034133,0.039825,0.464786,7747888000000.0,267175100000.0,0.0409
4,863138300000.0,0.054945,0.053309,0.57272,10650380000000.0,187119700000.0,0.07054


In [44]:
display(pd.DataFrame(model.predict(X_test), columns = y.columns).min())
display(pd.DataFrame(model.predict(X_test), columns = y.columns).max())

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s


unemployment %              0.01417
mean income             19215.02000
median income           13639.64000
vacant housing units      192.18000
rental vacany rate          0.02168
% below poverty line        0.02544
median house value      27944.00000
dtype: float64

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s


unemployment %          2.690800e-01
mean income             1.737350e+05
median income           1.321849e+05
vacant housing units    2.044672e+05
rental vacany rate      2.338200e-01
% below poverty line    5.674100e-01
median house value      1.136943e+06
dtype: float64

In [46]:
y = df_filter.groupby(level = 0).shift(-1).reset_index()
y = y.loc[:, ['NAME', 'Year','mean income', 'median income', 'median house value']].dropna()
y = y.sort_values(by=['Year','NAME'])
display(y)
y = y.loc[:, ['mean income', 'median income', 'median house value']]

Unnamed: 0,NAME,Year,mean income,median income,median house value
0,"Abbeville County, South Carolina",2010,43780.0,34670.0,89100.0
13,"Acadia Parish, Louisiana",2010,51108.0,37970.0,90900.0
26,"Accomack County, Virginia",2010,55531.0,41595.0,153400.0
39,"Ada County, Idaho",2010,72128.0,55304.0,206200.0
52,"Adair County, Iowa",2010,55100.0,47623.0,93300.0
...,...,...,...,...,...
41533,"Yuma County, Arizona",2021,74377.0,56439.0,173500.0
41546,"Yuma County, Colorado",2021,81329.0,60118.0,201500.0
41559,"Zapata County, Texas",2021,56419.0,35061.0,84800.0
41572,"Zavala County, Texas",2021,60004.0,49243.0,77300.0


In [56]:
from sklearn.model_selection import cross_validate
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

mae_list = []

n_features = ['mean income', 'median income', 'vacant housing units', 'median house value', 'mil pop', 'change in mil']
n_pipeline = Pipeline([
    ('scaler', MinMaxScaler())
])

c_features = ['NAME']
c_pipeline = Pipeline([
    ('encoder',  OneHotEncoder())
])

preprocessor = ColumnTransformer(
    transformers = [
        ("num", n_pipeline, n_features),
        ("cat", c_pipeline, c_features),
    ]
)

model_linear = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression())
])

for train_index, test_index in all_splits:
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_linear.fit(X_train, y_train)
    y_pred = model_linear.predict(X_test)
    
    mae = mean_absolute_percentage_error(y_test, y_pred, multioutput = 'raw_values')
    mae_list.append(mae)

mae_linear = pd.DataFrame(mae_list, columns = y.columns)
mae_linear


Unnamed: 0,mean income,median income,median house value
0,0.033342,0.035455,0.033155
1,0.0314,0.033204,0.035333
2,0.044462,0.04519,0.041622
3,0.033224,0.037342,0.044161
4,0.050201,0.052224,0.060192


In [57]:
from sklearn.model_selection import cross_validate
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

mae_list = []

n_features = ['mean income', 'median income', 'vacant housing units', 'median house value', 'mil pop', 'change in mil']
n_pipeline = Pipeline([
    ('scaler', MinMaxScaler())
])

c_features = ['NAME']
c_pipeline = Pipeline([
    ('encoder',  OneHotEncoder())
])

preprocessor = ColumnTransformer(
    transformers = [
        ("num", n_pipeline, n_features),
        ("cat", c_pipeline, c_features),
    ]
)

model_forest = Pipeline([
    ('preprocessor', preprocessor),
    ('model', RandomForestRegressor(verbose = True))
])

for train_index, test_index in all_splits:
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_forest.fit(X_train, y_train)
    y_pred = model_forest.predict(X_test)
    
    mae = mean_absolute_percentage_error(y_test, y_pred, multioutput = 'raw_values')
    mae_list.append(mae)

mae_forest = pd.DataFrame(mae_list, columns = y.columns)
mae_forest



[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    9.0s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   20.4s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   35.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:   54.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:  1.3min
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.2s


In [58]:
mae_forest.mean()

mean income           0.036104
median income         0.038280
median house value    0.042554
dtype: float64

In [59]:
mae_linear.mean()

mean income           0.038526
median income         0.040683
median house value    0.042893
dtype: float64

In [60]:
from sklearn.model_selection import cross_validate
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

mae_list = []

n_features = ['mean income', 'median income', 'vacant housing units', 'median house value', 'mil pop', 'change in mil']
n_pipeline = Pipeline([
    ('scaler', MinMaxScaler())
])

c_features = ['NAME']
c_pipeline = Pipeline([
    ('encoder',  OneHotEncoder())
])

preprocessor = ColumnTransformer(
    transformers = [
        ("num", n_pipeline, n_features),
        ("cat", c_pipeline, c_features),
    ]
)

model_NN = Pipeline([
    ('preprocessor', preprocessor),
    ('model', MLPRegressor(verbose = True))
])

for train_index, test_index in all_splits:
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    
    model_NN.fit(X_train, y_train)
    y_pred = model_forest.predict(X_test)
    
    mae = mean_absolute_percentage_error(y_test, y_pred, multioutput = 'raw_values')
    mae_list.append(mae)

mae_NN = pd.DataFrame(mae_list, columns = y.columns)
mae_NN

Iteration 1, loss = 4952576988.88075829
Iteration 2, loss = 4952522232.05840874
Iteration 3, loss = 4952418640.80808926
Iteration 4, loss = 4952258770.81443787
Iteration 5, loss = 4952036945.98450279
Iteration 6, loss = 4951751404.30079365
Iteration 7, loss = 4951399160.95083618
Iteration 8, loss = 4950977315.84046268
Iteration 9, loss = 4950480457.49707222
Iteration 10, loss = 4949909551.53738976
Iteration 11, loss = 4949265064.71854401
Iteration 12, loss = 4948550227.83151627
Iteration 13, loss = 4947769297.12945175
Iteration 14, loss = 4946920663.51515484
Iteration 15, loss = 4946008891.17806721
Iteration 16, loss = 4945034302.03838062
Iteration 17, loss = 4943997039.12733936
Iteration 18, loss = 4942891010.98555374
Iteration 19, loss = 4941710637.26155472
Iteration 20, loss = 4940461336.58902168
Iteration 21, loss = 4939153716.55593586
Iteration 22, loss = 4937781025.99345970
Iteration 23, loss = 4936350963.69846916
Iteration 24, loss = 4934862925.53289223
Iteration 25, loss = 4933

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s


Iteration 1, loss = 4875559066.18269348
Iteration 2, loss = 4875336182.46785450
Iteration 3, loss = 4874859161.84357738
Iteration 4, loss = 4874102540.43118763
Iteration 5, loss = 4873067841.63828278
Iteration 6, loss = 4871764096.60857677
Iteration 7, loss = 4870194778.56888771
Iteration 8, loss = 4868350638.00148201
Iteration 9, loss = 4866228249.11113834
Iteration 10, loss = 4863848857.22255611
Iteration 11, loss = 4861223357.57189941
Iteration 12, loss = 4858338550.28183842
Iteration 13, loss = 4855207200.81418228
Iteration 14, loss = 4851820695.25622845
Iteration 15, loss = 4848182916.28827095
Iteration 16, loss = 4844285358.92932510
Iteration 17, loss = 4840131780.21144104
Iteration 18, loss = 4835734820.35838985
Iteration 19, loss = 4831115400.10525513
Iteration 20, loss = 4826288243.15290737
Iteration 21, loss = 4821253501.27555084
Iteration 22, loss = 4816023337.91860676
Iteration 23, loss = 4810598093.94946480
Iteration 24, loss = 4804987514.83683777
Iteration 25, loss = 4799

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s


Iteration 1, loss = 4946274047.37269497
Iteration 2, loss = 4945749792.35629463
Iteration 3, loss = 4944611743.30773830
Iteration 4, loss = 4942842520.01728249
Iteration 5, loss = 4940463118.53194237
Iteration 6, loss = 4937500681.36598969
Iteration 7, loss = 4933905265.65973949
Iteration 8, loss = 4929690434.96903610
Iteration 9, loss = 4924925006.38066673
Iteration 10, loss = 4919635994.82126045
Iteration 11, loss = 4913833905.89912605
Iteration 12, loss = 4907499984.65166664
Iteration 13, loss = 4900643308.25616169
Iteration 14, loss = 4893278891.56663704
Iteration 15, loss = 4885434402.91401482
Iteration 16, loss = 4877101344.17529678
Iteration 17, loss = 4868327729.02145576
Iteration 18, loss = 4859139737.55544949
Iteration 19, loss = 4849555250.07485199
Iteration 20, loss = 4839589714.97350311
Iteration 21, loss = 4829243823.09040833
Iteration 22, loss = 4818540042.14822388
Iteration 23, loss = 4807485439.32402802
Iteration 24, loss = 4796084053.56020355
Iteration 25, loss = 4784

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s


Iteration 1, loss = 5179740859.56979275
Iteration 2, loss = 5178779854.91815758
Iteration 3, loss = 5176732873.94986820
Iteration 4, loss = 5173615036.97806644
Iteration 5, loss = 5169455176.83043385
Iteration 6, loss = 5164301185.79502773
Iteration 7, loss = 5158174558.87797546
Iteration 8, loss = 5151155360.41849041
Iteration 9, loss = 5143290787.35424423
Iteration 10, loss = 5134622338.62052250
Iteration 11, loss = 5125120777.63461685
Iteration 12, loss = 5114830653.15827560
Iteration 13, loss = 5103816548.17787266
Iteration 14, loss = 5092122079.92244148
Iteration 15, loss = 5079772094.53410244
Iteration 16, loss = 5066788018.75944710
Iteration 17, loss = 5053197587.91610146
Iteration 18, loss = 5039027389.97051525
Iteration 19, loss = 5024288285.67275620
Iteration 20, loss = 5008997323.99536800
Iteration 21, loss = 4993188932.53492451
Iteration 22, loss = 4976866258.80085659
Iteration 23, loss = 4960055770.85177708
Iteration 24, loss = 4942755499.38830471
Iteration 25, loss = 4924

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.4s


Iteration 1, loss = 5548103749.12808990
Iteration 2, loss = 5546890044.64430714
Iteration 3, loss = 5544129999.58772659
Iteration 4, loss = 5539788925.55797768
Iteration 5, loss = 5533983547.95479870
Iteration 6, loss = 5526795875.53816795
Iteration 7, loss = 5518317458.80540943
Iteration 8, loss = 5508516702.79718304
Iteration 9, loss = 5497515942.96322441
Iteration 10, loss = 5485435101.54518795
Iteration 11, loss = 5472344799.49166775
Iteration 12, loss = 5458294680.34160328
Iteration 13, loss = 5443323726.32645607
Iteration 14, loss = 5427476563.66490269
Iteration 15, loss = 5410796627.93731499
Iteration 16, loss = 5393311016.25364590
Iteration 17, loss = 5375047933.21545887
Iteration 18, loss = 5356037698.94520187
Iteration 19, loss = 5336310827.76307297
Iteration 20, loss = 5315893513.60260296
Iteration 21, loss = 5294800707.16772556
Iteration 22, loss = 5273062215.24280643
Iteration 23, loss = 5250692813.15678215
Iteration 24, loss = 5227709858.72408581
Iteration 25, loss = 5204

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.1s


Unnamed: 0,mean income,median income,median house value
0,0.010376,0.011463,0.012434
1,0.009903,0.011307,0.01087
2,0.010777,0.011752,0.012323
3,0.011582,0.013711,0.014213
4,0.053935,0.052868,0.070915


In [62]:
mae_NN.mean()

mean income           0.019315
median income         0.020220
median house value    0.024151
dtype: float64