# Neural Network

In [35]:
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error 
import keras.optimizers
from matplotlib import pyplot as plt
import seaborn as sb
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
import warnings 
from sklearn.model_selection import KFold
warnings.filterwarnings('ignore')
warnings.filterwarnings('ignore', category=DeprecationWarning)

### Read the Data

In [9]:
deepsolar_curated=pd.read_csv('../SharedDatasets/deepsolar_LW2.csv',encoding = "ISO-8859-1",index_col=0)

In [10]:
deepsolar_curated.shape

(71305, 60)

In [11]:
deepsolar_curated.columns

Index(['population_density', 'education_less_than_high_school_rate',
       'education_high_school_graduate_rate', 'education_college_rate',
       'education_bachelor_rate', 'education_master_rate', 'race_white_rate',
       'race_black_africa_rate', 'race_other_rate', 'race_two_more_rate',
       'poverty_family_below_poverty_level_rate', 'heating_fuel_gas_rate',
       'heating_fuel_electricity_rate', 'heating_fuel_fuel_oil_kerosene_rate',
       'heating_fuel_coal_coke_rate', 'heating_fuel_solar_rate',
       'heating_fuel_none_rate', 'median_household_income',
       'electricity_consume_residential', 'average_household_size',
       'housing_unit_median_gross_rent', 'lat', 'lon', 'frost_days',
       'relative_humidity', 'daily_solar_radiation', 'wind_speed',
       'age_18_24_rate', 'age_35_44_rate', 'age_45_54_rate', 'age_65_74_rate',
       'age_55_64_rate', 'household_type_family_rate',
       'occupation_information_rate', 'occupation_education_rate',
       'occupation_whol

## Split into Training/Test/Validation Sets

In [20]:
# Build Model
deepsolar=deepsolar_curated.copy()
deepsolar['solar_flag']=deepsolar['number_of_solar_system_per_household'].apply(lambda x: int(x>0))
Y_classifier = deepsolar['solar_flag'].values
Y_regressor = deepsolar['number_of_solar_system_per_household'].values
X = deepsolar.drop(labels=['solar_flag', 'number_of_solar_system_per_household'], axis=1).values
# set a random seed to keep the split the same 
np.random.seed(0)

# shuffle data
shuffle = np.random.permutation(np.arange(X.shape[0]))
X = X[shuffle]
Y_classifier = Y_classifier[shuffle]
Y_regressor = Y_regressor[shuffle]

# split data and labels into test set and initial training set
n_train = int(0.8*X.shape[0])
X_train1 = X[:n_train,:]
X_test = X[n_train:,:]
Y_classifier_train1 = Y_classifier[:n_train]
Y_classifier_test = Y_classifier[n_train:]
Y_regressor_train1 = Y_regressor[:n_train]
Y_regressor_test = Y_regressor[n_train:]

# split training data and labels into training and development sets
n_train = int(0.8*X_train1.shape[0])
X_train = X_train1[:n_train,:]
X_dev = X_train1[n_train:,:]
Y_classifier_train = Y_classifier_train1[:n_train]
#X_train_r=X_train[Y_classifier_train>0]
Y_classifier_dev = Y_classifier_train1[n_train:]
Y_regressor_train = Y_regressor_train1[:n_train]
#Y_regressor_train_r=Y_regressor_train[Y_classifier_train>0]
Y_regressor_dev = Y_regressor_train1[n_train:]

print("{:<35}\t{}".format("Training data shape:", X_train.shape))
print("{:<35}\t{}".format("Training outcome variable - classifier:",Y_classifier_train.shape ))
print("{:<35}\t{}".format("Training outcome variable - regressor:",Y_regressor_train.shape ))
print("{:<35}\t{}".format("Dev data shape:", X_dev.shape))
print("{:<35}\t{}".format("Dev outcome variable - classifier:",Y_classifier_dev.shape ))
print("{:<35}\t{}".format("Dev outcome variable - regressor:",Y_regressor_dev.shape ))
print("{:<35}\t{}".format("Test data shape:", X_test.shape))
print("{:<35}\t{}".format("Test outcome variable - classifier:",Y_classifier_test.shape ))
print("{:<35}\t{}".format("Test outcome variable - regressor:",Y_regressor_test.shape ))

Training data shape:               	(45635, 59)
Training outcome variable - classifier:	(45635,)
Training outcome variable - regressor:	(45635,)
Dev data shape:                    	(11409, 59)
Dev outcome variable - classifier: 	(11409,)
Dev outcome variable - regressor:  	(11409,)
Test data shape:                   	(14261, 59)
Test outcome variable - classifier:	(14261,)
Test outcome variable - regressor: 	(14261,)


### Build/Train the Model

In [44]:
NN_model = Sequential()
#adam=keras.optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

# The Input Layer :
NN_model.add(Dense(59, kernel_initializer='normal',input_dim = X_train.shape[1], activation='relu'))

# The Hidden Layers :
NN_model.add(Dense(59, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(59, kernel_initializer='normal',activation='relu'))
NN_model.add(Dense(59, kernel_initializer='normal',activation='relu'))
#NN_model.add(Dense(59, kernel_initializer='normal',activation='relu'))
#NN_model.add(Dense(59, kernel_initializer='normal',activation='relu'))

# The Output Layer :
NN_model.add(Dense(1, kernel_initializer='normal',activation='relu'))

# Compile the network :
NN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
NN_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_80 (Dense)             (None, 59)                3540      
_________________________________________________________________
dense_81 (Dense)             (None, 59)                3540      
_________________________________________________________________
dense_82 (Dense)             (None, 59)                3540      
_________________________________________________________________
dense_83 (Dense)             (None, 59)                3540      
_________________________________________________________________
dense_84 (Dense)             (None, 1)                 60        
Total params: 14,220
Trainable params: 14,220
Non-trainable params: 0
_________________________________________________________________


In [45]:
NN_model.fit(X_train1,Y_regressor_train1, epochs=50, batch_size=500, validation_split = 0.2)

Train on 45635 samples, validate on 11409 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50


Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1c371e86d30>

### Assess Predictions on Test Set

In [46]:
predictions=NN_model.predict(X_test)

In [47]:
r2_score(Y_regressor_test,predictions)

0.714563644739935

### Try 10-Fold Validation

In [72]:
folds=KFold(n_splits=3, random_state=None, shuffle=True)
X_trains=[]
X_devs=[]
yr_trains=[]
yr_devs=[]

for train_index, dev_index in folds.split(X_train1):

    X_trains.append(X_train1[train_index])
    X_devs.append(X_train1[dev_index])
    yr_trains.append(Y_regressor_train1[train_index])
    yr_devs.append(Y_regressor_train1[dev_index])
    
r2s=[]

layer_size=59

for i in range(len(X_trains)):
    NN_model = Sequential()
    NN_model.add(Dense(layer_size, kernel_initializer='normal',input_dim = X_train.shape[1], activation='relu'))
    NN_model.add(Dense(layer_size, kernel_initializer='normal',activation='relu'))
    NN_model.add(Dense(layer_size, kernel_initializer='normal',activation='relu'))
    NN_model.add(Dense(layer_size, kernel_initializer='normal',activation='relu'))
    NN_model.add(Dense(layer_size, kernel_initializer='normal',activation='relu'))
    #NN_model.add(Dense(layer_size, kernel_initializer='normal',activation='relu'))
    NN_model.add(Dense(1, kernel_initializer='normal',activation='relu'))
    NN_model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
    NN_model.fit(X_trains[i],yr_trains[i], epochs=50, batch_size=500,verbose=0)
    dev_preds=NN_model.predict(X_devs[i])
    r2s.append(r2_score(yr_devs[i],dev_preds))
    
    
    

In [73]:
r2s

[0.6413270510948, 0.6930624220808366, -0.153651698498932]

In [68]:
np.mean(r2s)

0.6652407181487541