# Dimension Reduction and Neural Network

### Importing original dataset and splitting them for training and testing.

In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

#Dressing The Data Set
df = pd.read_csv('Most-Recent-Cohorts-Treasury-Elements.csv')
column_selector=[] # picks columns 5 thru 12 and 14 through the rest
for i in range(92):
        if(i<5 or i==13): #these columns are exclude from the dataset
            column_selector.append(False)
        else:
            column_selector.append(True) #these are accepted

    
PcaX = df.iloc[:, column_selector ].values   # features
Pcay = df.iloc[:, 13].values #target MEDIAN HH INC


LdaX = df.iloc[:, column_selector].values   
Lday = df.iloc[:, 13].values

FaX = df.iloc[:, column_selector].values  
Fay = df.iloc[:, 13].values

PcaX_train, PcaX_test, Pcay_train, Pcay_test = train_test_split(PcaX, Pcay, test_size=0.25)
LdaX_train, LdaX_test, Lday_train, Lday_test = train_test_split(LdaX, Lday, test_size=0.25)
FaX_train, FaX_test, Fay_train, Fay_test = train_test_split(FaX, Fay, test_size=0.25)


#### Principle Component Analysis 

In [26]:
#Principal Component Analysis
# Always scale data for good results on PCA

PcaX_scale = StandardScaler()
PcaX_train = PcaX_scale.fit_transform(PcaX_train)
PcaX_test = PcaX_scale.transform(PcaX_test)

#PCA Part
from sklearn.decomposition import PCA
pca = PCA(n_components=None)
pca.fit_transform(PcaX_train)
pca = PCA(n_components=2)
PcaX_train = pca.fit_transform(PcaX_train)
PcaX_test = pca.transform(PcaX_test)



#### Linear Discriminant Analysis

In [27]:

# Split in training and testing

# Scale
LdaX_scale = StandardScaler()
LdaX_train = LdaX_scale.fit_transform(LdaX_train)
LdaX_test = LdaX_scale.transform(LdaX_test)

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
lda = LinearDiscriminantAnalysis(n_components=2)
# since LDA is supervised, also need to pass y_train
LdaX_train = lda.fit_transform(LdaX_train.astype(int), Lday_train.astype(int))
LdaX_test = lda.transform(LdaX_test)





#### Factor Analysis

In [28]:

# Split in training and testing

# Always scale data for good results on PCA
FaX_scale = StandardScaler()
FaX_train = FaX_scale.fit_transform(FaX_train)
FaX_test = FaX_scale.transform(FaX_test)

from sklearn.decomposition import FactorAnalysis
fa = FactorAnalysis(n_components=2)
FaX_train = fa.fit_transform(FaX_train)
FaX_test = fa.transform(FaX_test)


### Testing The Three New Datasets by Training Linear Regression Models

In [29]:

clf = LinearRegression()
clf.fit(PcaX_train,Pcay_train)
clf.score(PcaX_test,Pcay_test)


0.4080340818290376

In [30]:
clf = LinearRegression()
clf.fit(FaX_train,Fay_train)
clf.score(FaX_test,Fay_test)

0.3964442468415504

In [31]:
#This is inaccurate because I had to convert the dataset into ints values be cautious 
clf = LinearRegression()
clf.fit(LdaX_train,Lday_train)
clf.score(LdaX_test,Lday_test)

0.6347963064084712

### Combining the New Datasets to Different Activation
Creating nine regression models. Three for each activation function: sigmoid, ReLU, and tanh. 

In [34]:
#Using sklearn instead
#Combining sigmoid function and the three dimension reduction
from sklearn.neural_network import MLPRegressor

NNsigmFA_reg = MLPRegressor(activation='logistic',solver='sgd',max_iter=1000)
NNsigmFA_reg.fit(FaX_train,Fay_train)
NNsigmFA_reg.predict(FaX_test)
print('Sigmoid and FA=',NNsigmFA_reg.score(FaX_test,Fay_test))

NNsigmPCA_reg = MLPRegressor(activation='logistic',solver='sgd',max_iter=1000)
NNsigmPCA_reg.fit(PcaX_train,Pcay_train)
NNsigmPCA_reg.predict(PcaX_test)
print('Sigmoid and PCA=',NNsigmPCA_reg.score(PcaX_test,Pcay_test))

NNsigmLDA_reg = MLPRegressor(activation='logistic',solver='sgd',max_iter=1000)
NNsigmLDA_reg.fit(LdaX_train,Lday_train)
NNsigmLDA_reg.predict(LdaX_test)
print('Sigmoid and LDA=',NNsigmLDA_reg.score(LdaX_test,Lday_test))

Sigmoid and FA= 0.0015637694633382493
Sigmoid and PCA= 0.3826017301670845
Sigmoid and LDA= 0.7547775925129232


In [36]:
#Combining relu function and the three dimension reduction
NNreluFA_reg = MLPRegressor(activation='relu',max_iter=1000)
NNreluFA_reg.fit(FaX_train,Fay_train)
NNreluFA_reg.predict(FaX_test)
print('ReLU and FA=',NNreluFA_reg.score(FaX_test,Fay_test))

NNreluPCA_reg = MLPRegressor(activation='relu',max_iter=1000)
NNreluPCA_reg.fit(PcaX_train,Pcay_train)
NNreluPCA_reg.predict(PcaX_test)
print('ReLU and PCA=',NNreluPCA_reg.score(PcaX_test,Pcay_test))

NNreluLDA_reg = MLPRegressor(activation='relu',max_iter=1000)
NNreluLDA_reg.fit(LdaX_train,Lday_train)
NNreluLDA_reg.predict(LdaX_test)
print('ReLU and LDA=',NNreluLDA_reg.score(LdaX_test,Lday_test))



ReLU and FA= 0.4400805863850834
ReLU and PCA= 0.5099716576135929
ReLU and LDA= 0.7754775300442343


In [37]:
#Combining tanh function and the three dimension reduction
NNtanhFA_reg = MLPRegressor(activation='tanh',solver='sgd',max_iter=1000)
NNtanhFA_reg.fit(FaX_train,Fay_train)
NNtanhFA_reg.predict(FaX_test)
print('Tanh and FA=', NNtanhFA_reg.score(FaX_test,Fay_test))

NNtanhPCA_reg = MLPRegressor(activation='tanh',solver='sgd',max_iter=1000)
NNtanhPCA_reg.fit(PcaX_train,Pcay_train)
NNtanhPCA_reg.predict(PcaX_test)
print('Tanh and PCA=',NNtanhPCA_reg.score(PcaX_test,Pcay_test))

NNtanhLDA_reg = MLPRegressor(activation='tanh',solver='sgd',max_iter=1000)
NNtanhLDA_reg.fit(LdaX_train,Lday_train)
NNtanhLDA_reg.predict(LdaX_test)
print('Tanh and LDA=',NNtanhLDA_reg.score(LdaX_test,Lday_test))

Tanh and FA= 0.392948179473934
Tanh and PCA= 0.5757658742720374
Tanh and LDA= 0.7238271957639453


### Experiment to see if dimension reduction was worth it.

In [40]:
X = df.iloc[:, column_selector ].values   # features
y = df.iloc[:, 13].values #target MEDIAN HH INC

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

clf = LinearRegression()
clf.fit(X_train,y_train)
print('Linear Regression and original=',clf.score(X_test,y_test))

#Neural Network with the original dataset without dimension recution
NNtanh_reg = MLPRegressor(activation='tanh',solver='sgd',max_iter=1000)
NNtanh_reg.fit(X_train,y_train)
NNtanh_reg.predict(X_test)
print('Tanh and orignal=',NNtanh_reg.score(X_test,y_test))

NNrelu_reg = MLPRegressor(activation='relu',max_iter=1000)
NNrelu_reg.fit(X_train,y_train)
NNrelu_reg.predict(X_test)
print('ReLU and original=', NNrelu_reg.score(X_test,y_test))

NNsigm_reg = MLPRegressor(activation='logistic',solver='sgd',max_iter=1000)
NNsigm_reg.fit(X_train,y_train)
NNsigm_reg.predict(X_test)
print('Sigmoid with original=', NNsigm_reg.score(X_test,y_test))

Linear Regression and original= 0.9837408949617984
Tanh and orignal= 0.5296111947642794
ReLU and original= 0.6784948607565653
Sigmoid with original= 0.681853868289383
