In [17]:
import numpy as np
import pandas as pd
import random 
from sklearn.model_selection import train_test_split

In [2]:
area_dict = {'California': 423967, 'Texas': 695662, 'Florida': 170312, 'New York': 141297, 'Pennsylvania': 119280}
population_dict = {'California' : 39538223, 'Texas' : 29145505 , 'Florida' : 21538187, 'New York' : 20201249, 'Pennsylvania' : 13002700}
density_dict = {'California' : 90.00, 'Texas' : 41.896 , 'Florida' : 126.5, 'New York' : 142.97, 'Pennsylvania' : 109.30}
area = pd.Series(area_dict)


In [3]:
#Run a dictionary list
area_dict = {area : amount for area,amount in area_dict.items()}
#Create a series so can convert to a dataframe
series_area = pd.Series(area_dict)
series_pop = pd.Series(population_dict)
series_density = pd.Series(density_dict)
#Check that it worked
print("Area")
print(series_area)
print("Population")
print(series_pop)

Area
California      423967
Texas           695662
Florida         170312
New York        141297
Pennsylvania    119280
dtype: int64
Population
California      39538223
Texas           29145505
Florida         21538187
New York        20201249
Pennsylvania    13002700
dtype: int64


In [4]:
df_area = series_area.to_frame(name="area").reset_index().rename(columns={"index": "state"})
df_pop = series_pop.to_frame(name="area").reset_index().rename(columns={"index": "state"})
df_density = series_density.to_frame(name="area").reset_index().rename(columns={"index": "state"})
print(df_area)
print(df_density)

          state    area
0    California  423967
1         Texas  695662
2       Florida  170312
3      New York  141297
4  Pennsylvania  119280
          state     area
0    California   90.000
1         Texas   41.896
2       Florida  126.500
3      New York  142.970
4  Pennsylvania  109.300


In [5]:
df_density.rename(columns={"area": "density"}, inplace=True)
df_pop.rename(columns={"area": "population"}, inplace=True)

In [6]:
data_1 = pd.merge(df_area, df_pop, on="state")


In [7]:
data = pd.merge(data_1, df_density, on="state")

In [8]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   state       5 non-null      object 
 1   area        5 non-null      int64  
 2   population  5 non-null      int64  
 3   density     5 non-null      float64
dtypes: float64(1), int64(2), object(1)
memory usage: 292.0+ bytes


In [9]:
data.loc[0:2]

Unnamed: 0,state,area,population,density
0,California,423967,39538223,90.0
1,Texas,695662,29145505,41.896
2,Florida,170312,21538187,126.5


In [10]:
data.iloc[0:2]

Unnamed: 0,state,area,population,density
0,California,423967,39538223,90.0
1,Texas,695662,29145505,41.896


In [11]:
#The : before 'Florida' means select all rows from the beginning up to and including the row with the index 'Florida'.
data.loc[:'Florida']

Unnamed: 0,state,area,population,density
0,California,423967,39538223,90.0
1,Texas,695662,29145505,41.896
2,Florida,170312,21538187,126.5
3,New York,141297,20201249,142.97
4,Pennsylvania,119280,13002700,109.3


In [12]:
index = pd.MultiIndex.from_product([[2013,2014], [1,2]], names=['year','visit'])
columns = pd.MultiIndex.from_product([['Bob','Lob','Sob'],['HR','Temp']], names=['subject','type'])


In [13]:
data = np.round(np.random.randn(4,6),1)
data[:,::2] *=10
data +=37

This is fundamentally four-dimensional data, where the dimensions are the subject,
the measurement type, the year, and the visit number.

In [14]:
#Create dataframe
health_data = pd.DataFrame(data, index=index, columns=columns)
health_data

Unnamed: 0_level_0,subject,Bob,Bob,Lob,Lob,Sob,Sob
Unnamed: 0_level_1,type,HR,Temp,HR,Temp,HR,Temp
year,visit,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2013,1,35.0,36.9,27.0,36.9,41.0,36.9
2013,2,41.0,37.4,37.0,37.0,34.0,37.8
2014,1,51.0,38.4,45.0,35.8,33.0,36.2
2014,2,50.0,36.1,32.0,38.0,33.0,36.9


 With this in place we can, for
example, index the top-level column by the person’s name and get a full DataFrame
containing just that person’s information

In [15]:
health_data['Lob']

Unnamed: 0_level_0,type,HR,Temp
year,visit,Unnamed: 2_level_1,Unnamed: 3_level_1
2013,1,27.0,36.9
2013,2,37.0,37.0
2014,1,45.0,35.8
2014,2,32.0,38.0


In [16]:
health_data['Lob', 'HR']

year  visit
2013  1        27.0
      2        37.0
2014  1        45.0
      2        32.0
Name: (Lob, HR), dtype: float64

In [18]:
ad = pd.read_csv("https://tinyurl.com/y2qmhfsr")

In [26]:
#exract input columns, scale down by 255
ai = (ad.iloc[:, 0:3].values/255.0)
ao = (ad.iloc[:, -1].values)
#Split train and test data sets
X_train, X_test, Y_train, Y_test = train_test_split(ai, ao, test_size=1/3)
n = X_train.shape[0] #number of training records
print(Y_test)

[0 1 0 0 1 1 1 0 1 1 0 1 1 0 1 1 1 0 1 0 0 1 0 1 1 1 1 1 0 1 0 1 0 0 1 0 0
 1 0 1 0 1 0 0 0 1 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 1 1 1 0 1 1 1 0 1 1 0 1
 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 1 1 0 0 0 1 1
 1 1 0 1 1 0 1 0 1 1 1 1 0 0 0 0 0 1 0 0 1 1 1 1 1 0 1 0 1 0 0 0 0 0 1 1 0
 0 0 1 1 0 1 1 1 1 1 0 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1 1 0 0 1 1 0 1 0 0 1
 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 1 0 0 1 0 0 1 0
 1 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 0 1 1 0 1 1 0 0 1 1 1 1 1
 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 0 0 0 0 1 1 1 1 1 1 0 1 0 0 0 0 1 1 1 0 0
 0 1 0 1 1 1 0 1 1 1 1 0 1 0 1 1 0 1 0 1 1 1 1 0 0 1 1 0 1 1 0 0 0 0 0 1 0
 1 0 0 0 1 0 1 1 0 1 1 1 0 1 0 0 1 1 1 1 0 1 0 0 0 0 0 1 0 0 1 1 0 0 1 1 1
 1 0 1 0 1 0 0 0 1 0 1 0 1 0 1 0 1 1 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 1 0 0 1
 1 1 1 1 0 1 1 1 0 1 1 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 0 1 1 0
 1 1 0 1 0]


In [37]:
#Build neural network with weights and biases
#with random initialization
whidden = np.random.rand(3,3)
woutput = np.random.rand(1,3)
bhidden = np.random.rand(3,1)
boutput = np.random.rand(1,1)

In [38]:
#Activation functions
relu = lambda x: np.maximum(x,0)
logistic = lambda x: 1/(1+np.exp(-x))

In [41]:
def forward_prop(X):
    z1 = whidden @ X + b_hidden
    a1 = relu(z1)
    z2 = woutput @ a1 + boutput
    a2 = logistic(z2)
    return z1, a1, z2, a2

In [42]:
# Calculate accuracy 
test_predictions = forward_prop(X_test.transpose())[3] 
# grab only output layer, A2 
test_comparisons = np.equal((test_predictions >= .5).flatten().astype(int), Y_test) 
accuracy = sum(test_comparisons.astype(int) / X_test.shape[0]) 
print("ACCURACY: ", accuracy)

ACCURACY:  0.6124721603563467


In [43]:
print(whidden)

[[0.14960274 0.26776492 0.55152764]
 [0.14665895 0.67861792 0.42953265]
 [0.02657676 0.45717897 0.89360827]]
