In [1]:
# Import our dependencies
import pandas as pd
import matplotlib as plt
from sklearn.datasets import make_blobs
import sklearn as skl
import tensorflow as tf

In [2]:
# Import our input dataset
charity_data_df = pd.read_csv('charity_data.csv')
charity_data_df.head()

Unnamed: 0,EIN,NAME,APPLICATION_TYPE,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,T10,Independent,C1000,ProductDev,Association,1,0,N,5000,1
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,T3,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,1
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,T5,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,T3,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,1
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,T3,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,1


In [3]:
#Bucketing
charity_data_df_count = charity_data_df.CLASSIFICATION.value_counts()
charity_data_df_count

C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
         ...  
C2380        1
C2600        1
C1570        1
C2170        1
C1283        1
Name: CLASSIFICATION, Length: 71, dtype: int64

In [4]:
# Encode categorical variables using one-hot encoding.Encode categorical variables using one-hot encoding.
replace_classification = list(charity_data_df_count[charity_data_df_count < 100].index)

# Replace in DataFrame
for classification in replace_classification:
    charity_data_df.CLASSIFICATION = charity_data_df.CLASSIFICATION.replace(classification,"Other")


# Check to make sure binning was successful
charity_data_df.CLASSIFICATION.value_counts()


C1000    17326
C2000     6074
C1200     4837
C3000     1918
C2100     1883
C7000      777
Other      669
C1700      287
C4000      194
C5000      116
C1270      114
C2700      104
Name: CLASSIFICATION, dtype: int64

In [5]:
USE_CASE_data_df_count = charity_data_df.USE_CASE.value_counts()
USE_CASE_data_df_count

Preservation     28095
ProductDev        5671
CommunityServ      384
Heathcare          146
Other                3
Name: USE_CASE, dtype: int64

In [6]:
ORGANIZATION_count = charity_data_df.ORGANIZATION.value_counts()
ORGANIZATION_count


Trust           23515
Association     10255
Co-operative      486
Corporation        43
Name: ORGANIZATION, dtype: int64

In [7]:
STATUS_count = charity_data_df.STATUS.value_counts()
STATUS_count


1    34294
0        5
Name: STATUS, dtype: int64

In [8]:
INCOME_AMT = charity_data_df.INCOME_AMT.value_counts()
INCOME_AMT

0                24388
25000-99999       3747
100000-499999     3374
1M-5M              955
1-9999             728
10000-24999        543
10M-50M            240
5M-10M             185
50M+               139
Name: INCOME_AMT, dtype: int64

In [9]:
APPLICATION_TYPE = charity_data_df.APPLICATION_TYPE.value_counts()
APPLICATION_TYPE

T3     27037
T4      1542
T6      1216
T5      1173
T19     1065
T8       737
T7       725
T10      528
T9       156
T13       66
T12       27
T2        16
T25        3
T14        3
T29        2
T15        2
T17        1
Name: APPLICATION_TYPE, dtype: int64

In [10]:
# Encode categorical variables using one-hot encoding.Encode categorical variables using one-hot encoding.
replace_application = list(APPLICATION_TYPE[APPLICATION_TYPE < 100].index)

# Replace in DataFrame
for application in replace_application:
    charity_data_df.APPLICATION_TYPE = charity_data_df.APPLICATION_TYPE.replace(application,"Other")


# Check to make sure binning was successful
charity_data_df.APPLICATION_TYPE.value_counts()


T3       27037
T4        1542
T6        1216
T5        1173
T19       1065
T8         737
T7         725
T10        528
T9         156
Other      120
Name: APPLICATION_TYPE, dtype: int64

In [11]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
encode_df = pd.DataFrame(enc.fit_transform(charity_data_df.APPLICATION_TYPE.values.reshape(-1,1)))

# Rename encoded columns
encode_df.columns = enc.get_feature_names(['APPLICATION_TYPE'])
encode_df.head()


Unnamed: 0,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,APPLICATION_TYPE_T8,APPLICATION_TYPE_T9
0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
# Merge the two DataFrames together and drop the Country column
New_charity = charity_data_df.merge(encode_df,left_index=True,right_index=True).drop("APPLICATION_TYPE",1)
New_charity

Unnamed: 0,EIN,NAME,AFFILIATION,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,...,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,APPLICATION_TYPE_T5,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,APPLICATION_TYPE_T8,APPLICATION_TYPE_T9
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,Independent,C1000,ProductDev,Association,1,0,N,5000,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,Independent,C2000,Preservation,Co-operative,1,1-9999,N,108590,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,CompanySponsored,C2000,Preservation,Trust,1,10000-24999,N,6692,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,Independent,C1000,Heathcare,Trust,1,100000-499999,N,142590,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,Independent,C1000,ProductDev,Association,1,0,N,5000,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,CompanySponsored,C3000,ProductDev,Association,1,0,N,5000,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34296,996012607,PTA HAWAII CONGRESS,CompanySponsored,C2000,Preservation,Association,1,0,N,5000,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,Independent,C3000,ProductDev,Association,1,0,N,5000,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [13]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
aff_encode_df = pd.DataFrame(enc.fit_transform(charity_data_df.AFFILIATION.values.reshape(-1,1)))

# Rename encoded columns
aff_encode_df.columns = enc.get_feature_names(['AFFILIATION'])
aff_encode_df.head()


Unnamed: 0,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,AFFILIATION_Independent,AFFILIATION_National,AFFILIATION_Other,AFFILIATION_Regional
0,0.0,0.0,1.0,0.0,0.0,0.0
1,0.0,0.0,1.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,1.0,0.0,0.0,0.0


In [14]:
# Merge the two DataFrames together and drop the Country column
New_charity_2 = New_charity.merge(aff_encode_df,left_index=True,right_index=True).drop("AFFILIATION",1)
New_charity_2

Unnamed: 0,EIN,NAME,CLASSIFICATION,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL,...,APPLICATION_TYPE_T6,APPLICATION_TYPE_T7,APPLICATION_TYPE_T8,APPLICATION_TYPE_T9,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,AFFILIATION_Independent,AFFILIATION_National,AFFILIATION_Other,AFFILIATION_Regional
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,C1000,ProductDev,Association,1,0,N,5000,1,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,C2000,Preservation,Co-operative,1,1-9999,N,108590,1,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,C3000,ProductDev,Association,1,0,N,5000,0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,C2000,Preservation,Trust,1,10000-24999,N,6692,1,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,C1000,Heathcare,Trust,1,100000-499999,N,142590,1,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,C1000,ProductDev,Association,1,0,N,5000,0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,C3000,ProductDev,Association,1,0,N,5000,0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34296,996012607,PTA HAWAII CONGRESS,C2000,Preservation,Association,1,0,N,5000,0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,C3000,ProductDev,Association,1,0,N,5000,1,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [15]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
class_encode_df = pd.DataFrame(enc.fit_transform(charity_data_df.CLASSIFICATION.values.reshape(-1,1)))

# Rename encoded columns
class_encode_df.columns = enc.get_feature_names(['AFFILIATION'])
class_encode_df.head()


Unnamed: 0,AFFILIATION_C1000,AFFILIATION_C1200,AFFILIATION_C1270,AFFILIATION_C1700,AFFILIATION_C2000,AFFILIATION_C2100,AFFILIATION_C2700,AFFILIATION_C3000,AFFILIATION_C4000,AFFILIATION_C5000,AFFILIATION_C7000,AFFILIATION_Other
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
# Merge the two DataFrames together and drop the Country column
New_charity_3 = New_charity_2.merge(aff_encode_df,left_index=True,right_index=True).drop("CLASSIFICATION",1)
New_charity_3

Unnamed: 0,EIN,NAME,USE_CASE,ORGANIZATION,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,...,AFFILIATION_Independent_x,AFFILIATION_National_x,AFFILIATION_Other_x,AFFILIATION_Regional_x,AFFILIATION_CompanySponsored_y,AFFILIATION_Family/Parent_y,AFFILIATION_Independent_y,AFFILIATION_National_y,AFFILIATION_Other_y,AFFILIATION_Regional_y
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,ProductDev,Association,1,0,N,5000,1,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,Preservation,Co-operative,1,1-9999,N,108590,1,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,ProductDev,Association,1,0,N,5000,0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,Preservation,Trust,1,10000-24999,N,6692,1,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,Heathcare,Trust,1,100000-499999,N,142590,1,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,ProductDev,Association,1,0,N,5000,0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,ProductDev,Association,1,0,N,5000,0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34296,996012607,PTA HAWAII CONGRESS,Preservation,Association,1,0,N,5000,0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,ProductDev,Association,1,0,N,5000,1,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [17]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
class_encode_df = pd.DataFrame(enc.fit_transform(charity_data_df.ORGANIZATION.values.reshape(-1,1)))

# Rename encoded columns
class_encode_df.columns = enc.get_feature_names(['ORGANIZATION'])
class_encode_df.head()

Unnamed: 0,ORGANIZATION_Association,ORGANIZATION_Co-operative,ORGANIZATION_Corporation,ORGANIZATION_Trust
0,1.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0
2,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,1.0
4,0.0,0.0,0.0,1.0


In [18]:
# Merge the two DataFrames together and drop the Country column
New_charity_4 = New_charity_3.merge(aff_encode_df,left_index=True,right_index=True).drop("ORGANIZATION",1)
New_charity_4

Unnamed: 0,EIN,NAME,USE_CASE,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,...,AFFILIATION_Independent_y,AFFILIATION_National_y,AFFILIATION_Other_y,AFFILIATION_Regional_y,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,AFFILIATION_Independent,AFFILIATION_National,AFFILIATION_Other,AFFILIATION_Regional
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,ProductDev,1,0,N,5000,1,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,Preservation,1,1-9999,N,108590,1,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,ProductDev,1,0,N,5000,0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,Preservation,1,10000-24999,N,6692,1,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,Heathcare,1,100000-499999,N,142590,1,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,ProductDev,1,0,N,5000,0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,ProductDev,1,0,N,5000,0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34296,996012607,PTA HAWAII CONGRESS,Preservation,1,0,N,5000,0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,ProductDev,1,0,N,5000,1,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [19]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
class_encode_df = pd.DataFrame(enc.fit_transform(charity_data_df.USE_CASE.values.reshape(-1,1)))

# Rename encoded columns
class_encode_df.columns = enc.get_feature_names(['USE_CASE'])
class_encode_df.head()

Unnamed: 0,USE_CASE_CommunityServ,USE_CASE_Heathcare,USE_CASE_Other,USE_CASE_Preservation,USE_CASE_ProductDev
0,0.0,0.0,0.0,0.0,1.0
1,0.0,0.0,0.0,1.0,0.0
2,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,0.0,0.0


In [20]:
# Merge the two DataFrames together and drop the Country column
charity_df_5 = New_charity_4.merge(aff_encode_df,left_index=True,right_index=True).drop("USE_CASE",1)
charity_df_5

Unnamed: 0,EIN,NAME,STATUS,INCOME_AMT,SPECIAL_CONSIDERATIONS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,...,AFFILIATION_Independent_x,AFFILIATION_National_x,AFFILIATION_Other_x,AFFILIATION_Regional_x,AFFILIATION_CompanySponsored_y,AFFILIATION_Family/Parent_y,AFFILIATION_Independent_y,AFFILIATION_National_y,AFFILIATION_Other_y,AFFILIATION_Regional_y
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,1,0,N,5000,1,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,1,1-9999,N,108590,1,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,1,0,N,5000,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,1,10000-24999,N,6692,1,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,1,100000-499999,N,142590,1,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,1,0,N,5000,0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,1,0,N,5000,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34296,996012607,PTA HAWAII CONGRESS,1,0,N,5000,0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,1,0,N,5000,1,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [21]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
class_encode_df = pd.DataFrame(enc.fit_transform(charity_data_df.SPECIAL_CONSIDERATIONS.values.reshape(-1,1)))

# Rename encoded columns
class_encode_df.columns = enc.get_feature_names(['SPECIAL_CONSIDERATIONS'])
class_encode_df.head()

Unnamed: 0,SPECIAL_CONSIDERATIONS_N,SPECIAL_CONSIDERATIONS_Y
0,1.0,0.0
1,1.0,0.0
2,1.0,0.0
3,1.0,0.0
4,1.0,0.0


In [22]:
# Merge the two DataFrames together and drop the Country column
charity_df_6 = charity_df_5.merge(aff_encode_df,left_index=True,right_index=True).drop("SPECIAL_CONSIDERATIONS",1)
charity_df_6

Unnamed: 0,EIN,NAME,STATUS,INCOME_AMT,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,...,AFFILIATION_Independent_y,AFFILIATION_National_y,AFFILIATION_Other_y,AFFILIATION_Regional_y,AFFILIATION_CompanySponsored,AFFILIATION_Family/Parent,AFFILIATION_Independent,AFFILIATION_National,AFFILIATION_Other,AFFILIATION_Regional
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,1,0,5000,1,0.0,1.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,1,1-9999,108590,1,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,1,0,5000,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,1,10000-24999,6692,1,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,1,100000-499999,142590,1,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,1,0,5000,0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,1,0,5000,0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34296,996012607,PTA HAWAII CONGRESS,1,0,5000,0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,1,0,5000,1,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [23]:
# Create the OneHotEncoder instance
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)

# Fit the encoder and produce encoded DataFrame
class_encode_df = pd.DataFrame(enc.fit_transform(charity_data_df.INCOME_AMT.values.reshape(-1,1)))

# Rename encoded columns
class_encode_df.columns = enc.get_feature_names(['INCOME_AMT'])
class_encode_df.head()

Unnamed: 0,INCOME_AMT_0,INCOME_AMT_1-9999,INCOME_AMT_10000-24999,INCOME_AMT_100000-499999,INCOME_AMT_10M-50M,INCOME_AMT_1M-5M,INCOME_AMT_25000-99999,INCOME_AMT_50M+,INCOME_AMT_5M-10M
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [24]:
# Merge the two DataFrames together and drop the Country column
charity_df_7 = charity_df_6.merge(aff_encode_df,left_index=True,right_index=True).drop("INCOME_AMT",1)
charity_df_7

Unnamed: 0,EIN,NAME,STATUS,ASK_AMT,IS_SUCCESSFUL,APPLICATION_TYPE_Other,APPLICATION_TYPE_T10,APPLICATION_TYPE_T19,APPLICATION_TYPE_T3,APPLICATION_TYPE_T4,...,AFFILIATION_Independent_x,AFFILIATION_National_x,AFFILIATION_Other_x,AFFILIATION_Regional_x,AFFILIATION_CompanySponsored_y,AFFILIATION_Family/Parent_y,AFFILIATION_Independent_y,AFFILIATION_National_y,AFFILIATION_Other_y,AFFILIATION_Regional_y
0,10520599,BLUE KNIGHTS MOTORCYCLE CLUB,1,5000,1,0.0,1.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,10531628,AMERICAN CHESAPEAKE CLUB CHARITABLE TR,1,108590,1,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,10547893,ST CLOUD PROFESSIONAL FIREFIGHTERS,1,5000,0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,10553066,SOUTHSIDE ATHLETIC ASSOCIATION,1,6692,1,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
4,10556103,GENETIC RESEARCH INSTITUTE OF THE DESERT,1,142590,1,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34294,996009318,THE LIONS CLUB OF HONOLULU KAMEHAMEHA,1,5000,0,0.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
34295,996010315,INTERNATIONAL ASSOCIATION OF LIONS CLUBS,1,5000,0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34296,996012607,PTA HAWAII CONGRESS,1,5000,0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
34297,996015768,AMERICAN FEDERATION OF GOVERNMENT EMPLOYEES LO...,1,5000,1,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


In [25]:
#FINAL_charity = charity_df_6.drop(columns=['EIN', 'NAME'])

In [26]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Preprocess numerical data for neural network

# Create a StandardScaler instance
scaler = StandardScaler()

y = charity_df_7.IS_SUCCESSFUL
X = charity_df_7.drop(columns=["EIN","NAME", "IS_SUCCESSFUL"])


# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)



In [27]:
# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pandas as pd
import tensorflow as tf
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 8)                 392       
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 45        
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 6         
Total params: 443
Trainable params: 443
Non-trainable params: 0
_________________________________________________________________


In [29]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  8
hidden_nodes_layer2 = 5

nn = tf.keras.models.Sequential()

number_input_features = len(X_train_scaled[0])
# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the model
nn.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 8)                 392       
_________________________________________________________________
dense_4 (Dense)              (None, 5)                 45        
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 6         
Total params: 443
Trainable params: 443
Non-trainable params: 0
_________________________________________________________________


In [30]:
# Compile the model
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])


In [31]:
# Train the model
fit_model = nn.fit(X_train,y_train,epochs=100)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [32]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pandas as pd
import tensorflow as tf

In [37]:

# Create the SVM model
svm = SVC(kernel='linear')

In [38]:
from sklearn.preprocessing import MinMaxScaler
scaling = MinMaxScaler(feature_range=(-1,1)).fit(X_train)
X_train = scaling.transform(X_train)
X_test = scaling.transform(X_test)

In [39]:
# Train the model
svm.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [35]:
# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

268/268 - 0s - loss: 0.6911 - accuracy: 0.5324
Loss: 0.6910514831542969, Accuracy: 0.5323615074157715


In [59]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  
hidden_nodes_layer2 = 8


nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="relu"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])


In [60]:
from sklearn.ensemble import RandomForestClassifier

In [61]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")


 Random forest predictive accuracy: 0.695
