In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn import svm
from sklearn.neural_network import MLPClassifier
import pandas

In [2]:
fertility_df = pandas.read_csv('fertility_Diagnosis.txt', header=-1)
labels = ['Season','Age','Childish diseases','Accident or serious trauma','Surgical intervention',
          'High fevers in the last year','Frequency of alcohol consumption','Smoking habit',
          'Number of hours spent sitting per day ene-16','Output']
fertility_df.columns = labels
fertility_df['Output'] = fertility_df['Output'].map({'N': 0, 'O': 1}).astype(int)
fertility_df

Unnamed: 0,Season,Age,Childish diseases,Accident or serious trauma,Surgical intervention,High fevers in the last year,Frequency of alcohol consumption,Smoking habit,Number of hours spent sitting per day ene-16,Output
0,-0.33,0.69,0,1,1,0,0.8,0,0.88,0
1,-0.33,0.94,1,0,1,0,0.8,1,0.31,1
2,-0.33,0.50,1,0,0,0,1.0,-1,0.50,0
3,-0.33,0.75,0,1,1,0,1.0,-1,0.38,0
4,-0.33,0.67,1,1,0,0,0.8,-1,0.50,1
5,-0.33,0.67,1,0,1,0,0.8,0,0.50,0
6,-0.33,0.67,0,0,0,-1,0.8,-1,0.44,0
7,-0.33,1.00,1,1,1,0,0.6,-1,0.38,0
8,1.00,0.64,0,0,1,0,0.8,-1,0.25,0
9,1.00,0.61,1,0,0,0,1.0,-1,0.25,0


In [3]:
training_data, test_data, training_output, test_output = train_test_split(fertility_df, fertility_df['Output'], test_size=0.3)
del training_data['Output']
del test_data['Output']

In [4]:
def normalize_features(feature_matrix):
    norms = np.linalg.norm(feature_matrix, axis=0)
    return feature_matrix / norms, norms

In [5]:
normalized_training_data, norms = normalize_features(training_data)

In [6]:
print(normalized_training_data)

      Season       Age  Childish diseases  Accident or serious trauma  \
90 -0.048222  0.143896           0.131306                    0.182574   
88 -0.048222  0.147449           0.131306                    0.182574   
71  0.048222  0.122578           0.131306                    0.000000   
47 -0.048222  0.127908           0.131306                    0.182574   
44 -0.146127  0.094154           0.131306                    0.182574   
17  0.146127  0.122578           0.131306                    0.000000   
6  -0.048222  0.119025           0.000000                    0.000000   
72  0.146127  0.099484           0.131306                    0.000000   
41 -0.146127  0.099484           0.131306                    0.182574   
23  0.146127  0.122578           0.131306                    0.000000   
0  -0.048222  0.122578           0.000000                    0.182574   
61 -0.146127  0.113696           0.131306                    0.000000   
68 -0.048222  0.088825           0.131306          

In [7]:
clf = MLPClassifier(alpha=1e-5)
clf.fit(training_data, training_output)
print(clf)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)




In [8]:
clf.get_params()

{'activation': 'relu',
 'alpha': 1e-05,
 'batch_size': 'auto',
 'beta_1': 0.9,
 'beta_2': 0.999,
 'early_stopping': False,
 'epsilon': 1e-08,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'learning_rate_init': 0.001,
 'max_iter': 200,
 'momentum': 0.9,
 'nesterovs_momentum': True,
 'power_t': 0.5,
 'random_state': None,
 'shuffle': True,
 'solver': 'adam',
 'tol': 0.0001,
 'validation_fraction': 0.1,
 'verbose': False,
 'warm_start': False}

In [9]:
training_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 70 entries, 90 to 84
Data columns (total 9 columns):
Season                                          70 non-null float64
Age                                             70 non-null float64
Childish diseases                               70 non-null int64
Accident or serious trauma                      70 non-null int64
Surgical intervention                           70 non-null int64
High fevers in the last year                    70 non-null int64
Frequency of alcohol consumption                70 non-null float64
Smoking habit                                   70 non-null int64
Number of hours spent sitting per day ene-16    70 non-null float64
dtypes: float64(4), int64(5)
memory usage: 5.5 KB


In [11]:
training_output.describe()

count    70.000000
mean      0.128571
std       0.337142
min       0.000000
25%       0.000000
50%       0.000000
75%       0.000000
max       1.000000
Name: Output, dtype: float64

In [12]:
test_output.describe()

count    30.000000
mean      0.100000
std       0.305129
min       0.000000
25%       0.000000
50%       0.000000
75%       0.000000
max       1.000000
Name: Output, dtype: float64