In [None]:
# import packages
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from sklearn.model_selection import GridSearchCV

In [None]:
# Load the three competition CSVs in order:
# training features, test features, training labels.
training_features_data, test_features_data, training_set_labels = (
    pd.read_csv(csv_path, sep=',')
    for csv_path in (
        "../input/flu-shot-learning-h1n1-seasonal-flu-vaccines/training_set_features.csv",
        "../input/flu-shot-learning-h1n1-seasonal-flu-vaccines/test_set_features.csv",
        "../input/flu-shot-learning-h1n1-seasonal-flu-vaccines/training_set_labels.csv",
    )
)



In [None]:
# Eliminate null values.

# Numeric columns: impute with the column mean. numeric_only=True is needed
# because the frame also holds string columns (handled below); pandas >= 2.0
# raises a TypeError when asked to average those instead of skipping them.
training_features_data = training_features_data.fillna(
    training_features_data.mean(numeric_only=True)
)

# String columns: whatever is still missing becomes its own explicit category.
training_features_data = training_features_data.fillna('out-of-category')

In [None]:
# Sanity check: per-column count of missing values in the training features.
# Every entry is expected to be 0 once the frame has been imputed.
training_features_data.isna().sum()

In [None]:
# Encode the features as ordinal codes (str --> float).
# NOTE(review): the encoder is fitted on the whole frame, so every column --
# not only the string ones -- is replaced by ordinal codes. Confirm intended.
enc = OrdinalEncoder()
training_features_data_arr = enc.fit_transform(training_features_data)

# Re-wrap the encoded array in a DataFrame carrying the original column names.
col_names_list = training_features_data.columns
encoded_categorical_df = pd.DataFrame(training_features_data_arr, columns=col_names_list)

In [None]:
# Standardization: StandardScaler rescales each column to zero mean and unit
# variance, so the values are NOT confined to the 0-1 range.
scaler = StandardScaler()
normalized_arr = scaler.fit_transform(encoded_categorical_df)

normalized_df = pd.DataFrame(normalized_arr, columns=encoded_categorical_df.columns)

In [None]:
# Inspect column dtypes and non-null counts of the scaled training frame
# to confirm every feature ended up numeric.
normalized_df.info()

## Preprocess the test dataset

In [None]:
# Inspect column dtypes and non-null counts of the raw test features.
test_features_data.info()

In [None]:
# Eliminate null values.

# Numeric columns: impute with the column mean. numeric_only=True is needed
# because the frame also holds string columns (handled below); pandas >= 2.0
# raises a TypeError when asked to average those instead of skipping them.
test_features_data = test_features_data.fillna(
    test_features_data.mean(numeric_only=True)
)

# String columns: whatever is still missing becomes its own explicit category.
test_features_data = test_features_data.fillna('out-of-category')

In [None]:
# Sanity check: per-column count of missing values in the test features.
# Every entry is expected to be 0 once the frame has been imputed.
test_features_data.isna().sum()

In [None]:
# Encode the test features as ordinal codes (str --> float).
# NOTE(review): a fresh encoder is fitted on the test frame here (rebinding
# `enc`), so its value-to-code mapping matches the training one only if each
# column holds the same set of distinct values in both frames -- verify.
enc = OrdinalEncoder()
test_features_data_arr = enc.fit_transform(test_features_data)

# Re-wrap the encoded array in a DataFrame carrying the original column names.
col_names_list = test_features_data.columns
test_encoded_categorical_df = pd.DataFrame(test_features_data_arr, columns=col_names_list)

In [None]:
# Inspect column dtypes and non-null counts of the encoded test frame.
test_encoded_categorical_df.info()

In [None]:
# Scale the test features with the `scaler` object already in scope
# (transform only, no re-fit). This is not a min-max scaling step, so the
# result is not limited to the 0-1 range.
test_normalized_arr = scaler.transform(test_encoded_categorical_df)
test_normalized_df = pd.DataFrame(
    test_normalized_arr,
    columns=test_encoded_categorical_df.columns,
)

In [None]:
# Separate the feature matrix from the target (the h1n1_vaccine label column).
# NOTE(review): assumes the label rows are in the same order as the feature rows.
X = normalized_df
y = training_set_labels['h1n1_vaccine'].to_numpy()

In [None]:
# Hold out 20% of the rows for evaluation, stratified on the label.
# random_state pins the shuffle so the split -- and every score computed on
# it -- is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Stratified shuffle-split cross-validator with 5 splits.
cv = StratifiedShuffleSplit(n_splits=5, random_state=42)

In [None]:
# display test scores and return result string and indexes of false samples
def display_test_scores(test, pred, threshold=0.5):
    """Build a short text report with the ROC AUC and locate misclassified samples.

    Parameters
    ----------
    test : array-like
        True labels.
    pred : array-like
        Predictions: either hard labels or continuous scores.
    threshold : float, default 0.5
        Cut-off used to turn `pred` into 0/1 labels when it holds continuous
        scores rather than values that already occur in `test`.

    Returns
    -------
    str_out : str
        Report text containing the AUC formatted to four decimals.
    false_indexes : tuple of ndarray
        Output of ``np.where`` marking positions where the (binarized)
        prediction differs from the true label.
    """
    test_arr = np.asarray(test)
    pred_arr = np.asarray(pred)

    str_out = ""
    str_out += ("TEST SCORES\n")
    str_out += ("\n")

    # AUC is rank-based, so it is computed on the raw scores.
    auc = roc_auc_score(test, pred)
    str_out += ("AUC: {:.4f}\n".format(auc))
    str_out += ("\n")

    # Comparing labels with continuous scores by exact equality would flag
    # almost every sample as wrong. Binarize first, unless the predictions
    # already consist solely of label values (then compare them unchanged).
    if np.isin(pred_arr, np.unique(test_arr)).all():
        hard_pred = pred_arr
    else:
        hard_pred = (pred_arr >= threshold).astype(int)

    false_indexes = np.where(test_arr != hard_pred)
    return str_out, false_indexes

# Neural networks with one hidden layer

In [None]:
# One hidden layer (hidden_layer_sizes=10), logistic activation, batch size 256.
nn1 = MLPRegressor(
    hidden_layer_sizes=10,
    activation='logistic',
    solver='adam',
    batch_size=256,
    max_iter=1000,
    tol=1e-5,
    random_state=0,
).fit(X_train, y_train)

# Score the held-out split.
y_pred1 = nn1.predict(X_test)

# Print the AUC report; the helper's second return value is kept in false1.
results1, false1 = display_test_scores(y_test, y_pred1)
print(results1)

In [None]:
# One hidden layer (hidden_layer_sizes=10), logistic activation, batch size 512.
nn2 = MLPRegressor(
    hidden_layer_sizes=10,
    activation='logistic',
    solver='adam',
    batch_size=512,
    max_iter=1000,
    tol=1e-5,
    random_state=0,
).fit(X_train, y_train)

# Score the held-out split.
y_pred2 = nn2.predict(X_test)

# Print the AUC report; the helper's second return value is kept in false2.
results2, false2 = display_test_scores(y_test, y_pred2)
print(results2)

In [None]:
# One hidden layer (hidden_layer_sizes=10), logistic activation, batch size 1024.
nn3 = MLPRegressor(
    hidden_layer_sizes=10,
    activation='logistic',
    solver='adam',
    batch_size=1024,
    max_iter=1000,
    tol=1e-5,
    random_state=0,
).fit(X_train, y_train)

# Score the held-out split.
y_pred3 = nn3.predict(X_test)

# Print the AUC report; the helper's second return value is kept in false3.
results3, false3 = display_test_scores(y_test, y_pred3)
print(results3)

In [None]:
# One hidden layer (hidden_layer_sizes=10), ReLU activation, batch size 512.
nn4 = MLPRegressor(
    hidden_layer_sizes=10,
    activation='relu',
    solver='adam',
    batch_size=512,
    max_iter=1000,
    tol=1e-5,
    random_state=0,
).fit(X_train, y_train)

# Score the held-out split.
y_pred4 = nn4.predict(X_test)

# Print the AUC report; the helper's second return value is kept in false4.
results4, false4 = display_test_scores(y_test, y_pred4)
print(results4)

In [None]:
# Final model: same settings as the logistic / batch-size-512 network, but
# fitted on the full training data (X, y) instead of the train split.
nn5 = MLPRegressor(
    hidden_layer_sizes=10,
    activation='logistic',
    solver='adam',
    batch_size=512,
    max_iter=1000,
    tol=1e-5,
    random_state=0,
).fit(X, y)

# Predictions for the scaled test features.
y_pred5 = nn5.predict(test_normalized_df)


In [None]:
# Count how many predictions fall outside the [0, 1] interval.
# numpy is already imported as `np` in the notebook's import cell, so it is
# not re-imported here.
np.sum(np.logical_or(np.asarray(y_pred5) > 1, np.asarray(y_pred5) < 0), axis=0)

In [None]:
# Squash the raw regressor outputs through a sigmoid so every value lies in (0, 1).
# The outputs are recomputed from nn5 rather than read back from y_pred5, so
# re-running this cell does not apply the sigmoid a second time.
y_pred5 = 1 / (1 + np.exp(-nn5.predict(test_normalized_df)))


In [None]:
# Write the prediction results to a file.

df_pred_h1n1 = pd.DataFrame(y_pred5, columns=['h1n1_vaccine'])

# Take the ids from the test features when that column is available: the
# positional index of the prediction frame always runs 0..N-1 and only matches
# the real ids if the test set happens to be numbered from zero.
# NOTE(review): presumably the test CSV carries a respondent_id column -- confirm.
if "respondent_id" in test_features_data.columns:
    df_pred_h1n1["respondent_id"] = test_features_data["respondent_id"].to_numpy()
else:
    df_pred_h1n1["respondent_id"] = df_pred_h1n1.index

df_pred_h1n1_nn = df_pred_h1n1[['respondent_id', 'h1n1_vaccine']]

df_pred_h1n1.to_csv('/kaggle/working/df_h1n1_nn_log_son.csv', columns=['respondent_id', 'h1n1_vaccine'],
                    index=False, sep=',')

df_pred_h1n1.head()