# Neural Networks 
Neural network for the Spotify dataset 

In [None]:
# Import the basic packages for ML
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

import pandas as pd
%matplotlib inline

# visualization library
import seaborn as sns
sns.set(style="white", color_codes=True)
sns.set_context(rc={"font.family":'sans',"font.size":24,"axes.titlesize":24,"axes.labelsize":24}) 

#Libraries to create the Multi-class Neural Network
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

#Import tensorflow and disable the v2 behavior and eager mode
import tensorflow as tf
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()

In [None]:
# Load the labelled training data (feature columns followed by the label column).
df = pd.read_csv(r'C:\Users\Emelie\Documents\Modern method in ML\Project\project_train.csv')
# Load the test features that predictions are produced for
# (passed straight to the scaler later, so it is expected to hold features only).
X_testR = pd.read_csv(r'C:\Users\Emelie\Documents\Modern method in ML\Project\project_test.csv')

# take away outliers
#df = df.drop([68,94,84], axis=0)

# Separate the features (every column but the last) from the label (last column).
# The label is selected with -1 so it always matches the feature slice instead
# of relying on the label sitting at hard-coded position 11.
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

# Hold out 20% of the labelled data.
# NOTE: the cross-validation and final-training cells further down rebuild
# X_train / X_test from X_scaled themselves, so this split is only a
# convenience for ad-hoc experiments.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)


### Scaling for training 

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the labelled
# features, then apply that same transformation to both the labelled
# features and the test features.
scaler = StandardScaler().fit(X)
X_scaled, X_scaledR = scaler.transform(X), scaler.transform(X_testR)

### Model for the augmented data

In [None]:

# Binary classifier with one hidden layer:
# 11 input features -> 20 ReLU units -> 1 sigmoid output unit.
model = Sequential([
    Dense(20, input_dim=11, activation='relu'),  # hidden layer
    Dense(1, activation='sigmoid'),              # output layer
])
# Trained by backpropagation on the binary cross-entropy loss with Adam;
# accuracy is tracked as an additional metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


### Function for model

In [None]:
def NN_model(hidden_units=20, input_dim=11):
    """Build and compile a fresh single-hidden-layer binary classifier.

    A new, untrained model is returned on every call so that each
    cross-validation fold / trial starts from newly initialised weights.

    Parameters
    ----------
    hidden_units : int, default 20
        Number of ReLU units in the hidden layer.
    input_dim : int, default 11
        Number of input features.

    Returns
    -------
    keras.models.Sequential
        Model compiled with binary cross-entropy loss, the Adam optimizer
        and accuracy as a tracked metric.
    """
    model = Sequential()
    # Hidden layer.
    model.add(Dense(hidden_units, input_dim=input_dim, activation='relu'))
    # Output layer: one sigmoid unit for the binary label.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
# Batch size is the number of data points taken from the data and fed through the model at a time.
# Epochs is the number of full passes (iterations) over the training data.
# Number of gradient updates = epochs * (size of training data) / (batch size)

### Without augmented data 

In [None]:
# Baseline: 5-fold cross-validation on the scaled data without augmentation.
# `result` collects the model.evaluate output ([loss, accuracy]) of each fold.
kf = KFold(n_splits=5, random_state=1234, shuffle=True)
result = []

for fold, (train_index, test_index) in enumerate(kf.split(X_scaled), start=1):
    # Fresh, untrained model for every fold.
    model = NN_model()
    # Report fold sizes only; printing the full index arrays floods the output.
    print("Fold", fold, "- TRAIN:", len(train_index), "TEST:", len(test_index))
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Carve a small validation set (5%) out of the training fold.
    # Dedicated names are used so the notebook-level X (raw features) is not
    # overwritten, which would break re-running the scaling cell.
    X_fit, X_val, y_fit, y_val = train_test_split(X_train, y_train, test_size=0.05)

    model.fit(X_fit, y_fit, epochs=80, batch_size=10, validation_data=(X_val, y_val))

    # Score on the held-out fold.
    results = model.evaluate(X_test, y_test, batch_size=100)
    result.append(results)

print(result)

### Model with augmented data

#### Function that creates augmented data

In [None]:
def radiation_noise(data, alpha_range=(0.9, 1.1), beta=1 / 25, rng=None):
    """Return a noisy copy of ``data`` computed as ``alpha * data + beta * noise``.

    ``alpha`` is a single scalar drawn uniformly from ``alpha_range`` and
    applied to the whole array; ``noise`` is standard-normal with the same
    shape as ``data``.

    Parameters
    ----------
    data : array-like
        Values to perturb. Converted with ``np.asarray``, so lists work too.
    alpha_range : tuple of float, default (0.9, 1.1)
        ``(low, high)`` bounds of the multiplicative factor.
    beta : float, default 1/25
        Weight of the additive Gaussian noise.
    rng : numpy.random.Generator, optional
        Random source. When omitted, NumPy's global random state is used
        (so ``np.random.seed`` still controls the output, as before).

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``data``.
    """
    data = np.asarray(data)
    # Both the legacy np.random module and a Generator expose uniform/normal
    # with the signatures used below.
    source = np.random if rng is None else rng
    alpha = source.uniform(*alpha_range)
    noise = source.normal(loc=0.0, scale=1.0, size=data.shape)
    return alpha * data + beta * noise

Trains the neural network with cross-validation on the augmented training data, holding out a small validation set within each fold.

In [None]:
# 5-fold cross-validation where each training fold is enlarged with noisy
# copies of itself. `result` collects the model.evaluate output
# ([loss, accuracy]) of each fold, measured on the untouched held-out fold.
kf = KFold(n_splits=5, random_state=1234, shuffle=True)
result = []
n_copies = 10  # number of noisy copies appended to every training fold

for fold, (train_index, test_index) in enumerate(kf.split(X_scaled), start=1):
    # Fresh, untrained model for every fold.
    model = NN_model()
    # Report fold sizes only; printing the full index arrays floods the output.
    print("Fold", fold, "- TRAIN:", len(train_index), "TEST:", len(test_index))
    X_train, X_test = X_scaled[train_index], X_scaled[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Carve a small validation set (5%) out of the training fold before
    # augmenting, so validation rows are never noisy copies of training rows.
    X_fit, X_val, y_fit, y_val = train_test_split(X_train, y_train, test_size=0.05)

    # Original rows followed by n_copies noisy versions; labels are repeated
    # in the same order. Dedicated names keep the notebook-level X intact.
    X_aug = np.concatenate([X_fit] + [radiation_noise(X_fit) for _ in range(n_copies)])
    y_aug = np.tile(y_fit, n_copies + 1)

    model.fit(X_aug, y_aug, epochs=80, batch_size=10, validation_data=(X_val, y_val))

    # Score on the held-out fold.
    results = model.evaluate(X_test, y_test, batch_size=100)
    result.append(results)

print(result)
        

In [None]:
# Stack the per-fold model.evaluate outputs into a 2-D array: one row per
# fold, column 0 = loss, column 1 = accuracy (the metric the model was
# compiled with).
result_np = np.asarray(result)

In [None]:
# Mean and variance of the per-fold accuracy (column 1 of result_np).
# Returned together as a tuple: a notebook cell only displays its last
# expression, so a bare np.mean(...) line above np.var(...) is never shown.
fold_accuracy = result_np[:, 1]
np.mean(fold_accuracy), np.var(fold_accuracy)

### Validation and plots
Run model with all the data and validation 

In [None]:
n = 5 #amount of trials we do
n_copies = 10  # number of noisy copies appended to the training part

for i in range(n):
    # Fresh, untrained model for every trial.
    model = NN_model()

    # Hold out 10% of the clean, scaled data for validation BEFORE augmenting.
    # Splitting afterwards would validate on noisy copies of training rows,
    # which makes the validation curves overly optimistic.
    X_fit, X_val, y_fit, y_val = train_test_split(X_scaled, y, test_size=0.10)

    # Train on the original rows plus n_copies noisy versions of them;
    # labels are repeated in the same order.
    X_aug = np.concatenate([X_fit] + [radiation_noise(X_fit) for _ in range(n_copies)])
    y_aug = np.tile(y_fit, n_copies + 1)

    history = model.fit(X_aug, y_aug, epochs=250, batch_size=100,
                        validation_data=(X_val, y_val))

    # list all data in history
    print(history.history.keys())

    # One figure per tracked quantity: training curve vs. validation curve.
    for metric in ('accuracy', 'loss'):
        fig, ax = plt.subplots()
        ax.plot(history.history[metric])
        ax.plot(history.history['val_' + metric])
        ax.set_title('model ' + metric)
        ax.set_ylabel(metric)
        ax.set_xlabel('epoch')
        ax.legend(['train', 'test_val'], loc='upper left')
        plt.show()
    
        

### Prediction on canvas data with all data 

In [None]:
n = 10 #amount of trials we do
n_copies = 10  # number of noisy copies appended to the training data

# One row per trial, one column per test sample.
Y_predtot = np.zeros((n, X_scaledR.shape[0]))

for i in range(n):
    # Fresh, untrained model for every trial.
    model = NN_model()

    # Train on ALL labelled rows plus n_copies noisy versions of them;
    # labels are repeated in the same order.
    X_aug = np.concatenate([X_scaled] + [radiation_noise(X_scaled) for _ in range(n_copies)])
    y_aug = np.tile(y, n_copies + 1)

    # No validation_data here: every labelled row is used for training, so
    # there is no clean hold-out set, and relying on X_val / y_val left over
    # from an earlier cell would make this cell fail on a fresh kernel.
    model.fit(X_aug, y_aug, epochs=80, batch_size=100)

    # Threshold the sigmoid output at 0.5 to get class labels. This is the
    # documented replacement for Sequential.predict_classes, which was
    # removed from newer TensorFlow releases.
    ypred = (model.predict(X_scaledR) > 0.5).astype("int32")
    Y_predtot[i, :] = ypred.reshape(X_scaledR.shape[0])


In [None]:
# One row per test sample, one column per trial.
# The transpose is taken without reassigning Y_predtot so that re-running
# this cell does not flip the orientation back, and a dedicated name is used
# so the training frame `df` is not overwritten.
predictions_df = pd.DataFrame(np.transpose(Y_predtot))

## save to xlsx file

filepath = 'result_tot.xlsx'

predictions_df.to_excel(filepath, index=False)

#df.to_csv(r'C:\Users\Emelie\Documents\Modern method in ML\Project\NN\Results\extra_data_{}.csv'.format(name) )