In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import csv, time
from datetime import datetime, timedelta
import seaborn as sns

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn import feature_selection
from sklearn import model_selection
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.naive_bayes import MultinomialNB
from sklearn.decomposition import PCA
import warnings
from sklearn.metrics import roc_auc_score

# label grouping

In [None]:
# classify output data for post- or pre- experiment

def get_flour(option):    # option: pre or post
    """Turn one round of the Flourishing Scale survey into binary labels.

    Reads ``./Outputs/FlourishingScale.csv``, keeps the rows whose second
    column equals ``option``, discards every row with a missing answer and
    sums the eight item columns per student.

    Returns an ``(n, 2)`` float array: column 0 is the numeric student id,
    column 1 is 1 when the student's total is strictly above the median total
    and 0 otherwise.
    """
    survey = pd.read_csv('./Outputs/FlourishingScale.csv').to_numpy()

    # keep only the requested survey round
    rows = survey[survey[:, 1] == option]

    # 'u07' -> 7
    rows[:, 0] = [int(uid[1:]) for uid in rows[:, 0]]

    # drop the round column (index 1): what is left is id + 8 items
    kept_columns = [col for col in range(10) if col != 1]
    rows = rows[:, kept_columns]

    # mark missing answers with the -1 sentinel ...
    rows[pd.isnull(rows)] = -1

    # ... and keep only the rows where no item column carries it
    complete = np.ones(rows.shape[0], dtype=bool)
    for col in range(1, 9):
        complete &= np.asarray(rows[:, col] != -1, dtype=bool)
    rows = rows[complete]

    result = np.zeros((rows.shape[0], 2))
    result[:, 0] = rows[:, 0]
    result[:, 1] = rows[:, 1:].sum(axis=1)

    # median split; `totals` is a view, so the writes land in `result`
    totals = result[:, 1]
    threshold = np.median(totals)
    totals[totals <= threshold] = 0
    totals[totals > threshold] = 1

    return result

def get_panas(option):    # option: pre or post
    """Turn one round of the PANAS survey into binary positive/negative labels.

    Reads ``./Outputs/panas.csv``, keeps the rows whose second column equals
    ``option`` and discards every row with a missing (or zero) answer in ANY
    item column.  The original loop (``range(1, 18)``) only inspected 17 of
    the 18 item columns, so a blank in the last column slipped through and was
    counted as a score of 0.

    For each remaining student the nine positive-affect items and the nine
    negative-affect items are summed separately; each sum is then binarised
    against the mean of that sum (1 = strictly above the mean).

    Returns a pair ``(positive, negative)`` of ``(n, 2)`` arrays whose columns
    are ``[student id, label]``.

    Side effect: the binarised ``(n, 1)`` columns are also published as the
    module-level names ``pos_sum`` and ``neg_sum``, exactly as before.
    """
    # kept for backward compatibility: other cells may read these globals
    global pos_sum, neg_sum

    output = pd.read_csv('./Outputs/panas.csv')
    n = output.to_numpy()

    selected = n[n[:, 1] == option]
    # 'u07' -> 7
    index = np.array([int(i[1:]) for i in selected[:, 0]]).reshape(-1, 1)

    # a missing answer becomes 0, which is treated as "missing" below
    data = selected[:, 2:]
    data[pd.isnull(data)] = 0
    data = data.astype(int)

    # drop every row that has a missing answer in any item column
    complete = (data != 0).all(axis=1)
    index = index[complete]
    data = data[complete]

    positive = ['Interested', 'Strong', 'Enthusiastic', 'Proud', 'Alert', 'Inspired', 'Determined', 'Attentive',
                'Active']
    negative = ['Distressed', 'Upset', 'Guilty', 'Scared', 'Hostile', 'Irritable', 'Nervous', 'Jittery', 'Afraid']

    # header names may carry stray whitespace, so strip before looking them up
    columns = [name.strip() for name in output.keys()[2:]]
    pos_cols = [columns.index(name) for name in positive]
    neg_cols = [columns.index(name) for name in negative]

    pos_sum = data[:, pos_cols].sum(axis=1).astype(float).reshape(-1, 1)
    neg_sum = data[:, neg_cols].sum(axis=1).astype(float).reshape(-1, 1)

    # mean split: 1 when strictly above the mean, otherwise 0
    pos_sum = (pos_sum > pos_sum.mean()).astype(float)
    neg_sum = (neg_sum > neg_sum.mean()).astype(float)

    return np.concatenate((index, pos_sum), axis=1), np.concatenate((index, neg_sum), axis=1)


In [None]:
# classification for the mean value of pre and post

def get_flour_mean():
    """Binary Flourishing Scale labels from the mean of the pre and post totals.

    Reads ``./Outputs/FlourishingScale.csv``.  For each survey round the eight
    item columns are summed per student, skipping rows with a missing answer.
    Students that have a complete row in BOTH rounds get the average of their
    two totals, which is then split at the median (1 = strictly above).

    Two defects of the previous version are fixed here:

    * it averaged only the first item column instead of the 8-item total, so
      the label depended on a single question;
    * it added the pre and post rows positionally, which silently paired
      different students whenever the two rounds were not listed in the same
      order.  Rows are now matched on the student id.

    Returns an ``(n, 2)`` float array ``[student id, label]`` in the order the
    students appear in the ``pre`` round.
    """
    file = pd.read_csv('./Outputs/FlourishingScale.csv')
    # read data from csv
    n = file.to_numpy()

    def _round_totals(option):
        """Return {student id: total score} for the complete rows of one round."""
        rows = n[n[:, 1] == option]
        ids = [int(i[1:]) for i in rows[:, 0]]      # 'u07' -> 7
        items = rows[:, 2:10]                       # the eight item columns
        totals = {}
        for uid, answers in zip(ids, items):
            # -1 is kept as a "missing" sentinel to match the other loaders
            if pd.isnull(answers).any() or (answers == -1).any():
                continue
            totals[uid] = float(sum(answers))
        return totals

    pre_totals = _round_totals('pre')
    post_totals = _round_totals('post')

    # pair the two rounds on the student id, keeping the pre-round order
    paired = [(uid, (score + post_totals[uid]) / 2)
              for uid, score in pre_totals.items() if uid in post_totals]
    result = np.array(paired, dtype=float).reshape(-1, 2)

    # use median as our threshold
    a = np.median(result[:, 1])
    result[:, 1] = (result[:, 1] > a).astype(float)

    return result

# both pre and post
def get_panas_mean():
    """Binary PANAS labels from the mean of the pre and post affect sums.

    Reads ``./Outputs/panas.csv`` once per round.  For every student with a
    complete row in BOTH rounds, the positive-affect sum and the
    negative-affect sum are averaged over the two rounds and each average is
    split at its median (1 = strictly above).

    Two defects of the previous version are fixed here:

    * the completeness check (``range(1, 18)``) skipped the last of the 18
      item columns, so a blank there was counted as a score of 0;
    * the pre and post arrays were added positionally, id column included,
      which paired different students (and produced meaningless ids) whenever
      the two rounds were not listed in the same order.  Rows are now matched
      on the student id.

    Returns ``(result_pos, result_neg)``, two ``(n, 2)`` float arrays
    ``[student id, label]`` in the order the students appear in the ``pre``
    round.
    """
    positive = ['Interested', 'Strong', 'Enthusiastic', 'Proud', 'Alert', 'Inspired', 'Determined', 'Attentive',
                'Active']
    negative = ['Distressed', 'Upset', 'Guilty', 'Scared', 'Hostile', 'Irritable', 'Nervous', 'Jittery', 'Afraid']

    def get_panas(option):
        """Return {student id: (positive sum, negative sum)} for one round."""
        output = pd.read_csv('./Outputs/panas.csv')
        n = output.to_numpy()

        selected = n[n[:, 1] == option]
        ids = [int(i[1:]) for i in selected[:, 0]]      # 'u07' -> 7

        # a missing answer becomes 0, which marks the row as incomplete
        data = selected[:, 2:]
        data[pd.isnull(data)] = 0
        data = data.astype(int)

        # header names may carry stray whitespace
        columns = [name.strip() for name in output.keys()[2:]]
        pos_cols = [columns.index(name) for name in positive]
        neg_cols = [columns.index(name) for name in negative]

        scores = {}
        for uid, answers in zip(ids, data):
            if (answers != 0).all():
                scores[uid] = (float(answers[pos_cols].sum()), float(answers[neg_cols].sum()))
        return scores

    pre = get_panas('pre')
    post = get_panas('post')
    # students present in both rounds, in pre-round order
    shared = [uid for uid in pre if uid in post]

    def _label(which):
        """Median-split the pre/post average of one affect sum (0 = positive, 1 = negative)."""
        result = np.zeros((len(shared), 2))
        result[:, 0] = shared
        result[:, 1] = [(pre[uid][which] + post[uid][which]) / 2 for uid in shared]
        # use median as our threshold
        threshold = np.median(result[:, 1])
        result[:, 1] = (result[:, 1] > threshold).astype(float)
        return result

    result_pos = _label(0)
    result_neg = _label(1)

    return result_pos, result_neg


# feature preprocessing

In [None]:
#split data into 10 weeks

def get_data(data_name, label): # for computing duration
    """Weekly total duration of one interval-type sensor, one row per labelled student.

    Reads ``./inputs/sensing/{data_name}/{data_name}_uXX.csv`` for student ids
    0-59.  Each file is expected to hold a start timestamp in column 0 and an
    end timestamp in column 1.  The span between the earliest and latest start
    timestamp over all students is cut into 7-day buckets (the last bucket is
    open-ended) and ``end - start`` is summed per student and bucket.

    Parameters
    ----------
    data_name : str
        Sensor folder / file prefix, e.g. ``'conversation'``.
    label : ndarray of shape (n, 2)
        ``[student id, class label]`` rows.

    Returns
    -------
    ndarray of int, shape (n, weeks + 2)
        Row ``k`` belongs to ``label[k]``: column 0 is the student id, the
        middle columns are the weekly totals and the last column is the class
        label.  A student without a sensor file keeps an all-zero row (id
        included), as before.

    Raises
    ------
    FileNotFoundError
        If no sensor file could be read at all.

    Changes from the previous version: missing or empty files are skipped
    explicitly instead of through a bare ``except``; each file is read once;
    a record whose start falls exactly on a bucket boundary (including the
    very first record) is no longer dropped; and the output is sized from
    ``label`` instead of relying on a hard-coded row index of 48.
    """
    # NOTE(review): this path uses './inputs' while other cells use './Inputs';
    # that only matters on case-sensitive file systems - confirm the real name.
    week = 7 * 24 * 3600

    # read every available student file once
    records = {}
    for i in range(60):
        try:
            table = pd.read_csv(f'./inputs/sensing/{data_name}/{data_name}_u{i:02d}.csv').to_numpy()
        except (OSError, pd.errors.EmptyDataError):
            continue
        if table.shape[0] == 0:     # header only
            continue
        records[i] = table

    if not records:
        raise FileNotFoundError(f'no readable {data_name} files under ./inputs/sensing/{data_name}/')

    min_value = min(np.min(table[:, 0]) for table in records.values())
    max_value = max(np.max(table[:, 0]) for table in records.values())

    # lower edge of every weekly bucket
    partition = np.arange(min_value, max_value, week)
    if partition.size == 0:         # every record shares one timestamp
        partition = np.array([min_value])
    upper = np.append(partition[1:], np.inf)    # the last bucket has no upper edge

    weekly = {}
    for student_id, table in records.items():
        start = table[:, 0]
        duration = table[:, 1] - table[:, 0]
        weekly[student_id] = [duration[(start >= lo) & (start < hi)].sum()
                              for lo, hi in zip(partition, upper)]

    # one output row per label row, in label order
    new = np.zeros((label.shape[0], len(partition) + 2), dtype=int)
    for row, (student_id, target) in enumerate(label[:, :2]):
        student_id = int(student_id)
        if student_id in weekly:
            new[row, 0] = student_id
            new[row, 1:-1] = weekly[student_id]
        new[row, -1] = target

    return new


def group_split(data_name, score, label): # for computing frequency
    """Weekly frequency of one class of a label-type sensor, one row per labelled student.

    Reads ``./inputs/sensing/{data_name}/{data_name}_uXX.csv`` for student ids
    0-59.  Each file is expected to hold a timestamp in column 0 and an
    inferred class in column 1.  The span between the earliest and latest
    timestamp over all students is cut into 7-day buckets (the last bucket is
    open-ended); per student and bucket, the number of rows whose class equals
    ``score`` is counted and multiplied by 2.

    Parameters
    ----------
    data_name : str
        Sensor folder / file prefix, e.g. ``'activity'``.
    score : int
        Class value to count.
    label : ndarray of shape (n, 2)
        ``[student id, class label]`` rows.

    Returns
    -------
    ndarray of int, shape (n, weeks + 2)
        Row ``k`` belongs to ``label[k]``: column 0 is the student id, the
        middle columns are the weekly counts and the last column is the class
        label.  A student without a sensor file keeps an all-zero row (id
        included), as before.

    Raises
    ------
    FileNotFoundError
        If no sensor file could be read at all.

    Changes from the previous version: the per-student buffer is sized from
    the number of weekly buckets instead of a hard-coded 10 (which failed for
    any other span); the output is sized from ``label`` instead of relying on
    a hard-coded row index of 48; each file is read once; and the time range
    uses the column minimum/maximum rather than assuming sorted rows.
    """
    # NOTE(review): this path uses './inputs' while other cells use './Inputs';
    # that only matters on case-sensitive file systems - confirm the real name.
    week = 7 * 24 * 3600

    # read every available student file once
    records = {}
    for i in range(60):
        try:
            table = pd.read_csv(f'./inputs/sensing/{data_name}/{data_name}_u{i:02d}.csv').to_numpy()
        except (OSError, pd.errors.EmptyDataError):
            continue
        if table.shape[0] == 0:     # header only
            continue
        records[i] = table

    if not records:
        raise FileNotFoundError(f'no readable {data_name} files under ./inputs/sensing/{data_name}/')

    min_value = min(np.min(table[:, 0]) for table in records.values())
    max_value = max(np.max(table[:, 0]) for table in records.values())

    # lower edge of every weekly bucket
    partition = np.arange(min_value, max_value, week)
    if partition.size == 0:         # every record shares one timestamp
        partition = np.array([min_value])
    upper = np.append(partition[1:], np.inf)    # the last bucket has no upper edge

    weekly = {}
    for student_id, table in records.items():
        stamp = table[:, 0]
        wanted = table[:, 1] == score
        # NOTE(review): the factor 2 is inherited from the original code; its
        # meaning (seconds per sample?) is not visible here - confirm.
        weekly[student_id] = [2 * np.count_nonzero((stamp >= lo) & (stamp < hi) & wanted)
                              for lo, hi in zip(partition, upper)]

    # one output row per label row, in label order
    new = np.zeros((label.shape[0], len(partition) + 2), dtype=int)
    for row, (student_id, target) in enumerate(label[:, :2]):
        student_id = int(student_id)
        if student_id in weekly:
            new[row, 0] = student_id
            new[row, 1:-1] = weekly[student_id]
        new[row, -1] = target

    return new

In [None]:
# night duration
def transform_all(time_stamp): # transform timestamp to local time
    """Split a Unix timestamp into ``(day, hour, minute, second)`` at UTC-4.

    The timestamp is read as UTC and shifted back by a fixed four hours; no
    daylight-saving rule is applied.  ``day`` is the day of the month.
    """
    local = datetime.utcfromtimestamp(time_stamp) - timedelta(hours=4)
    return local.day, local.hour, local.minute, local.second
    
class timegap:
    """Night-time part, in whole minutes, of one recorded interval.

    ``a`` is the interval start and ``b`` the interval end, each given as a
    ``(day, hour, minute, second)`` tuple.  "Night" is the window from 18:00
    to 08:00.  The result is stored in ``self.g``; the unpacked fields are
    kept in ``self.attr``.

    Fix: ``end_sec`` used to be copied from the START tuple (``a[3]``), so the
    seconds of the end time were wrong and the gap could be off by a minute.

    NOTE(review): only the day numbers are compared, so any interval whose
    start and end days differ is treated as crossing exactly one midnight;
    a same-day interval that starts before 08:00 and ends after 18:00 is
    credited with its morning part only.  Both behaviours are unchanged here.
    """

    def __init__(self, a, b):
        self.a = a
        self.b = b
        self.attr = {}
        self.compute()
        self.g = self.gap()

    def compute(self):
        """Unpack the start/end tuples into named entries of ``self.attr``."""
        self.attr['start_date'] = self.a[0]
        self.attr['end_date'] = self.b[0]
        self.attr['start_hour'] = self.a[1]
        self.attr['end_hour'] = self.b[1]
        self.attr['start_min'] = self.a[2]
        self.attr['end_min'] = self.b[2]
        self.attr['start_sec'] = self.a[3]
        self.attr['end_sec'] = self.b[3]    # was self.a[3]

    def gap(self):
        """Return the number of whole minutes of the interval that fall at night."""
        g = float('-inf')
        # seconds since local midnight for both ends
        start = self.attr['start_hour'] * 3600 + self.attr['start_min'] * 60 + self.attr['start_sec']
        end = self.attr['end_hour'] * 3600 + self.attr['end_min'] * 60 + self.attr['end_sec']

        if self.attr['start_date'] == self.attr['end_date']: # same day
            if self.attr['start_hour'] >= 18: # starts after 6pm: everything is night
                g = (end - start)//60
            elif self.attr['start_hour'] >= 8: # starts between 8am and 6pm
                # only the part after 6pm counts; nothing if it ends before 6pm
                g = (end - 18 * 3600)//60 if self.attr['end_hour'] >= 18 else 0
            elif self.attr['end_hour'] >= 8: # starts before 8am, ends after 8am
                g = (8 * 3600 - start)//60
            else: # starts and ends before 8am
                g = (end - start) // 60

        else: # different day
            if self.attr['end_hour'] >= 8 and self.attr['start_hour'] >= 18: # start after 6pm, end after 8am
                # rest of the evening plus the full 0am-8am morning
                g = (8 * 3600 + (24 * 3600 - start))//60
            elif self.attr['end_hour'] >= 8 and self.attr['start_hour'] < 18: # start before 6pm, end after 8am
                # the whole night: 6pm-midnight plus 0am-8am
                g = (8 * 3600 + 6 * 3600)//60
            elif self.attr['end_hour'] < 8 and self.attr['start_hour'] >= 18: # start after 6pm, end before 8am
                g = (end + (24 * 3600 - start))//60
            else: # start before 6pm, end before 8am
                # 6pm-midnight plus the part of the morning up to the end
                g = (end + 6 * 3600)//60
        return g


def dark_night(f): # extract night time
    """Night-time minutes per calendar day for one interval-type sensor file.

    ``f`` is the path of a CSV file with a header row followed by integer
    ``start, end`` Unix timestamps.  The records are grouped into consecutive
    24-hour windows anchored at timestamp 1362096000; for every window the
    night-time minutes of its records (as computed by ``timegap`` on the
    ``transform_all`` breakdown of each record) are summed.

    Returns a 1-D array with one total per window.  A window without records
    contributes 0.

    Changes from the previous version: the file is closed after reading, and
    an intermediate list that was built but never used has been removed.  The
    returned values are unchanged.
    """
    day = 24 * 3600

    with open(f) as file:
        # skip blank lines, then drop the header row
        x = np.array([i for i in csv.reader(file) if i][1:]).astype(int)

    # start of the 24-hour window that contains the first record
    mid = 1362096000
    while mid < x[0][0]:
        mid += day
    mid -= day

    # interval[k] holds the records whose start falls into window k
    interval = [[]]
    for i in x:
        if i[0] < (mid + day):
            interval[-1].append(i)
        else:
            while True:
                mid += day
                if i[0] < (mid + day):
                    interval.append([i])
                    break
                else:
                    # empty window: a zero-length placeholder keeps the day in the output
                    interval.append([np.array([0, 0])])

    sleep_day = []
    for records in interval:
        sleep_time = 0
        for record in records:
            tg = timegap(transform_all(record[0]), transform_all(record[1]))
            sleep_time += tg.g
        sleep_day.append(sleep_time)

    return np.array(sleep_day)

In [None]:
# Total night-time minutes per student id (index 0-59) for four sensors.
# A student without a file for a sensor keeps 0 in that sensor's array.
darkNights = np.zeros(60)   # dark dataset
plock = np.zeros(60)        # phonelock dataset
conv = np.zeros(60)         # conversation dataset
phoneChar = np.zeros(60)    # phonecharge dataset

for i in range(60):
    sources = (
        (darkNights, f'./Inputs/sensing/dark/dark_u{i:02}.csv'),
        (plock, f'./Inputs/sensing/phonelock/phonelock_u{i:02}.csv'),
        (conv, f'./Inputs/sensing/conversation/conversation_u{i:02}.csv'),
        (phoneChar, f'./Inputs/sensing/phonecharge/phonecharge_u{i:02}.csv'),
    )
    for totals, f in sources:
        try:
            totals[i] = dark_night(f).sum()
        except FileNotFoundError:
            # this student has no file for this sensor
            pass

In [None]:
# bluetooth
def transform(time_stamp): # transform timestamp into local time and return hour
    """Return the hour of day (0-23) of a Unix timestamp at a fixed UTC-4 offset."""
    local = datetime.utcfromtimestamp(time_stamp) - timedelta(hours=4)
    return local.hour

def count_bluetooth(f):
    """Count the Bluetooth sightings of one file by day and by night.

    ``f`` is the path of a CSV file with a header row; column 0 of every data
    row is an integer Unix timestamp.  Each timestamp is converted to a local
    hour with ``transform``.

    Returns ``np.array([day, night])`` where "day" is 08:00-17:59 and "night"
    is everything else.

    Changes from the previous version:

    * the file is closed after reading;
    * the day window used to be ``hour > 8 and hour <= 18``, which counted
      08:xx as night and 18:xx as day.  That contradicts the 18:00-08:00
      night used by ``timegap``, so it is now ``8 <= hour < 18``;
    * a file with only a header returns ``[0, 0]`` instead of raising.
    """
    with open(f) as file:
        # skip blank lines, then drop the header row
        rows = [i for i in csv.reader(file) if i][1:]

    if not rows:
        return np.array([0, 0])

    x = np.array(rows)[:, 0].astype(int)
    g = np.array([transform(i) for i in x]).astype(int)

    is_day = (g >= 8) & (g < 18)
    day = np.count_nonzero(is_day)
    night = np.count_nonzero(~is_day)

    return np.array([day, night])

# per-student Bluetooth counts: one row per student id, as returned by count_bluetooth
bluetooth = np.zeros((60, 2))

for i in range(60): # extract number of devices in daytime and nighttime
    f = f'./Inputs/sensing/bluetooth/bt_u{i:02}.csv'
    try:
        counts = count_bluetooth(f)
    except FileNotFoundError:
        # no Bluetooth log for this student id: the row stays at zero
        continue
    bluetooth[i] = counts

    
 

In [None]:
# compute the frequency in the daytime and nighttime
def count_audio(f):
    """Count the class-0/1/2 samples of one file, split by day and by night.

    ``f`` is the path of a CSV file with a header row followed by integer
    ``timestamp, class`` rows.  Each timestamp is converted to a local hour
    with ``transform``.  The callers in this notebook use it for the audio
    files (0 = silence, 1 = voice, 2 = noise) and for the activity files.

    Returns ``np.array([day_0, day_1, day_2, night_0, night_1, night_2])``
    where "day" is 08:00-17:59 and "night" is everything else.

    Changes from the previous version:

    * the file is closed after reading;
    * the day window used to be ``hour > 8 and hour <= 18``, which counted
      08:xx as night and 18:xx as day.  That contradicts the "daytime 8-18"
      intent and the 18:00-08:00 night used by ``timegap``, so it is now
      ``8 <= hour < 18``;
    * a file with only a header returns six zeros instead of raising.
    """
    with open(f) as file:
        # skip blank lines, then drop the header row
        rows = [i for i in csv.reader(file) if i][1:]

    if not rows:
        return np.zeros(6, dtype=int)

    x = np.array(rows).astype(int)
    hours = np.array([transform(i) for i in x[:, 0]]).astype(int) # only consider hour
    labels = x[:, 1] # inferred class

    is_day = (hours >= 8) & (hours < 18)

    # order: day 0/1/2 first, then night 0/1/2
    counts = [np.count_nonzero(period & (labels == value))
              for period in (is_day, ~is_day)
              for value in (0, 1, 2)]

    return np.array(counts)
        
# Per-student day/night class counts (one row per student id, six columns as
# returned by count_audio) for the audio and the activity sensor.
# A student without a file keeps an all-zero row.
audio = np.zeros((60,6))
activity = np.zeros((60,6))

for i in range(60):
    sources = (
        (audio, f'./Inputs/sensing/audio/audio_u{i:02}.csv'),
        (activity, f'./Inputs/sensing/activity/activity_u{i:02}.csv'),
    )
    for target, f in sources:
        try:
            target[i] = count_audio(f)
        except FileNotFoundError:
            # this student has no file for this sensor
            pass

In [None]:
# normalization
def norm(myData):
    """Min-max scale the feature columns of ``myData`` to the range [0, 1].

    The first column (student id) and the last column (label) are left out;
    the returned array therefore has two columns fewer than ``myData``.

    A column whose values are all identical has a zero range.  It used to
    produce NaN through a 0/0 division; it is now returned as all zeros.
    """
    features = myData[:, 1:-1]
    low = features.min(axis=0)
    span = features.max(axis=0) - low
    # avoid dividing by zero for constant columns: (x - low) is 0 there anyway
    safe_span = np.where(span == 0, 1, span)
    return (features - low) / safe_span

In [None]:
# combine features
def combine_feature(audio, activity, bluetooth, darkNights, plock, conv, phoneChar, label):
    """Assemble the full feature table for the students listed in ``label``.

    The per-student arrays (``audio``, ``activity``, ``bluetooth``,
    ``darkNights``, ``plock``, ``conv``, ``phoneChar``) are indexed by student
    id; the weekly features are computed here through ``get_data`` and
    ``group_split``.

    Column layout of the returned array: student id, dark / phonelock /
    conversation night duration, Bluetooth day and night, six audio counts,
    six activity counts, phonecharge night duration, then the weekly
    phonecharge, conversation, activity-0, activity-1, activity-2 and audio-0
    columns, and finally the class label.
    """
    student_ids = [int(row[0]) for row in label]

    def pick(per_student):
        # rows of a per-student array, in label order
        return np.array([per_student[sid] for sid in student_ids])

    def as_column(values):
        return values.reshape(-1, 1)

    # weekly features (student id in column 0, label in the last column)
    conversation = get_data('conversation', label) # 10 weeks
    activity_stationary = group_split('activity', 0, label) # 10 weeks
    activity_walk = group_split('activity', 1, label) # 10 weeks
    activity_run = group_split('activity', 2, label) # 10 weeks
    audio_s = group_split('audio', 0, label) # 10 weeks
    phonecharge = get_data('phonecharge', label) # 10 weeks

    blocks = [
        as_column(label[:, 0]),
        as_column(pick(darkNights)),    # night duration
        as_column(pick(plock)),         # night duration
        as_column(pick(conv)),          # night duration
        pick(bluetooth),                # day and night
        pick(audio),                    # day and night, 0,1,2
        pick(activity),                 # day and night, 0,1,2
        as_column(pick(phoneChar)),     # night duration
        phonecharge[:, 1:-1],
        conversation[:, 1:-1],
        activity_stationary[:, 1:-1],
        activity_walk[:, 1:-1],
        activity_run[:, 1:-1],
        audio_s[:, 1:-1],
        as_column(label[:, 1]),
    ]
    return np.concatenate(blocks, axis=1)


In [None]:
# 78 features
# Feature names, in the column order produced by combine_feature (student id
# and label columns excluded): 18 day/night features followed by 10 weekly
# columns for each of 6 sensors.
# Fix: the hand-written list named the 4th weekly running-activity column
# 'act0_week4' (a duplicate) instead of 'act2_week4'.
x_label = [
    'dark_night', 'phonelock_night', 'conversation_night',
    'bluetooth_day', 'bluetooth_night',
    'audio_0_day', 'audio_1_day', 'audio_2_day',
    'audio_0_night', 'audio_1_night', 'audio_2_night',
    'act_0_day', 'act_1_day', 'act_2_day',
    'act_0_night', 'act_1_night', 'act_2_night',
    'phonecharge_night',
] + [
    f'{sensor}_week{week}'
    for sensor in ('phonecharge', 'conversation', 'act0', 'act1', 'act2', 'audio0')
    for week in range(1, 11)
]

# model construction

In [None]:

# random forest
def _tune_and_score(base_model, parameter, tag, x_train, y_train, cv=9):
    """Grid-search ``base_model`` and cross-validate the best configuration.

    ``parameter`` is the grid passed to ``GridSearchCV`` (selection metric:
    ROC AUC).  The best parameters are printed, prefixed with ``tag``.  The
    best configuration is then scored with ``cv``-fold cross-validation.

    Returns ``[roc_auc, accuracy, precision, recall, f1]``, each the mean
    over the folds.

    Notes
    -----
    * ``iid`` is no longer passed to ``GridSearchCV``: the argument was
      removed in scikit-learn 0.24 and raises ``TypeError`` there.
    * All five metrics come from one ``cross_validate`` pass instead of five
      separate ``cross_val_score`` runs over the same folds.
    * Warnings are silenced only while fitting, instead of being switched off
      for the whole process after the work has already been done.
    * NOTE(review): the scores are computed on the same data that selected
      the hyper-parameters, so they are optimistic; nested cross-validation
      would be needed for an unbiased estimate.
    """
    metric_names = ['roc_auc', 'accuracy', 'precision', 'recall', 'f1']
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        clf = GridSearchCV(base_model, parameter, scoring='roc_auc', cv=cv)
        clf.fit(x_train, y_train)
        print(f'{tag} best_parameter', clf.best_params_)
        # best_estimator_ is base_model with the winning parameters applied
        scores = model_selection.cross_validate(clf.best_estimator_, x_train, y_train,
                                                cv=cv, scoring=metric_names)
    return [scores[f'test_{name}'].mean() for name in metric_names]


def rfc_clf(x_train, y_train):
    """Tune and score a random forest; returns [roc_auc, accuracy, precision, recall, f1]."""
    parameter = {'n_estimators': range(10, 101, 10),
                 'max_depth': [1, 2, 3, 4]}
    return _tune_and_score(RandomForestClassifier(random_state=0), parameter,
                           'random forest', x_train, y_train)

# svm
def svm_clf(x_train, y_train):
    """Tune and score an SVM; returns [roc_auc, accuracy, precision, recall, f1]."""
    # NOTE(review): 'degree' only affects the 'poly' kernel, which is not in
    # the kernel list, so the four degree values repeat identical fits.
    parameter = {'kernel': ['rbf', 'sigmoid', 'linear'],
                 'degree': [1, 2, 3, 4]}
    return _tune_and_score(svm.SVC(random_state=0, gamma='auto'), parameter,
                           'svm', x_train, y_train)

# knn
def knn_clf(x_train, y_train):
    """Tune and score a k-nearest-neighbours classifier; returns [roc_auc, accuracy, precision, recall, f1]."""
    parameter = {'n_neighbors': range(3, 6),
                 'algorithm': ['auto', 'ball_tree', 'brute']}
    return _tune_and_score(KNeighborsClassifier(), parameter,
                           'knn', x_train, y_train)


# LogisticRegression
def lr_(x_train, y_train):
    """Tune and score a logistic regression; returns [roc_auc, accuracy, precision, recall, f1].

    The scored model now keeps ``random_state=0``; the previous version
    rebuilt it without the seed that the grid search had used.
    """
    parameter = {'penalty': ['l1', 'l2'],
                 'class_weight': [None, {1: 0.5, 0: 0.5}, {1: 0.6, 0: 0.4}, {1: 0.4, 0: 0.6}]}
    return _tune_and_score(LogisticRegression(solver='liblinear', random_state=0), parameter,
                           'lr', x_train, y_train)

In [None]:
# obtain evaluation matrix
def collect_socre(x_train,y_train):
    """Run the four classical classifiers and tabulate their scores.

    Returns a DataFrame with one row per model (``rfc``, ``lr``, ``knn``,
    ``svm``) and one column per metric.
    """
    # evaluated in this order; each function returns the five metric values
    evaluators = [('rfc', rfc_clf), ('lr', lr_), ('knn', knn_clf), ('svm', svm_clf)]
    reports = [evaluate(x_train, y_train) for _, evaluate in evaluators]

    return pd.DataFrame(
        data=np.array(reports),
        columns=['auc_roc','accuracy','precision','recall','f1_score'],
        index=[name for name, _ in evaluators],
    )

In [None]:
# neural network
class DenseModel(torch.nn.Module):
    """Fully connected classifier with four stages and dense skip connections.

    Stage widths are 64, 128, 256 and 512.  Stages three and four take the
    concatenated outputs of all earlier stages as input.  Every linear layer
    is followed by ReLU and dropout (p = 0.5); the head returns two logits.

    NOTE(review): within each stage the second linear layer is applied twice
    (l2, l4, l6 and l8 are reused), exactly as in the original forward pass.
    Confirm whether this weight sharing is intended.
    """

    def __init__(self, num_of_features):
        super().__init__()
        nn = torch.nn
        # stage 1: features -> 64
        self.l1 = nn.Linear(num_of_features, 64)
        self.l2 = nn.Linear(64, 64)
        # stage 2: 64 -> 128
        self.l3 = nn.Linear(64, 128)
        self.l4 = nn.Linear(128, 128)
        # stage 3: (64 + 128) -> 256
        self.l5 = nn.Linear(64 + 128, 256)
        self.l6 = nn.Linear(256, 256)
        # stage 4: (64 + 128 + 256) -> 512
        self.l7 = nn.Linear(64 + 128 + 256, 512)
        self.l8 = nn.Linear(512, 512)
        # head and shared dropout
        self.end = nn.Linear(512, 2)
        self.do = nn.Dropout(0.5)

    def _activate(self, layer, x):
        """Linear layer followed by ReLU and dropout."""
        return self.do(torch.relu(layer(x)))

    def _stage(self, widen, keep, x):
        """Apply ``widen`` once, then ``keep`` twice."""
        hidden = self._activate(widen, x)
        hidden = self._activate(keep, hidden)
        return self._activate(keep, hidden)

    def forward(self, x):
        out64 = self._stage(self.l1, self.l2, x)                    # (features - 64)
        out128 = self._stage(self.l3, self.l4, out64)               # (64 - 128)
        out256 = self._stage(self.l5, self.l6,
                             torch.cat((out64, out128), dim = 1))   # (192 - 256)
        out512 = self._stage(self.l7, self.l8,
                             torch.cat((out64, out128, out256), dim = 1)) # (448 - 512)
        return self.end(out512) # (512 - 2)
        
# def metric(output, target):
def metric(tp, tn, fp, fn):
    """Compute recall, precision, accuracy and F1 from confusion-matrix counts.

    Parameters are the numbers of true positives, true negatives, false
    positives and false negatives.  The four counts are also printed.

    Returns a dict with the keys ``'recall'``, ``'precision'``,
    ``'accuracy'`` and ``'f1score'``.  A ratio whose denominator is zero
    (for example recall when there are no positive samples) is reported as 0
    instead of raising ``ZeroDivisionError``.
    """
    def _ratio(numerator, denominator):
        # an undefined ratio is reported as 0
        return numerator / denominator if denominator else 0

    print(tp, tn, fp, fn)

    recall = _ratio(tp, tp + fn)
    precision = _ratio(tp, tp + fp)
    accuracy = _ratio(tp + tn, tp + tn + fp + fn)
    f1score = _ratio(2 * recall * precision, recall + precision)

    metrics = {'recall': recall,
               'precision': precision,
               'accuracy': accuracy,
               'f1score': f1score}
    return metrics

In [None]:
# neural network with augmentation
def nn_aug(x_train, y_train, n_copies=100, noise_std=0.33, epoch=30):
    """Train DenseModel on noise-augmented copies of the data and print its scores.

    Every (features, label) row is repeated ``n_copies`` times, the copies are
    shuffled, and Gaussian noise (mean 0, std ``noise_std``, rounded to two
    decimals) is added to the features. A DenseModel is trained on the
    augmented set, the training-loss curve is plotted, and recall / precision /
    accuracy / F1 plus ROC AUC are printed.

    NOTE(review): the evaluation set is the original, un-augmented ``x_train``,
    i.e. the same rows the training set was generated from, so the printed
    scores are not held-out estimates.

    Args:
        x_train: 2-D numpy array of features, one row per sample. The number of
            columns sets the network input width (previously fixed at 10).
        y_train: numpy array of 0/1 labels, one per row of ``x_train``.
        n_copies: number of noisy copies generated per sample.
        noise_std: standard deviation of the added Gaussian noise.
        epoch: number of training epochs.

    Returns:
        None. Results are printed and the loss curve is shown.
    """
    features = np.array(x_train)
    labels = np.array(y_train)
    print(features.shape, labels.shape)
    n_samples, n_features = features.shape

    # Keep features and label in one matrix so the shuffle moves them together.
    to_aug_data = np.concatenate((features, labels.reshape(-1, 1)), axis=1)
    to_aug_data = np.repeat(to_aug_data, n_copies, axis=0)
    np.random.shuffle(to_aug_data)

    aug_x = to_aug_data[:, :n_features]
    aug_y = to_aug_data[:, -1]
    # One vectorised draw replaces the per-row Python loop.
    aug_x += np.random.normal(0, noise_std, aug_x.shape).round(2)

    train_x = torch.tensor(aug_x)
    train_y = torch.tensor(aug_y)
    dataset = torch.tensor(features)
    target = torch.tensor(labels)

    # One-hot targets: column 0 marks class 0, column 1 marks class 1.
    bin_target = torch.zeros(n_samples, 2)
    bin_target[(target == 0).flatten(), 0] = 1
    bin_target[(target == 1).flatten(), 1] = 1
    train_target = torch.zeros(n_copies * n_samples, 2)
    train_target[(train_y == 0).flatten(), 0] = 1
    train_target[(train_y == 1).flatten(), 1] = 1

    trainTensors = torch.utils.data.TensorDataset(train_x, train_target)
    testTensors = torch.utils.data.TensorDataset(dataset, bin_target)

    trainingLoader = torch.utils.data.DataLoader(trainTensors, batch_size = 10, shuffle = True)
    # Default batch size of 1: the evaluation loop below scores one sample at a time.
    testingLoader = torch.utils.data.DataLoader(testTensors, shuffle = True)
    model = DenseModel(n_features)
    optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.3)
    lossfn = F.binary_cross_entropy_with_logits

    trainLoss = np.zeros(epoch)
    since = time.time()
    for i in range(1, epoch + 1):
        model.train()
        epoch_loss = 0
        epoch_size = 0
        for data, batch_target in trainingLoader:
            model.zero_grad()

            output = model(data.float())
            loss = lossfn(output, batch_target)
            # lossfn returns the batch mean; weight it by the batch size so that
            # epoch_loss / epoch_size is a true per-sample average.
            epoch_loss += loss.item() * data.size(0)

            loss.backward()
            optimizer.step()
            epoch_size += data.size(0)
        lr_scheduler.step()
        trainLoss[i - 1] = epoch_loss/epoch_size
        print(i)
    print(f"{int(time.time() - since)}s used!")

    plt.plot(np.arange(epoch), trainLoss, c = 'blue', label = 'train')
    plt.xlabel('epoch')
    plt.ylabel('training loss')
    plt.show()

    model.eval()
    tp, tn, fp, fn = 0, 0, 0, 0
    y_score = []
    y_true = []
    with torch.no_grad():
        for data, sample_target in testingLoader:
            output = model(data.float())
            # Score used for ROC AUC: sigmoid of the class-1 logit.
            y_score.append(torch.sigmoid(output).flatten()[1].cpu())
            # Predicted and true class indices (0 or 1).
            pred = torch.sigmoid(output).argmax()
            label = sample_target.flatten().argmax()
            y_true.append(label.cpu())
            print(pred, label)
            if pred == 1 and label == 1:
                tp += 1
            elif pred == 1 and label == 0:
                fp += 1
            elif pred == 0 and label == 0:
                tn += 1
            else:
                fn += 1
    metrics = metric(tp, tn, fp, fn)
    print(metrics)
    print(roc_auc_score(np.array(y_true), np.array(y_score)))

In [None]:
# neural network without augmentation
def nn(x_train, y_train, n_train=25, n_runs=5, epoch=200):
    """Train DenseModel without augmentation several times and print the scores.

    The first ``n_train`` rows are used for training and the remaining rows for
    evaluation. The same split is used in every run; runs differ only in the
    random initialisation, dropout and batch order. After each run the metrics,
    the ROC AUC and a running mean over the runs so far are printed.

    NOTE(review): the running mean skips every row whose recall is 0. That
    hides runs that have not happened yet, but it also discards completed runs
    in which no positive sample was found, which biases the summary upwards.
    Confirm this is intended.

    Args:
        x_train: 2-D numpy array of features, one row per sample. The number of
            columns sets the network input width (previously fixed at 10).
        y_train: numpy array of 0/1 labels, one per row of ``x_train``.
        n_train: number of leading rows used for training.
        n_runs: number of independent training runs.
        epoch: number of training epochs per run.

    Returns:
        None. Results are printed and one loss curve per run is shown.
    """
    n_samples, n_features = x_train.shape

    # The tensors and the split do not change between runs, so build them once.
    dataset = torch.tensor(x_train)
    labels = torch.tensor(y_train)
    # One-hot targets: column 0 marks class 0, column 1 marks class 1.
    bin_target = torch.zeros(n_samples, 2)
    bin_target[(labels == 0).flatten(), 0] = 1
    bin_target[(labels == 1).flatten(), 1] = 1

    trainTensors = torch.utils.data.TensorDataset(dataset[:n_train], bin_target[:n_train])
    testTensors = torch.utils.data.TensorDataset(dataset[n_train:], bin_target[n_train:])

    # One row per run: recall, precision, accuracy, f1, ROC AUC.
    temp2 = np.zeros((n_runs, 5))
    for cv in range(n_runs):
        # Default batch size of 1 for both loaders.
        trainingLoader = torch.utils.data.DataLoader(trainTensors, shuffle = True)
        testingLoader = torch.utils.data.DataLoader(testTensors, shuffle = True)
        model = DenseModel(n_features)
        optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.3)
        lossfn = F.binary_cross_entropy_with_logits

        trainLoss = np.zeros(epoch)
        since = time.time()
        for i in range(1, epoch + 1):
            model.train()
            epoch_loss = 0
            epoch_size = 0
            for data, batch_target in trainingLoader:
                model.zero_grad()

                output = model(data.float())
                loss = lossfn(output, batch_target)
                epoch_loss += loss.item()

                loss.backward()
                optimizer.step()
                epoch_size += data.size(0)
            lr_scheduler.step()
            trainLoss[i - 1] = epoch_loss/epoch_size

        print(f"{int(time.time() - since)}s used!")

        plt.plot(np.arange(epoch), trainLoss, c = 'blue', label = 'train')
        plt.show()

        model.eval()
        tp, tn, fp, fn = 0, 0, 0, 0
        y_score = []
        y_true = []
        with torch.no_grad():
            for data, sample_target in testingLoader:
                output = model(data.float())
                # Score used for ROC AUC: sigmoid of the class-1 logit.
                y_score.append(torch.sigmoid(output).flatten()[1].cpu())
                # Predicted and true class indices (0 or 1).
                pred = torch.sigmoid(output).argmax()
                label = sample_target.flatten().argmax()
                y_true.append(label.cpu())
                if pred == 1 and label == 1:
                    tp += 1
                elif pred == 1 and label == 0:
                    fp += 1
                elif pred == 0 and label == 0:
                    tn += 1
                else:
                    fn += 1

        if tp == 0:
            # With no true positives, recall, precision and F1 are all 0 (or
            # undefined); report 0 instead of dividing by zero. Keys match the
            # ones returned by metric().
            metrics = {'recall': 0, 'precision': 0, 'accuracy': (tp + tn)/(tp + tn + fp + fn), 'f1score': 0}
        else:
            metrics = metric(tp, tn, fp, fn)
        print(metrics)
        for j, met in enumerate(metrics):
            temp2[cv, j] = metrics[met]
        auc = roc_auc_score(np.array(y_true), np.array(y_score))
        temp2[cv, -1] = auc
        print(auc)
        # Running mean over rows with non-zero recall (see NOTE in the docstring).
        print(temp2[~(temp2[:,0] == 0)].mean(axis = 0))

# Flourishing post tuning

In [None]:
# post
# Use the post-study Flourishing Scale answers as the prediction target.
pre_post = 'post'

# get_flour returns one row per student: [student id, 0/1 label], where the
# label is a median split of the summed questionnaire score.
label2 = get_flour(pre_post)

In [None]:
# all 78 features
# Join every sensing feature table with the flourishing labels
# (combine_feature and norm are defined earlier in the notebook).
data = combine_feature(audio, activity, bluetooth, darkNights, plock, conv, phoneChar, label2)
normData = norm(data) # normalization
# The label is the last column of the combined, un-normalized table.
y = data[:, -1]
x_train = normData
y_train = y

In [None]:
# original
# Baseline: cross-validated score table of the classical classifiers on the
# full, unreduced feature matrix (displayed as the cell output).
collect_socre(x_train,y_train)

In [None]:
#Removing features with low variance
# A threshold of 0 only removes columns that are constant across all samples.
sel = feature_selection.VarianceThreshold(0)
train_variance = sel.fit_transform(x_train)
# Displayed for inspection only: shows how many columns survive the filter.
train_variance.shape

In [None]:
# find correlations to target
# Append the labels as the last column so that feature/feature and
# feature/target correlations all come from one matrix.
df=np.concatenate((x_train,data[:,-1].reshape(-1,1)),axis = 1)
print(df.shape)
# Columns are named "1".."n" after their 1-based position in x_train; n is
# taken from the matrix instead of being fixed at 78.
a=[str(i) for i in range(1, x_train.shape[1] + 1)]
a.append('target')
df = pd.DataFrame(df,columns=a)

# Absolute Pearson correlations; the 'target' column ranks features by their
# linear association with the label (the first entry is 'target' itself).
corr_matrix = df.corr().abs()
print(corr_matrix['target'].sort_values(ascending=False).head(10))

In [None]:
# Heatmap of the absolute correlation matrix (all features plus the target).
sns.heatmap(corr_matrix)

In [None]:
# Select upper triangle of correlation matrix
# (k=1 leaves out the diagonal, so every pair of columns is inspected once).
# The builtin `bool` replaces `np.bool`, an alias removed in NumPy 1.24.
matrix = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
# Find index of feature columns with high correlation.
# 'target' is never dropped: the slicing below relies on it being the last column.
to_drop = [column for column in matrix.columns
           if column != 'target' and any(matrix[column] > 0.8)]
print('Columns to drop: ' , (len(to_drop)))
print(to_drop)
new=df.drop(to_drop,axis=1)
new=new.to_numpy()
new_x_train=new[:,:-1]
new_y_train=new[:,-1]

# drop 45 features
# feature extraction: keep the 20 remaining features with the best ANOVA F-score
k_best = feature_selection.SelectKBest(score_func=feature_selection.f_classif, k=20)
# fit and transform
new_x_train = k_best.fit_transform(new_x_train,y_train)
print(new_x_train.shape)
print(collect_socre(new_x_train,y_train))

In [None]:
# find the corresponding index
# Map every selected column back to its position in x_train by comparing the
# column values. All selected columns are scanned (the previous cell keeps 20,
# not 10), and each x_train column is recorded at most once.
new_features = []
new_feature_label = []
for i in range(x_train.shape[1]):
    if any(np.array_equal(x_train[:, i], new_x_train[:, k])
           for k in range(new_x_train.shape[1])):
        new_features.append(i)
        new_feature_label.append(x_label[i])
        print(i, x_label[i])

In [None]:
# kbest
# Univariate selection on the full feature matrix: keep the n features with the
# best ANOVA F-score and print the classifier score table for that subset.
# The loop covers a single value (n = 10); widen the range to compare sizes.
for n in range(10,11):
    # feature extraction
    k_best = feature_selection.SelectKBest(score_func=feature_selection.f_classif, k=n)
    # fit and transform
    new_x_train = k_best.fit_transform(x_train,y_train)
    print(new_x_train.shape)
    print(collect_socre(new_x_train,y_train))

In [None]:
# find the corresponding index
# Map every selected column back to its position in x_train by comparing the
# column values. The number of selected columns is read from new_x_train, and
# each x_train column is recorded at most once.
new_features = []
new_feature_label = []
for i in range(x_train.shape[1]):
    if any(np.array_equal(x_train[:, i], new_x_train[:, k])
           for k in range(new_x_train.shape[1])):
        new_features.append(i)
        new_feature_label.append(x_label[i])
        print(i, x_label[i])

In [None]:
# Select the top 10 features by recursive feature elimination (RFE) driven by a
# 1000-tree random forest, then print the classifier score table for that subset.
# The loop covers a single value (n = 10); widen the range to compare sizes.
for n in range(10,11):
    rfe = feature_selection.RFE(RandomForestClassifier(n_estimators=1000,random_state=0), n_features_to_select=n)
    new_x_train = rfe.fit_transform(x_train, y_train)
    print(new_x_train.shape)
    print(collect_socre(new_x_train,y_train))

In [None]:
# find the corresponding index
# Map every selected column back to its position in x_train by comparing the
# column values. The number of selected columns is read from new_x_train, and
# each x_train column is recorded at most once. The resulting new_features list
# is the one reused by the later sections of the notebook.
new_features = []
new_feature_label = []
for i in range(x_train.shape[1]):
    if any(np.array_equal(x_train[:, i], new_x_train[:, k])
           for k in range(new_x_train.shape[1])):
        new_features.append(i)
        new_feature_label.append(x_label[i])
        print(i, x_label[i])

In [None]:
# find correlations to target
# Same correlation check as for the full matrix, restricted to the selected
# features; the labels are appended as the last column.
df=np.concatenate((new_x_train,data[:,-1].reshape(-1,1)),axis = 1)
print(df.shape)
# Columns are named "1".."n" after their position in new_x_train; n is taken
# from the matrix instead of being fixed at 10.
a=[str(i) for i in range(1, new_x_train.shape[1] + 1)]
a.append('target')
df = pd.DataFrame(df,columns=a)

corr_matrix = df.corr().abs()
print(corr_matrix['target'].sort_values(ascending=False).head(10))
sns.heatmap(corr_matrix)

# select 10 features and apply to other measures 

[36, 43, 51, 56, 58, 66, 67, 69, 71, 73]
['conversation_week9', 'act0_week6', 'act1_week4', 'act1_week9', 'act2_week1', 'act2_week9', 'act2_week10', 'audio0_week2', 'audio0_week4', 'audio0_week6']

In [None]:
# Show the selected feature indices (positions in x_train) and their labels.
print(new_features)
print(new_feature_label)

In [None]:
# neural network with augmentation
# on top 10 features
# Only the selected columns are passed in; nn_aug prints its metrics and ROC AUC.
nn_aug(x_train[:,new_features], y_train)

In [None]:
# neural network without augmentation
# Same selected columns, trained on the raw rows; nn prints per-run metrics.
nn(x_train[:,new_features], y_train)

# pos_label post

In [None]:
# post
# Switch the prediction target to the post-study PANAS survey.
pre_post = 'post'

# get_panas returns two label tables, unpacked here as positive-affect and
# negative-affect labels (naming taken from the variable names; confirm
# against get_panas).
pos_label, neg_label = get_panas(pre_post)

In [None]:
# extract best features
def extract_feature(dataset, features):
    """Keep the first column, the requested feature columns and the last column.

    Args:
        dataset: 2-D numpy array. Column 0 and the last column are always kept
            (the callers in this notebook pass a table whose last column is
            the label).
        features: sequence of column indices into ``dataset`` to keep, in the
            order they should appear in the result.

    Returns:
        2-D array with ``len(features) + 2`` columns:
        [dataset[:, 0], dataset[:, features[0]], ..., dataset[:, -1]].

    NOTE(review): ``features`` is applied to ``dataset`` as-is. The notebook
    computes these indices against ``x_train``; if that matrix does not contain
    the leading column of ``dataset``, the indices are off by one here. Confirm
    against combine_feature / norm.
    """
    # Use the argument, not the notebook-level `new_features`, and size the
    # result from it instead of assuming exactly 10 features.
    features = list(features)
    selected = np.zeros((dataset.shape[0], len(features)))
    for j, column in enumerate(features):
        selected[:, j] = dataset[:, column]

    first_column = dataset[:, 0].reshape(-1, 1)
    last_column = dataset[:, -1].reshape(-1, 1)
    return np.concatenate((first_column, selected, last_column), axis = 1)

In [None]:
# Rebuild the data set with the PANAS positive-affect labels as target.
data = combine_feature(audio, activity, bluetooth, darkNights, plock, conv, phoneChar, pos_label)

# Keep only the features selected on the flourishing (post) target, plus the
# first and last columns that extract_feature always preserves.
data = extract_feature(data, new_features)
normData = norm(data) # normalization

# The label is the last column of the reduced, un-normalized table.
y = data[:, -1]
x_train = normData
y_train = y


In [None]:
# for testing data
# Metrics computed for every classifier, in the column order used by collect_socre.
_CV_SCORING = ['roc_auc', 'accuracy', 'precision', 'recall', 'f1']


def _cv_report(model, x_train, y_train, cv=9):
    """Cross-validate ``model`` once and return the mean of each metric.

    All metrics are computed in a single cross-validation pass, so every fold
    is fitted once instead of once per metric. With an integer ``cv`` the folds
    are deterministic, so the means equal those of separate
    ``cross_val_score`` calls.

    Returns:
        list [roc_auc, accuracy, precision, recall, f1] of fold means.
    """
    # Kept from the original helpers (global, session-wide effect), but applied
    # before scoring so that it also covers the first call.
    warnings.filterwarnings('ignore')
    scores = model_selection.cross_validate(model, x_train, y_train, cv=cv, scoring=_CV_SCORING)
    return [scores['test_' + name].mean() for name in _CV_SCORING]


# random forest
def rfc_clf(x_train, y_train):
    """9-fold report for a shallow random forest (depth 3, 90 trees)."""
    model = RandomForestClassifier(random_state=0,max_depth=3,n_estimators=90)
    return _cv_report(model, x_train, y_train)


# svm
def svm_clf(x_train, y_train):
    """9-fold report for a linear-kernel SVM."""
    model = svm.SVC(random_state=0,gamma='auto',degree=1,kernel='linear')
    return _cv_report(model, x_train, y_train)


# knn
def knn_clf(x_train, y_train):
    """9-fold report for a 3-nearest-neighbours classifier."""
    model = KNeighborsClassifier(algorithm='auto',n_neighbors=3)
    return _cv_report(model, x_train, y_train)


# LogisticRegression
def lr_(x_train, y_train):
    """9-fold report for L2 logistic regression with class weights 0.6 / 0.4."""
    model = LogisticRegression(solver='liblinear',random_state=0,class_weight={1: 0.6, 0: 0.4},penalty='l2')
    return _cv_report(model, x_train, y_train)


def collect_socre(x_train,y_train):
    """Score all four classifiers and return the results as one table.

    Returns:
        DataFrame with one row per classifier ('rfc', 'lr', 'knn', 'svm') and
        the columns 'auc_roc', 'accuracy', 'precision', 'recall', 'f1_score'.
    """
    score_matrix=np.array((
            rfc_clf(x_train, y_train),
            lr_(x_train, y_train),
            knn_clf(x_train, y_train),
            svm_clf(x_train, y_train) ))
    df = pd.DataFrame(data=score_matrix,columns=['auc_roc','accuracy','precision','recall','f1_score'],index=['rfc','lr','knn','svm'])
    return df


In [None]:
# Classical classifiers on the PANAS positive-affect target (selected features).
print(collect_socre(x_train,y_train))

In [None]:
# neural network with augmentation
# Target: PANAS positive affect; nn_aug prints its metrics and ROC AUC.
nn_aug(x_train, y_train)

In [None]:
# neural network without augmentation
# Target: PANAS positive affect; nn prints per-run metrics.
nn(x_train, y_train)

# neg_label post

In [None]:
# Rebuild the data set with the PANAS negative-affect labels as target,
# reusing the features selected on the flourishing (post) target.
data = combine_feature(audio, activity, bluetooth, darkNights, plock, conv, phoneChar, neg_label)
data = extract_feature(data, new_features)
normData = norm(data) # normalization
# The label is the last column of the reduced, un-normalized table.
y = data[:, -1]
x_train = normData
y_train = y

In [None]:
# Classical classifiers on the PANAS negative-affect target (selected features).
print(collect_socre(x_train,y_train))

In [None]:
# neural network with augmentation
# Target: PANAS negative affect; nn_aug prints its metrics and ROC AUC.
nn_aug(x_train, y_train)

In [None]:
# neural network without augmentation
# Target: PANAS negative affect; nn prints per-run metrics.
nn(x_train, y_train)

# flourishing pre

In [None]:
# Use the pre-study Flourishing Scale answers as the prediction target.
pre_post = 'pre'

# One row per student: [student id, 0/1 label from a median split of the score].
label2 = get_flour(pre_post)

In [None]:
# Rebuild the data set with the pre-study flourishing labels as target,
# reusing the features selected on the flourishing (post) target.
data = combine_feature(audio, activity, bluetooth, darkNights, plock, conv, phoneChar, label2)
data = extract_feature(data, new_features)
normData = norm(data) # normalization
# The label is the last column of the reduced, un-normalized table.
y = data[:, -1]
x_train = normData
y_train = y

In [None]:
# Classical classifiers on the pre-study flourishing target (selected features).
print(collect_socre(x_train,y_train))

In [None]:
# neural network with augmentation
# Target: pre-study flourishing; nn_aug prints its metrics and ROC AUC.
nn_aug(x_train, y_train)

In [None]:
# neural network without augmentation
# Target: pre-study flourishing; nn prints per-run metrics.
nn(x_train, y_train)

# flourishing pre and post

In [None]:
# pre and post
# get_flour_mean is defined outside this section; presumably it builds the
# labels from both the pre- and post-study flourishing scores. TODO confirm.
label2 = get_flour_mean()

In [None]:
# Rebuild the data set with the labels returned by get_flour_mean as target,
# reusing the features selected on the flourishing (post) target.
data = combine_feature(audio, activity, bluetooth, darkNights, plock, conv, phoneChar, label2)
data = extract_feature(data, new_features)
normData = norm(data) # normalization
# The label is the last column of the reduced, un-normalized table.
y = data[:, -1]
x_train = normData
y_train = y

In [None]:
# Classical classifiers on the get_flour_mean target (selected features).
print(collect_socre(x_train,y_train))

In [None]:
# neural network with augmentation
# Target: labels from get_flour_mean; nn_aug prints its metrics and ROC AUC.
nn_aug(x_train, y_train)

In [None]:
# neural network without augmentation
# Target: labels from get_flour_mean; nn prints per-run metrics.
nn(x_train, y_train)