In [1]:
import h5py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pyeeg
import scipy.signal
import time

## Importer les données
Les données sont sous la forme h5. Il faut donc les importer puis créer x dataset en fonction du nombre de dataset présents dans le fichier (10). On utilise ici r+ pour avoir droit de lecture et d'écriture.

Chaque Dataset représente les mesures des accéléromètres, eeg et l'oximeter pour 38289 différentes mesures.
Les catégories de sommeil sont des entiers entre 0 et 5 présentés dans un csv.

In [2]:
dset = h5py.File('train.h5', 'r')

In [3]:
y_train = pd.read_csv('train_y.csv').values

## Liste des données et extraction 

Voici la liste des jeux de données disponible et leur extraction

In [5]:
print(list(f.keys()))


['accelerometer_x', 'accelerometer_y', 'accelerometer_z', 'eeg_1', 'eeg_2', 'eeg_3', 'eeg_4', 'eeg_5', 'eeg_6', 'eeg_7', 'pulse_oximeter_infrared']


In [None]:
f.flush()
f.close()

In [None]:
dsetx = f['accelerometer_x']
dsety = f['accelerometer_y']
dsetz = f['accelerometer_z']

In [None]:
dsetx4 = dsetx[y_test[:,1]==4,:]
dsety4 = dsety[y_test[:,1]==4,:]
dsetz4 = dsetz[y_test[:,1]==4,:]

name=1
for i in [dsetx4,dsety4,dsetz4] :
    plt.scatter([i for i in range (len(i[1,:]))],i[1,:])
    plt.savefig('acc_'+str(name)+'.png')
    plt.clf()
    name+=1

In [None]:
def plotting(h5,list_data_name, output):
    for dset_name in list_data_name :
        dset=h5[dset_name]
        dset2=dset[y_test[:,1]==output,:]
        plt.scatter([i for i in range (len(dset2[1,:]))],dset2[1,:])
        plt.savefig('output_'+str(output)+dset_name+'.png')
        plt.clf()

In [None]:
for j in range(1,5):
    plotting(f,['accelerometer_x', 'accelerometer_y', 'accelerometer_z'],j)

## Key Features extraction


Power spectral Intensity          bin_power()
petrosian Fractal Dimension       pdf()
Higucho Fractal Dimension         hdf()
Hjorth mobility and complexity    hjorth()
Spectral Entrocpy (Shannon)       spectral_entropy()
SVD Entropy                       svd_entropy()
Fisher information                fisher_info()
Approximate entropy               ap_entropy()
Detrended Fluctuation Analysis    dfa()
Hurst Exponent                    hurst() 

Creating a Vector with the indices

numpy.apply_along_axis(func1d, axis = 1, arr, *args, **kwargs)

In [19]:
def abs_mean(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Col = np.apply_along_axis(np.mean,1,np.apply_along_axis(abs,1,Dset_int))
        Features[str(i)+'_abs_mean']=Col
        
        
    return Features

def mean(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Col = np.apply_along_axis(np.mean,1,Dset_int)
        Features[str(i)+'_mean']=Col
    return Features

def max_value(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Col = np.apply_along_axis(max,1,Dset_int)
        Features[str(i)+'_max_value']=Col
        
    return Features


def min_value(Features, lst_data, dset) :        
    
    for i in lst_data :
        
        Dset_int=dset[i]
        Col = np.apply_along_axis(min,1,Dset_int)
        Features[str(i)+'_min_value']=Col
        
    return Features


def max_abs_value(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Col = np.apply_along_axis(max,1,np.apply_along_axis(abs,1,Dset_int))
        Features[str(i)+'_max_abs_value']=Col
        
    return Features


def abs_mean_derivate(Features, lst_data, dset):
   
    for i in lst_data :
        Dset_int=dset[i]
        multiplicative_coef = len(Dset_int[1])/30
        Col = np.apply_along_axis(np.mean,1,np.apply_along_axis(abs,1,np.apply_along_axis(np.gradient,1,Dset_int)*multiplicative_coef))
        Features[str(i)+'_abs_mean_derivate']=Col
        
    return Features

def max_abs_derivate(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        multiplicative_coef = len(Dset_int[1])/30
        Col = np.apply_along_axis(max,1,np.apply_along_axis(abs,1,np.apply_along_axis(np.gradient,1,Dset_int)*multiplicative_coef))
        Features[str(i)+'_max_abs_derivate']=Col
        
    return Features


def max_value_derivate(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        multiplicative_coef = len(Dset_int[1])/30
        Col = np.apply_along_axis(max,1,np.apply_along_axis(np.gradient,1,Dset_int)*multiplicative_coef)
        Features[str(i)+'_max_value_derivate']=Col
       
    return Features


def min_value_derivate(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        multiplicative_coef = len(Dset_int[1])/30
        Col = np.apply_along_axis(min,1,np.apply_along_axis(np.gradient,1,Dset_int)*multiplicative_coef)
        Features[str(i)+'_min_value_derivate']=Col
        
    return Features


def freq_max_power(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Col = np.apply_along_axis(max,1,(np.apply_along_axis(abs,1,np.apply_along_axis(np.fft.fft,1,Dset_int))))
        Features[str(i)+'_freq_max_power']=Col
       
    return Features


def freq_max_value(Features, lst_data, dset):
    
    
    for i in  lst_data:
        Dset_int=dset[i]
        Col = pd.DataFrame(index = np.arange(Dset_int.shape[0]))
        Col['freq'] = ""
        FFTf = np.fft.fftfreq(len(Dset_int[1]))*len(Dset_int[1])/30
        FFTf = pd.DataFrame(FFTf)
        FFTi =np.apply_along_axis(np.argmax,1,( np.apply_along_axis(abs,1,np.apply_along_axis(np.fft.fft,1,Dset_int))))
        FFTi = pd.DataFrame(FFTi)
        for j in range(Dset_int.shape[0]) : 
            Col.iloc[j,0] = FFTf.iloc[FFTi.iloc[j,0],0]

        Features[str(i)+'_freq_max_value'] = Col  
        
    return Features


def max_amplitude_fft(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Col = np.apply_along_axis(max,1,(np.apply_along_axis(abs,1,np.apply_along_axis(np.fft.fft,1,Dset_int))))
        Features[str(i)+'_max_amplitude_fft']=Col
       
    return Features

#MAX_POWER = max_amplitude_fft(Features, lst_data, dset)

def freq_max_amplitude_fft(Features, lst_data, dset):
    
    
    for i in  lst_data:
        Dset_int=dset[i]
        Col = pd.DataFrame(index = np.arange(Dset_int.shape[0]))
        Col['freq'] = ""
        FFTf = np.fft.fftfreq(len(Dset_int[1]))*len(Dset_int[1])/30
        FFTf = pd.DataFrame(FFTf)
        FFTi =np.apply_along_axis(np.argmax,1,( np.apply_along_axis(abs,1,np.apply_along_axis(np.fft.fft,1,Dset_int))))
        FFTi = pd.DataFrame(FFTi)
        for j in range(Dset_int.shape[0]) : 
            Col.iloc[j,0] = FFTf.iloc[abs(FFTi.iloc[j,0]),0]

        Features[str(i)+'_freq_max_amplitude_fft'] = Col  
        
    return Features

def mean_amplitude_fft(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Col = np.apply_along_axis(np.mean,1,(np.apply_along_axis(abs,1,np.apply_along_axis(np.fft.fft,1,Dset_int))))
        Features[str(i)+'_mean_amplitude_fft']=Col
       
    return Features

def peak(Features, lst_data, dset):
    
    for i in  lst_data:
        
        Dset_int=dset[i]
        Tableau_signal = pd.DataFrame(Dset_int[:,:])
        Col = pd.DataFrame(index = np.arange(Dset_int.shape[0]))
        Col['peak'] = ""
        DF = np.apply_along_axis(scipy.signal.find_peaks,1,np.apply_along_axis(abs,1,Dset_int),distance = Dset_int.shape[1])
        DF = pd.DataFrame(DF)
        DF = DF[0]
        DF = DF.apply(int)
        
        for j in range(Dset_int.shape[0]) : 
            Col.iloc[j,0] = Tableau_signal.iloc[j,DF.iloc[j]]/np.mean(abs(Tableau_signal.iloc[j,:]))
        
        Features[str(i)+'_peak'] = Col   
        
    return Features

def puissance_moy_periodogram(Features, lst_data, dset) : 
   
     
    for i in lst_data :
        Dset_int=dset[i]
        Periodrogram_array =np.apply_along_axis(scipy.signal.periodogram,1,Dset_int)
        Periodrogram_array = Periodrogram_array[:,1,:] 
        Col = np.apply_along_axis(np.mean,1,Periodrogram_array)
        Features[str(i)+'__mean_power_periodogram']=Col
       
    return Features
 
def freq_max_power_periodogram(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Periodrogram_array =np.apply_along_axis(scipy.signal.periodogram,1,Dset_int)
        Periodrogram_array = Periodrogram_array[:,1,:] 
        Col = np.apply_along_axis(max,1,Periodrogram_array)
        Features[str(i)+'_freq_max_power_periodogram']=Col
       
    return Features

def freq_max_value_periodogram(Features, lst_data, dset):
    
    for i in  lst_data:
        
        Dset_int=dset[i]
        Col = pd.DataFrame(index = np.arange(Dset_int.shape[0]))
        Col['freq'] = ""
        Periodrogram_array = np.apply_along_axis(scipy.signal.periodogram,1,Dset_int)
        Periodogram_frequences = Periodrogram_array[1,0,:]
        Periodogram_frequences = pd.DataFrame(Periodogram_frequences)
        Periodrogram_array = Periodrogram_array[:,1,:] 
        Periodrogram_array = np.apply_along_axis(np.argmax,1,Periodrogram_array)
        Periodrogram_array = pd.DataFrame(Periodrogram_array)
        
        for j in range(Dset_int.shape[0]) : 
            Col.iloc[j,0] = Periodogram_frequences.iloc[abs(Periodrogram_array.iloc[j,0]),0]
    
        Features[str(i)+'_freq_max_value_periodrogram'] = Col  
        
    return Features
 
#Mean_POWER = mean_amplitude_fft(Features, lst_data, dset)

#Bin frequences

def bin_power(X, Band, Fs):
    
    """Compute power in each frequency bin specified by Band from FFT result of
    X. By default, X is a real signal.
    Note
    -----
    A real signal can be synthesized, thus not real.
    Parameters
    -----------
    Band
        list
        boundary frequencies (in Hz) of bins. They can be unequal bins, e.g.
        [0.5,4,7,12,30] which are delta, theta, alpha and beta respectively.
        You can also use range() function of Python to generate equal bins and
        pass the generated list to this function.
        Each element of Band is a physical frequency and shall not exceed the
        Nyquist frequency, i.e., half of sampling frequency.
     X
        list
        a 1-D real time series.
    Fs
        integer
        the sampling rate in physical frequency
    Returns
    -------
    Power
        list
        spectral power in each frequency bin.
    Power_ratio
        list
        spectral power in each frequency bin normalized by total power in ALL
        frequency bins.
    """

    C = np.fft.fft(X)
    C = abs(C)
    Power = np.zeros(len(Band) - 1)
    for Freq_Index in range(0, len(Band) - 1):
        Freq = float(Band[Freq_Index])
        Next_Freq = float(Band[Freq_Index + 1])
        Power[Freq_Index] = np.sum(
            C[int(Freq / Fs * len(X)) : int(Next_Freq / Fs * len(X))]
        )
    Power_Ratio = Power / sum(Power)
    return Power, Power_Ratio


def bin_power_features(Features, lst_data, dset):
    
    for i in lst_data :
        Dset_int=dset[i]
        Resultat_int = np.apply_along_axis(bin_power,1,Dset_int, Band = [0.5,4,7,12,30], Fs = len(Dset_int[1])/30)
        Array_somme_frequence = Resultat_int[:,0,:]
        Array_somme_frequence = pd.DataFrame(Array_somme_frequence)
        Array_somme_frequence.columns = ['Delta','Theta','Alpha','Beta']
        Array_somme_frequence_normalisee = Resultat_int[:,1,:]
        Array_somme_frequence_normalisee = pd.DataFrame(Array_somme_frequence_normalisee)
        Array_somme_frequence_normalisee.columns = ['Delta_N','Theta_N','Alpha_N','Beta_N']
        Features[str(i)+'Delta'] = Array_somme_frequence['Delta']
        Features[str(i)+'Theta'] = Array_somme_frequence['Theta']
        Features[str(i)+'Alpha'] = Array_somme_frequence['Alpha']
        Features[str(i)+'Beta'] = Array_somme_frequence['Beta']
        Features[str(i)+'Delta_N'] = Array_somme_frequence_normalisee['Delta_N']
        Features[str(i)+'Theta_N'] = Array_somme_frequence_normalisee['Theta_N']
        Features[str(i)+'Alpha_N'] = Array_somme_frequence_normalisee['Alpha_N']
        Features[str(i)+'Beta_N'] = Array_somme_frequence_normalisee['Beta_N']
       
    return Features


### Signal analysis
Pyeeg module 
Goal : créer des fonctions pour extraire pour un dataset un certain features. 
On pourra ainsi itérer pour plusieurs fonctions afin d'obtenir nos features pour chaque input.
On stocke les valeurs dans un Dataset (Features) afin de garder une tracabilité des colonnes

In [20]:
def fish_info_feat(Features, lst_data, dset):
    for i in lst_data :
        Dset_int=dset[i]
        Col=np.apply_along_axis(lambda x : pyeeg.fisher_info(x,1,4),1, Dset_int)
        Features[str(i)+'_fish_info']=Col
    return Features


In [None]:
Features=fish_info_feat(Features, ['eeg_1','eeg_2'],f)
Features

## Features extraction on a Dataset

Function qui prend un H5 en entrée, et qui lui applique toutes nos fonctions d'extraction de Features. Il retourne un DataFrame contenant tous les Features extraits

In [None]:
Features=pd.DataFrame()
all_dset=list(f.keys())
col_eeg_oxy=['eeg_1', 'eeg_2', 'eeg_3', 'eeg_4', 'eeg_5', 'eeg_6', 'eeg_7', 'pulse_oximeter_infrared']
col_acc=['accelerometer_x', 'accelerometer_y', 'accelerometer_z']

In [None]:
Features=abs_mean(Features,all_dset,dset)
Features=max_value(Features,all_dset,dset)
Features=min_value(Features,all_dset,dset)
Features=max_abs_value(Features,all_dset,dset)
Features=abs_mean_derivate(Features,all_dset,dset)
Features=max_abs_derivate(Features,all_dset,dset)
Features=max_value_derivate(Features,all_dset,dset)
Features=min_value_derivate(Features,all_dset,dset)
Features=freq_max_power(Features,all_dset,dset)
Features=freq_max_value(Features,all_dset,dset)
Features=peak(Features,all_dset,dset)
Features=fish_info_feat(Features,col_eeg_oxy,dset)
Features= bin_power_features(Features, lst_data, dset)
Features=freq_max_power_periodogram(Features, lst_data, dset)
Features=puissance_moy_periodogram(Features, lst_data, dset)
Features=max_amplitude_fft(Features, lst_data, dset)
Features=freq_max_amplitude_fft(Features, lst_data, dset)
Features=mean_amplitude_fft(Features, lst_data, dset)

## Training/Testing

Ensemble de cellules permettant d'entrainer le modèle sur une partie du Dataset pour le tester sur une autre .

Afin de créer le Dataset, il y a deux possibilités : lire un csv ou recréer la Base de données

Voir cross_validation

In [2]:
Features_train = pd.read_csv('Features_train_2.csv')


In [11]:
y = pd.read_csv('train_y.csv').values[:,1]

In [14]:
Features_train

Unnamed: 0,accelerometer_x_abs_mean,accelerometer_y_abs_mean,accelerometer_z_abs_mean,eeg_1_abs_mean,eeg_2_abs_mean,eeg_3_abs_mean,eeg_4_abs_mean,eeg_5_abs_mean,eeg_6_abs_mean,eeg_7_abs_mean,...,eeg_7_freq_max_power_periodogram,pulse_oximeter_infrared_freq_max_power_periodogram,eeg_1__mean_power_periodogram,eeg_2__mean_power_periodogram,eeg_3__mean_power_periodogram,eeg_4__mean_power_periodogram,eeg_5__mean_power_periodogram,eeg_6__mean_power_periodogram,eeg_7__mean_power_periodogram,pulse_oximeter_infrared__mean_power_periodogram
0,0.000878,0.001156,0.001194,12.676050,19.072149,10.268462,30.081106,30.971897,31.231926,29.642679,...,1.665047e+05,3.383416e+06,6.549536e+02,1.348695e+03,3.724675e+02,3.356764e+03,3.507913e+03,3.640183e+03,3.064966e+03,2.489927e+05
1,0.000251,0.000301,0.000597,19.206153,22.487245,11.204950,42.917153,41.859833,42.839071,39.344419,...,3.040649e+05,1.798120e+08,1.266415e+03,1.627274e+03,4.289486e+02,6.282286e+03,6.167526e+03,5.984686e+03,5.241262e+03,1.727664e+06
2,0.001105,0.000887,0.000681,113.276949,122.942798,15.992673,35.244978,25.861371,136.784026,20.155488,...,9.277706e+04,9.708244e+05,4.735901e+05,5.175254e+05,2.422941e+03,6.587978e+03,4.058287e+03,5.206287e+05,1.552734e+03,2.026983e+04
3,0.000363,0.000294,0.000448,4.666600,6.934011,5.551572,11.790550,11.095672,10.850103,11.465123,...,3.911485e+04,2.996032e+04,7.728450e+01,1.780571e+02,1.111328e+02,6.734243e+02,6.435546e+02,5.201921e+02,6.481109e+02,1.183698e+03
4,0.000409,0.000363,0.000508,6.071155,8.808490,7.745231,14.442470,108.133630,13.211851,109.852836,...,2.739799e+06,7.563405e+04,1.183036e+02,2.425466e+02,1.868139e+02,6.866846e+02,5.657866e+04,5.708823e+02,5.734552e+04,2.660638e+03
5,0.000578,0.000489,0.000470,19.861206,34.091325,34.089369,63.013984,67.109527,65.932095,69.120839,...,1.492201e+06,5.292884e+05,1.578072e+03,4.431469e+03,5.292224e+03,1.446546e+04,1.727543e+04,1.644076e+04,1.820011e+04,8.186016e+03
6,0.000960,0.000721,0.000594,8.855150,11.541233,6.952787,25.829003,14.350972,27.763046,14.561313,...,6.139267e+04,7.958983e+05,3.540425e+02,6.369810e+02,1.687207e+02,3.081055e+03,1.174467e+03,3.485162e+03,1.030249e+03,1.534190e+04
7,0.000438,0.000988,0.002113,8.026057,11.881504,7.505039,19.715200,24.205009,18.912416,23.688742,...,1.554812e+05,2.953549e+05,2.062495e+02,4.445425e+02,1.793020e+02,1.279857e+03,1.851333e+03,1.170006e+03,1.704942e+03,4.325660e+03
8,0.000667,0.000648,0.001619,4.263364,5.114600,4.980687,23.385308,15.671199,22.881291,13.972915,...,5.457055e+04,8.396509e+05,6.837765e+01,1.057166e+02,1.230281e+02,2.834839e+03,1.291444e+03,2.623035e+03,8.662361e+02,1.902400e+04
9,0.002641,0.001518,0.002320,9.190740,11.701908,6.609174,108.076624,20.883712,107.142459,20.595925,...,6.858819e+04,8.050788e+06,2.674210e+02,4.356099e+02,1.420512e+02,1.557244e+05,1.444579e+03,1.554657e+05,1.374229e+03,8.929256e+04


In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(Features_train.values, y, test_size = 0.25, random_state = 0)
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train=sc.fit_transform(X_train)
X_test=sc.transform(X_test)

### Random Forrest

In [19]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=10,n_jobs=-1)
model.fit(X_train,y_train)
y_pred = model.predict(X_test)
 
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

KeyboardInterrupt: 

### XG BOOST

In [14]:
from xgboost import XGBClassifier

classifier = XGBClassifier(max_depth=5, n_estimators=10, n_jobs=-1)
classifier.fit(X_train,y_train)
y_pred = classifier.predict(X_test)

from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[ 492,    2,  293,    8,  105],
       [  74,    1,  169,    6,   64],
       [ 153,    3, 3453,  219,  487],
       [  44,    1,  572,  810,   13],
       [ 110,    2, 1083,   18, 1391]], dtype=int64)

In [15]:
classifier.score(X_test,y_test)

0.642118458163585

### Double split

### K-Fold Cross Validation

#### Random Forrest

In [17]:
Feat_tot=Features_train
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
sc_kfold = StandardScaler()
Feat_tot = sc_kfold.fit_transform(Feat_tot)

## Model 
classifier = RandomForestClassifier(n_estimators=100,n_jobs=-1)

scores = cross_val_score(classifier, Feat_tot, y, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))


  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Accuracy: 0.75 (+/- 0.01)


#### XGBOOST

In [18]:
Feat_tot=Features_train
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
sc_kfold = StandardScaler()
Feat_tot = sc_kfold.fit_transform(Feat_tot)

## Model 
classifier = XGBClassifier(max_depth=5, n_estimators=100, n_jobs=-1)

scores = cross_val_score(classifier, Feat_tot, y, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


Accuracy: 0.64 (+/- 0.01)


## Training sans test

Ensemble de cellules permettant d'entrainer le modèle sur  Dataset pour le tester sur une autre 

Voir cross_validation

In [21]:
dset_pred = h5py.File('test.h5', 'r')
y_train=pd.read_csv('train_y.csv').values[:,1]
dset = h5py.File('train.h5', 'r')

In [22]:
all_dset=list(dset.keys())
col_eeg_oxy=['eeg_1', 'eeg_2', 'eeg_3', 'eeg_4', 'eeg_5', 'eeg_6', 'eeg_7', 'pulse_oximeter_infrared']
col_acc=['accelerometer_x', 'accelerometer_y', 'accelerometer_z']

In [24]:
Features_train=pd.DataFrame()
Features_train=abs_mean(Features_train,all_dset,dset)
Features_train=max_value(Features_train,all_dset,dset)
Features_train=min_value(Features_train,all_dset,dset)
Features_train=max_abs_value(Features_train,all_dset,dset)
Features_train=abs_mean_derivate(Features_train,all_dset,dset)
Features_train=max_abs_derivate(Features_train,all_dset,dset)
Features_train=max_value_derivate(Features_train,all_dset,dset)
Features_train=min_value_derivate(Features_train,all_dset,dset)
Features_train=freq_max_power(Features_train,all_dset,dset)
Features_train=freq_max_value(Features_train,all_dset,dset)
Features_train=peak(Features_train,all_dset,dset)
Features_train=fish_info_feat(Features_train,col_eeg_oxy,dset)
## ADD Features

NameError: name 'scipy' is not defined

In [32]:
Features_train.to_csv('Features_train.csv')

In [48]:
start=time.time()
Features_test=pd.DataFrame()
print(1)
Features_test=abs_mean(Features_test,all_dset,dset_pred)
print(1)
Features_test=max_value(Features_test,all_dset,dset_pred)
print(1)
Features_test=min_value(Features_test,all_dset,dset_pred)
print(1)
Features_test=max_abs_value(Features_test,all_dset,dset_pred)
print(1)
Features_test=abs_mean_derivate(Features_test,all_dset,dset_pred)
print(1)
Features_test=max_abs_derivate(Features_test,all_dset,dset_pred)
print(1)
Features_test=max_value_derivate(Features_test,all_dset,dset_pred)
print(1)
Features_test=min_value_derivate(Features_test,all_dset,dset_pred)
print(1)
Features_test=freq_max_power(Features_test,all_dset,dset_pred)
print(1)
Features_test=freq_max_value(Features_test,all_dset,dset_pred)
print(1)
Features_test=peak(Features_test,all_dset,dset_pred)
print(1)
Features_test=fish_info_feat(Features_test,col_eeg_oxy,dset_pred)
end=time.time()
print(end-start)
#ADD Features

1
1
1
1
1
1
1
1
1
1
1
1
4272.845386505127


In [50]:
Features_test.to_csv('Features_test.csv')

#### From already preprocessed features

In [20]:
Features_train = pd.read_csv('Features_train_2.csv')
Features_test = pd.read_csv('Features_test_2.csv')
y_train = pd.read_csv('train_y.csv').values[:,1]

In [21]:
X_train = Features_train.loc[:,:].values
X_pred = Features_test.loc[:,:].values

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_pred = sc.transform(X_pred)

In [22]:
# Model training
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=100,n_jobs=-1)
model.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=-1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [25]:
y_pred = model.predict(X_pred)
Res=pd.DataFrame()
Res['sleep_stage']=y_pred
Res.index.name='id'
Res.to_csv('y_pred.csv')

In [28]:
classifier_xg = XGBClassifier(max_depth=35, n_estimators=50, n_jobs=-1)
classifier_xg.fit(X_train, y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=35, min_child_weight=1, missing=None, n_estimators=50,
       n_jobs=-1, nthread=None, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1)

In [29]:
y_pred = model.predict(X_pred)
Res=pd.DataFrame()
Res['sleep_stage']=y_pred
Res.index.name='id'
Res.to_csv('y_pred_xg.csv')

In [None]:
import winsound
frequency=500
duration =5000
winsound.Beep(frequency,duration)