In [50]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd 
import numpy as np

In [51]:
data = pd.read_csv("Qmerge1-FirstLastHRV.csv") 
data.head()
#print(data.DESCR)

Unnamed: 0,nsrrid,gender_x,race_x,age_s1_x,ang_date,chd_dthdt,chf_date,cvd_dthdt,mi_date,stk_date,...,ihr,NN_RR,AVNN,SDNN,VLF,LF,LF_n,HF,HF_n,LF_HF
0,200079,2,3,56,,,,,,,...,75.112951,0.992,798.797,46.2118,695.639,285.53,0.256639,827.046,0.743361,0.345241
1,200079,2,3,56,,,,,,,...,70.633025,0.968839,849.461,72.6343,1234.8283,1666.85,0.507907,1614.95,0.492093,1.03214
2,200082,1,1,40,,,,,,,...,59.3554,0.99661,1010.86,39.4332,739.2121,531.024,0.686206,242.831,0.313794,2.1868
3,200082,1,1,40,,,,,,,...,56.66632,0.996466,1058.83,146.823,13231.3,3964.45,0.378345,6513.94,0.621655,0.60861
4,200108,2,1,60,,,,,,,...,64.954397,0.990741,923.725,27.7213,532.079,168.406,0.672796,81.9015,0.327204,2.0562


## Column Names description
1. nsrrid = This subject identifier was created by the National Sleep Research Resource (NSRR) team to more easily match with file downloads. Use 'pptid' to link with Biologic Specimen and Data Repository Information Coordinating Center (BioLINCC) and cohort-specific data.

2. BLUE25 = Quality of Life (QOL) (Sleep Heart Health Study Visit One (SHHS1)): Felt downhearted and blue. 
        1: All of the time
        2: Most of the time
        3: A good bit of the time
        4: Some of the time
        5: A little of time
        6: None of the time

3. ihr = Instantaneous heart rate

4. NN_RR = Ratio of consecutive normal sinus beats (NN) over all cardiac inter-beat (RR) intervals

5. AVNN = Mean of all normal sinus to normal sinus interbeat intervals (NN) 

6. SDNN = Standard deviation of all normal sinus to normal sinus interbeat (NN) intervals

7. VLF = Very low frequency power: the normal sinus to normal sinus interbeat (NN) interval spectral power between 0.003 and 0.04 Hz

8. LF = Low frequency power: the normal sinus to normal sinus interbeat (NN) interval spectral power between 0.04 and 0.15 Hz

9. HF = High frequency power: the normal sinus to normal sinus interbeat (NN) interval spectral power between 0.15 and 0.4 Hz

10. HF_n = High frequency power (normalized)

11. LF_HF = The ratio of low to high frequency power

In [52]:
heart_rate_data = data.copy()

In [53]:
heart_rate_data = heart_rate_data[['nsrrid', 'gender_x', 'race_x', 'age_s1_x', 'BLUE25', 'ihr', 'NN_RR', 'AVNN', 'SDNN', 'VLF', 'LF', 'HF', 'HF_n', 'LF_HF']]

In [54]:
heart_rate_data.head()
heart_rate_data.shape[0]

972

In [55]:
# Skipping every other row, to reemove duplicate rows
heart_rate_data = heart_rate_data[::2]
heart_rate_data.head()
heart_rate_data.shape[0] # row count

486

In [126]:
# Renaming columns 
heart_rate_data = heart_rate_data.rename(index=str, columns={'gender_x': 'Gender', 'race_x': 'Race', 'age_s1_x': 'Age'})
heart_rate_data = heart_rate_data[heart_rate_data['BLUE25'] != 'NAN']
heart_rate_data.head()

  result = method(y)


Unnamed: 0,nsrrid,Gender,Race,Age,BLUE25,ihr,NN_RR,AVNN,SDNN,VLF,LF,HF,HF_n,LF_HF
0,200079,2,3,56,5.0,75.112951,0.992,798.797,46.2118,695.639,285.53,827.046,0.743361,0.345241
2,200082,1,1,40,6.0,59.3554,0.99661,1010.86,39.4332,739.2121,531.024,242.831,0.313794,2.1868
4,200108,2,1,60,5.0,64.954397,0.990741,923.725,27.7213,532.079,168.406,81.9015,0.327204,2.0562
6,200109,2,1,71,6.0,76.865428,0.997389,780.585,8.41453,55.3561,8.87934,4.38036,0.330351,2.02708
8,200112,1,1,48,5.0,62.348286,0.996785,962.336,88.5344,3987.3524,3269.69,292.349,0.082073,11.1842


In [129]:
dep_data = heart_rate_data.dropna(axis=0, how='any')
dep_data.shape[0]

dep_data.loc[dep_data['BLUE25'] < 5, 'BLUE25'] = 0
dep_data.loc[dep_data['BLUE25'] >= 5, 'BLUE25'] = 1
dep_data.head(2)

Unnamed: 0,nsrrid,Gender,Race,Age,BLUE25,ihr,NN_RR,AVNN,SDNN,VLF,LF,HF,HF_n,LF_HF
0,200079,2,3,56,1.0,75.112951,0.992,798.797,46.2118,695.639,285.53,827.046,0.743361,0.345241
2,200082,1,1,40,1.0,59.3554,0.99661,1010.86,39.4332,739.2121,531.024,242.831,0.313794,2.1868


In [74]:
#heart_rate_data[heart_rate_data['BLUE25'] == 'NAN']
#dep_data['BLUE25'].unique()
#dep_data['BLUE25'] = dep_data['BLUE25'].apply(np.int64)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [61]:
dep_data.dtypes
#dep_data

nsrrid      int64
Gender      int64
Race        int64
Age         int64
BLUE25      int64
ihr       float64
NN_RR     float64
AVNN      float64
SDNN      float64
VLF       float64
LF        float64
HF        float64
HF_n      float64
LF_HF     float64
dtype: object

In [150]:
X = dep_data.drop('BLUE25', axis = 1)  #X = df1.drop('ql209f', axis = 1)
y = dep_data['BLUE25']



y_new = dep_data['BLUE25'].unique()
#print(f"Labels: {y[:10]}")
#print(f"Data: {X[:10]}")

In [151]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [152]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [153]:
classifier.fit(X_train, y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [154]:
print(f"Training Data Score: {classifier.score(X_train, y_train)}")
print(f"Testing Data Score: {classifier.score(X_test, y_test)}")

Training Data Score: 0.9017857142857143
Testing Data Score: 0.8407079646017699


In [155]:
predictions = classifier.predict(X_test)
pd.DataFrame({"Prediction": predictions, "Actual": y_test})

Unnamed: 0,Prediction,Actual
652,1.0,1.0
624,1.0,1.0
378,1.0,0.0
62,1.0,1.0
466,1.0,1.0
478,1.0,1.0
752,1.0,1.0
346,1.0,1.0
596,1.0,1.0
8,1.0,1.0


In [90]:
from sklearn.preprocessing import LabelEncoder


label_encoder = LabelEncoder()
label_encoder.fit(y_new)
encoded_y = label_encoder.transform(y_new)
encoded_y

array([4, 5, 3, 0, 2, 1])

In [91]:
for label, original_class in zip(encoded_y, y_new):
    print('Original Class: ' + str(original_class))
    print('Encoded Label: ' + str(label))
    print('-' * 12)

Original Class: 5
Encoded Label: 4
------------
Original Class: 6
Encoded Label: 5
------------
Original Class: 4
Encoded Label: 3
------------
Original Class: 1
Encoded Label: 0
------------
Original Class: 3
Encoded Label: 2
------------
Original Class: 2
Encoded Label: 1
------------


In [92]:
from keras.utils import to_categorical

# Step 2: One-hot encoding
one_hot_y = to_categorical(encoded_y)
one_hot_y

array([[0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0.]], dtype=float32)

In [93]:
#from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data
#X_scaler = StandardScaler().fit([X_train])

In [114]:
#X_train_scaled = X_scaler.transform([X_train])
#X_test_scaled = X_scaler.transform([X_test])

In [110]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()
classifier

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)

In [116]:
classifier.fit(X_train, y_train)



ValueError: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [98]:
#print('Weight coefficients: ', model.coef_)
#print('y-axis intercept: ', model.intercept_) 

Weight coefficients:  [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
y-axis intercept:  [5. 6. 5. 6. 5. 6. 5. 6. 5. 5. 6. 4. 6. 6. 5. 5. 6. 6. 1. 6. 6. 6. 6. 6.
 6. 5. 6. 4. 5. 3. 4. 6. 5. 5. 6. 6. 5. 6. 5. 6. 6. 5. 6. 4. 6. 5. 6. 6.
 6. 6. 6. 5. 4. 6. 5. 5. 6. 6. 6. 6. 5. 6. 3. 5. 5. 6. 6. 6. 5. 5. 5. 4.
 5. 5. 5. 6. 5. 5. 2. 4. 5. 6. 6. 4. 6. 6. 6. 5. 6. 4. 6. 5. 5. 5. 6. 5.
 5. 6. 5. 6. 6. 6. 4. 6. 5. 6. 6. 5. 6. 5. 6. 5. 5. 5. 4. 5. 5. 5. 5. 6.
 6. 6. 6. 5. 4. 6. 3. 5. 4. 6. 6. 6. 4. 6. 6. 6. 6. 5. 6. 6. 6. 5. 6. 6.
 6. 5. 5. 4. 5. 6. 5. 5. 6. 5. 6. 5. 5. 6. 6. 6. 6. 6. 6. 4. 6. 6. 5. 5.
 6. 5. 6. 5. 6. 6. 6. 5. 5. 6. 5. 5. 6. 6. 6. 5. 5. 5. 4. 6. 6. 6. 5. 5.
 6. 6. 6. 6. 6. 6. 6. 6. 6. 6. 5. 5. 6. 6. 5. 5. 6. 5. 5. 6. 6. 5. 6. 3.
 5. 4. 5. 5. 5. 4. 6. 6. 6. 6. 6. 6. 6. 5. 4. 5. 6. 6. 5. 5. 5. 4. 5. 5.
 6. 4. 6. 4. 6. 4. 6. 6. 6. 6. 5. 5. 6. 6. 6. 6. 5. 6. 6. 6. 6. 5. 6. 6.


In [99]:
predictions = model.predict(X)
predictions

array([[5., 6., 5., 6., 5., 6., 5., 6., 5., 5., 6., 4., 6., 6., 5., 5.,
        6., 6., 1., 6., 6., 6., 6., 6., 6., 5., 6., 4., 5., 3., 4., 6.,
        5., 5., 6., 6., 5., 6., 5., 6., 6., 5., 6., 4., 6., 5., 6., 6.,
        6., 6., 6., 5., 4., 6., 5., 5., 6., 6., 6., 6., 5., 6., 3., 5.,
        5., 6., 6., 6., 5., 5., 5., 4., 5., 5., 5., 6., 5., 5., 2., 4.,
        5., 6., 6., 4., 6., 6., 6., 5., 6., 4., 6., 5., 5., 5., 6., 5.,
        5., 6., 5., 6., 6., 6., 4., 6., 5., 6., 6., 5., 6., 5., 6., 5.,
        5., 5., 4., 5., 5., 5., 5., 6., 6., 6., 6., 5., 4., 6., 3., 5.,
        4., 6., 6., 6., 4., 6., 6., 6., 6., 5., 6., 6., 6., 5., 6., 6.,
        6., 5., 5., 4., 5., 6., 5., 5., 6., 5., 6., 5., 5., 6., 6., 6.,
        6., 6., 6., 4., 6., 6., 5., 5., 6., 5., 6., 5., 6., 6., 6., 5.,
        5., 6., 5., 5., 6., 6., 6., 5., 5., 5., 4., 6., 6., 6., 5., 5.,
        6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 5., 5., 6., 6., 5., 5.,
        6., 5., 5., 6., 6., 5., 6., 3., 5., 4., 5., 5., 5., 4., 

In [72]:
x_min = X.min()
x_max = X.max()

AttributeError: 'list' object has no attribute 'min'