In [56]:
# Import dependencies
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
import statsmodels.api as sm
from sklearn import linear_model

In [57]:
# Import csv to dataframe
df = pd.read_csv(Path('./Modeling/2nd_modeling_target_sleep_quality/df_transformed.csv'))
df.head()

Unnamed: 0,Sleep quality,Time in bed,Activity (steps),Stressful day Total,Drank coffee Total,Drank tea Total,Ate late Total,Worked out Total,Fell asleep
0,65,452,0,0,0,0,0,0,2231
1,89,530,0,0,1,1,0,0,2238
2,100,512,0,0,0,0,0,0,2257
3,87,442,0,0,0,1,0,0,2132
4,93,483,0,1,1,1,0,0,13


In [59]:
# Check the Shape of the dataframe as using get_dummies
df.shape

(887, 9)

In [60]:
# Set X to all columns but sleep quality and set y to Sleep quality
X = df.drop('Sleep quality', axis=1)

y = df.iloc[:,0].copy()

In [61]:
# Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)


In [63]:
# Verify X is what we expect it to be
X[:5]

Unnamed: 0,Time in bed,Activity (steps),Stressful day Total,Drank coffee Total,Drank tea Total,Ate late Total,Worked out Total,Fell asleep
0,452,0,0,0,0,0,0,2231
1,530,0,0,1,1,0,0,2238
2,512,0,0,0,0,0,0,2257
3,442,0,0,0,1,0,0,2132
4,483,0,1,1,1,0,0,13


In [64]:
# Rename X back to df
df = pd.DataFrame(X)
df.head()

Unnamed: 0,Time in bed,Activity (steps),Stressful day Total,Drank coffee Total,Drank tea Total,Ate late Total,Worked out Total,Fell asleep
0,452,0,0,0,0,0,0,2231
1,530,0,0,1,1,0,0,2238
2,512,0,0,0,0,0,0,2257
3,442,0,0,0,1,0,0,2132
4,483,0,1,1,1,0,0,13


In [65]:
# Verify y matches the "Sleep_Quality" column dropped from X 
y.head()

0     65
1     89
2    100
3     87
4     93
Name: Sleep quality, dtype: int64

In [66]:
# Notice the shape of y
y.shape

(887,)

In [67]:
# Instantiate the LinearRegression Model
linear = LinearRegression()
linear

LinearRegression()

In [68]:
# Train the Model
linear.fit(X_train, y_train)

LinearRegression()

In [45]:
# Predict outcomes for test data set
y_pred = linear.predict(X_test)
pd.DataFrame({"Prediction": y_pred, "Actual": y_test})

Unnamed: 0,Prediction,Actual
522,80.772393,79
314,79.054583,86
768,79.260579,73
320,71.616312,72
809,70.284922,74
...,...,...
35,76.282396,64
46,78.570687,80
255,77.138954,78
670,81.788098,96


In [46]:
# Score the Model
linear.score(X_train, y_train)


0.603248893596652

In [16]:
# Instantiate the Lasso Model
classo = Lasso()
classo

Lasso()

In [17]:
# Train the data
classo.fit(X_train, y_train)

Lasso()

In [18]:
# Predict outcomes for test data set
y_pred = classo.predict(X_test)
pd.DataFrame({"Prediction": y_pred, "Actual": y_test})

Unnamed: 0,Prediction,Actual
522,79.229796,79
314,78.748267,86
768,77.738577,73
320,69.543845,72
809,69.110962,74
...,...,...
35,75.095852,64
46,77.403848,80
255,75.095852,78
670,80.663491,96


In [19]:
# Score the Model
classo.score(X_train, y_train)

0.5945859973126337

In [20]:
# .....................

In [21]:
# Instantiate the Ridge Model
ridge = Ridge()
ridge

Ridge()

In [22]:
# Train the Model
ridge.fit(X_train, y_train)

Ridge()

In [23]:
# Predict outcomes for test data set
y_pred = ridge.predict(X_test)
pd.DataFrame({"Prediction": y_pred, "Actual": y_test})

Unnamed: 0,Prediction,Actual
522,80.755582,79
314,79.061942,86
768,79.258979,73
320,71.601586,72
809,70.289281,74
...,...,...
35,76.271263,64
46,78.574384,80
255,77.124076,78
670,81.777230,96


In [24]:
# Score the model
ridge.score(X_train, y_train)

0.6032474131667027

In [25]:
# ................

In [26]:
# Instantiate the DecisionTreeRegressorModel
decision = DecisionTreeRegressor()

In [27]:
# Train the Model
decision.fit(X_train, y_train)

DecisionTreeRegressor()

In [28]:
# Predict outcomes for test data set
y_pred = decision.predict(X_test)
pd.DataFrame({"Prediction": y_pred, "Actual": y_test})

Unnamed: 0,Prediction,Actual
522,70.0,79
314,93.0,86
768,71.0,73
320,82.0,72
809,78.0,74
...,...,...
35,79.0,64
46,77.0,80
255,79.0,78
670,64.0,96


In [29]:
# Score the Model
decision.score(X_train, y_train)

1.0

In [30]:
#................

In [47]:
# Instantiate Multi_Variable_Linear_Regression
model = linear_model.LinearRegression()

In [48]:
# Train the Model
model.fit(X_train, y_train)

LinearRegression()

In [49]:
# Predict outcomes for test data set
y_pred = model.predict(X_test)
print(y_pred.shape)

(222,)


In [50]:
# with statsmodels
X = sm.add_constant(X) # adding a constant
 
model = sm.OLS(y, X).fit()
predictions = model.predict(X) 

# with sklearn
regr = linear_model.LinearRegression()
regr.fit(X, y)

print('Intercept: \n', regr.intercept_)
print('Coefficients: \n', regr.coef_)

print_model = model.summary()
print(print_model)

  x = pd.concat(x[::order], 1)


Intercept: 
 13.745189651845848
Coefficients: 
 [ 0.00000000e+00  1.42124026e-01 -8.30921821e-04  1.22791860e+00
 -6.70360733e-01  3.06011252e+00  1.53656065e+00 -1.85742943e-03
 -1.46657665e-03]
                            OLS Regression Results                            
Dep. Variable:          Sleep quality   R-squared:                       0.553
Model:                            OLS   Adj. R-squared:                  0.549
Method:                 Least Squares   F-statistic:                     135.6
Date:                Wed, 05 Jan 2022   Prob (F-statistic):          9.38e-148
Time:                        18:04:11   Log-Likelihood:                -3328.5
No. Observations:                 887   AIC:                             6675.
Df Residuals:                     878   BIC:                             6718.
Df Model:                           8                                         
Covariance Type:            nonrobust                                         
              

In [51]:
# Score the model
regr.score(X, y)

0.5527602473145632

In [52]:
#..................

In [53]:
#Use joblib to save the best model which is linear at 0.603248893596652 accuracy
from joblib import load, dump
dump(linear, "linear.joblib")

import os
os.getcwd()
# Reference
# model = load("linear.joblib")

'/Users/danielsquires/Desktop/Sleep_Analysis'