In [21]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
import pandas_ta as ta
import pandas as pd
import numpy as np
import joblib

In [22]:
df_nflx_intraday = pd.read_csv('data_nflx_intraday.csv', index_col=0, parse_dates=True) # will be our working df
df = df_nflx_intraday.copy() # working df
df.columns = df.columns.str.lower()

In [23]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-07-27 04:00:00,423.01,425.61,423.01,425.0,1150.0
2023-07-27 04:01:00,425.43,427.26,425.0,427.26,677.0
2023-07-27 04:02:00,427.19,427.19,426.0,426.7,341.0
2023-07-27 04:03:00,426.76,427.01,426.65,426.84,140.0
2023-07-27 04:04:00,426.84,427.64,426.84,427.49,117.0


In [24]:
df['price_increased'] = np.where(df['close'].diff() > 0, 1, 0)

In [25]:
df.head()

Unnamed: 0_level_0,open,high,low,close,volume,price_increased
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-07-27 04:00:00,423.01,425.61,423.01,425.0,1150.0,0
2023-07-27 04:01:00,425.43,427.26,425.0,427.26,677.0,1
2023-07-27 04:02:00,427.19,427.19,426.0,426.7,341.0,0
2023-07-27 04:03:00,426.76,427.01,426.65,426.84,140.0,1
2023-07-27 04:04:00,426.84,427.64,426.84,427.49,117.0,1


In [26]:
# adding a few indicators
df['rsi'] = ta.rsi(df['close'], length=30)
df['ad'] = ta.ad(df['high'], df['low'], df['close'], df['volume']) # Accumulation/Distribution Line
bbands = ta.bbands(df['close'], length=10, mamode="ema") #  Returns: pd.DataFrame: lower, mid, upper, bandwidth, and percent columns.
df[['bbandsl', 'bbandsm', 'bbandsu','bbandsb', 'bbandsp']] = bbands[['BBL_10_2.0', 'BBM_10_2.0', 'BBU_10_2.0', 'BBB_10_2.0', 'BBP_10_2.0']]

# getting rid of nan
df.dropna(inplace=True)

In [27]:
# dividing X and y
X = df.drop(columns=['price_increased'])
y = df[['price_increased']]

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

y_pred_logistic = logistic_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred_logistic)
print("Accuracy:", accuracy)

Accuracy: 0.5094393592677345


  y = column_or_1d(y, warn=True)


In [29]:
joblib.dump(logistic_model, 'logistic_model.pkl')

['logistic_model.pkl']