In [75]:
# Imports
import os
import requests
import json
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
%matplotlib inline

In [76]:
# Import the OHLCV dataset into a Pandas Dataframe
API_df = pd.read_csv(
    Path("emerging_markets_ohlcv.csv"), 
    index_col='date', 
    infer_datetime_format=True, 
    parse_dates=True
)

# Review the DataFrame
API_df.head()

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-21 09:30:00,23.83,23.83,23.83,23.83,100
2015-01-21 11:00:00,23.98,23.98,23.98,23.98,100
2015-01-22 15:00:00,24.42,24.42,24.42,24.42,100
2015-01-22 15:15:00,24.42,24.44,24.42,24.44,200
2015-01-22 15:30:00,24.46,24.46,24.46,24.46,200


In [77]:
project_df = API_df.loc[:,["close"]]

In [78]:
project_df

Unnamed: 0_level_0,close
date,Unnamed: 1_level_1
2015-01-21 09:30:00,23.83
2015-01-21 11:00:00,23.98
2015-01-22 15:00:00,24.42
2015-01-22 15:15:00,24.44
2015-01-22 15:30:00,24.46
...,...
2021-01-22 09:30:00,33.27
2021-01-22 11:30:00,33.35
2021-01-22 13:45:00,33.42
2021-01-22 14:30:00,33.47


In [79]:
# Calculate the daily returns using the closing prices and the pct_change function
project_df["Daily Return"] = project_df["close"].pct_change()

# Display sample data
project_df.head()

Unnamed: 0_level_0,close,Daily Return
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-21 09:30:00,23.83,
2015-01-21 11:00:00,23.98,0.006295
2015-01-22 15:00:00,24.42,0.018349
2015-01-22 15:15:00,24.44,0.000819
2015-01-22 15:30:00,24.46,0.000818


In [80]:
# Drop all NaN values from the DataFrame
project_df = project_df.dropna()

# Review the DataFrame
display(project_df.head())
display(project_df.tail())

Unnamed: 0_level_0,close,Daily Return
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-21 11:00:00,23.98,0.006295
2015-01-22 15:00:00,24.42,0.018349
2015-01-22 15:15:00,24.44,0.000819
2015-01-22 15:30:00,24.46,0.000818
2015-01-26 12:30:00,24.33,-0.005315


Unnamed: 0_level_0,close,Daily Return
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-01-22 09:30:00,33.27,-0.006866
2021-01-22 11:30:00,33.35,0.002405
2021-01-22 13:45:00,33.42,0.002099
2021-01-22 14:30:00,33.47,0.001496
2021-01-22 15:45:00,33.44,-0.000896


In [85]:
# Initialize investment signal tally

investment_signal = 0

# Initialize trade_type column for buys and sell
project_df["score"] = np.nan

# Initialize variable to hold the previous_price
previous_price = 0

# Loop through the Pandas DataFrame and initiate a score for each iteration


for index, row in project_df.iterrows():
    
    # add 1 if the current day price is greater than the previous day
    
    if row["close"] < (previous_price*.98)
        investment_signal = investment_signal + 2
        project_df.loc[index, "score"] = 1
        
    if row["close"] < previous_price:
        investment_signal = investment_signal + 1
        project_df.loc[index, "score"] = 1    
          
    # subtract 1 if the current day price is greater than the previous day
    if row["close"] > previous_price:
        investment_signal = investment_signal - 1
        project_df.loc[index, "score"] = 0    
    
    # set the previous_price variable to the close of current row
    previous_price = row["close"]

    # if the index is the last index of the Data
    if index == project_df.index[-1]:
        project_df.loc[index, "score"] = 1

SyntaxError: invalid syntax (636747658.py, line 18)

In [83]:
project_df.fillna
project_df.head(50)

Unnamed: 0_level_0,close,Daily Return,score
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015-01-21 11:00:00,23.98,0.006295,0.0
2015-01-22 15:00:00,24.42,0.018349,0.0
2015-01-22 15:15:00,24.44,0.000819,0.0
2015-01-22 15:30:00,24.46,0.000818,0.0
2015-01-26 12:30:00,24.33,-0.005315,1.0
2015-01-26 14:15:00,24.28,-0.002055,1.0
2015-01-26 14:45:00,24.26,-0.000824,1.0
2015-01-26 15:15:00,24.24,-0.000824,1.0
2015-01-27 10:00:00,24.05,-0.007838,1.0
2015-01-27 10:15:00,24.06,0.000416,0.0


In [29]:
investment_signal

-68

In [88]:
# Initialize trade_type column for buys and sell
project_df['trade_type']


# Loop through the Pandas DataFrame and initiate
for index, row in project_df.iterrows():
    
    # buy if the previous price is 0
    if previous_price == 0:
        project_df.loc[index, "trade_type"] = 1

    # buy if the current day price is less than previous close
    elif row["close"] < previous_price:
        project_df.loc[index, "trade_type"] = 1

    # hold if the current day price is greater than previous close
    elif row["close"] > previous_price:
        project_df.loc[index, "trade_type"] = 0

    # as well, hold if the current day price is equal to previous close
    else:
        project_df.loc[index, "trade_type"] = 0
    
    # set the previous_price variable to the close of current row
    previous_price = row["close"]

    # if the index is the last index of the Data
    if index == project_df.index[-1]:
        project_df.loc[index, "trade_type"] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [89]:
project_df

Unnamed: 0_level_0,close,Daily Return,score,trade_type
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-21 11:00:00,23.98,0.006295,0.0,1.0
2015-01-22 15:00:00,24.42,0.018349,0.0,0.0
2015-01-22 15:15:00,24.44,0.000819,0.0,0.0
2015-01-22 15:30:00,24.46,0.000818,0.0,0.0
2015-01-26 12:30:00,24.33,-0.005315,1.0,1.0
...,...,...,...,...
2021-01-22 09:30:00,33.27,-0.006866,1.0,1.0
2021-01-22 11:30:00,33.35,0.002405,0.0,0.0
2021-01-22 13:45:00,33.42,0.002099,0.0,0.0
2021-01-22 14:30:00,33.47,0.001496,0.0,0.0


In [90]:
# Preview the DataFrame
project_df.head()

# Count how many firms are in each category
project_df['close'].value_counts()

25.640    18
26.830    17
26.510    16
26.590    15
26.800    15
          ..
22.375     1
26.185     1
20.800     1
22.570     1
18.560     1
Name: close, Length: 1165, dtype: int64

In [91]:
project_df.dropna

<bound method DataFrame.dropna of                      close  Daily Return  score  trade_type
date                                                       
2015-01-21 11:00:00  23.98      0.006295    0.0         1.0
2015-01-22 15:00:00  24.42      0.018349    0.0         0.0
2015-01-22 15:15:00  24.44      0.000819    0.0         0.0
2015-01-22 15:30:00  24.46      0.000818    0.0         0.0
2015-01-26 12:30:00  24.33     -0.005315    1.0         1.0
...                    ...           ...    ...         ...
2021-01-22 09:30:00  33.27     -0.006866    1.0         1.0
2021-01-22 11:30:00  33.35      0.002405    0.0         0.0
2021-01-22 13:45:00  33.42      0.002099    0.0         0.0
2021-01-22 14:30:00  33.47      0.001496    0.0         0.0
2021-01-22 15:45:00  33.44     -0.000896    1.0         1.0

[4322 rows x 4 columns]>

In [92]:
# Import module
from sklearn.model_selection import train_test_split

# Split training and testing sets
# Create X, or features DataFrame
features = project_df[['score']]

# Create y, or target DataFrame
target = project_df['trade_type']

# Use train_test_split to separate the data
training_features, testing_features, training_targets, testing_targets = train_test_split(features, target)

In [93]:
training_targets.head(50)

date
2020-12-08 15:45:00    1.0
2017-03-23 10:30:00    0.0
2015-11-19 10:00:00    0.0
2019-02-07 13:30:00    1.0
2020-03-23 09:30:00    1.0
2016-11-07 13:15:00    0.0
2018-11-27 13:15:00    0.0
2015-08-12 13:00:00    0.0
2016-01-22 15:00:00    1.0
2019-04-04 15:00:00    0.0
2016-04-18 15:30:00    1.0
2017-07-31 15:45:00    0.0
2019-10-02 14:30:00    1.0
2019-04-30 15:45:00    0.0
2015-07-07 13:45:00    0.0
2015-08-14 13:00:00    0.0
2015-10-26 10:45:00    0.0
2017-08-17 14:30:00    1.0
2020-07-25 10:15:00    0.0
2017-04-12 12:00:00    1.0
2018-04-16 14:00:00    0.0
2020-12-17 09:30:00    0.0
2018-04-27 14:45:00    1.0
2016-12-30 15:00:00    1.0
2015-04-02 12:00:00    1.0
2020-01-16 11:30:00    0.0
2016-02-03 15:00:00    0.0
2015-04-07 15:45:00    1.0
2018-05-03 13:30:00    0.0
2016-09-21 13:45:00    0.0
2016-01-29 12:00:00    0.0
2017-09-27 10:45:00    1.0
2017-02-06 11:45:00    0.0
2019-05-16 15:15:00    1.0
2017-10-30 11:30:00    1.0
2018-08-20 11:15:00    1.0
2017-09-12 10:15:00    

In [94]:
training_features



Unnamed: 0_level_0,score
date,Unnamed: 1_level_1
2020-12-08 15:45:00,1.0
2017-03-23 10:30:00,0.0
2015-11-19 10:00:00,0.0
2019-02-07 13:30:00,1.0
2020-03-23 09:30:00,1.0
...,...
2018-09-04 10:45:00,1.0
2016-02-17 10:45:00,0.0
2018-01-31 09:45:00,1.0
2020-07-18 12:15:00,0.0


In [95]:
logistic_regression_model = LogisticRegression()

In [96]:
# Fit the model
logistic_regression_model.fit(training_features, training_targets)

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [54]:
# Generate predictions from the model we just fit
predictions = logistic_regression_model.predict(training_features)

# Convert those predictions (and actual values) to a DataFrame
results_df = pd.DataFrame({"Prediction": predictions, "Actual": training_targets})
results_df

ValueError: Input contains NaN, infinity or a value too large for dtype('float64').