## Crypto Sentiment Analyzer

In [43]:
# Imports
import os
import requests
import json
import numpy as np
import pandas as pd
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
%matplotlib inline

In [44]:
load_dotenv()

True

## Establish Baseline Performance

#### Step 1: Import the OHLCV dataset into a Pandas DataFrame.

In [45]:
# Import the OHLCV dataset into a 

btc_url = 'https://api.alternative.me/v2/ticker/Bitcoin/?convert=USD'
eth_url = 'https://api.alternative.me/v2/ticker/Ethereum/?convert=USD'
bnb_url = 'https://api.alternative.me/v2/ticker/BinanceCoin/?convert=USD'
xrp_url = 'https://api.alternative.me/v2/ticker/ripple/?convert=USD'
luna_url = 'https://api.alternative.me/v2/ticker/terra-luna/?convert=USD'
cardano_url ='https://api.alternative.me/v2/ticker/cardano/?convert=USD'
avax_url = 'https://api.alternative.me/v2/ticker/avalanche-2/?convert=USD'
dot_url = 'https://api.alternative.me/v2/ticker/polkadot/?convert=USD'
doge_url = 'https://api.alternative.me/v2/ticker/doge/?convert=USD'
shiba_url= 'https://api.alternative.me/v2/ticker/shiba-inu/?convert=USD'
api_key = os.getenv('API_Key')

In [46]:
btc_response = requests.get(btc_url).json()
eth_response= requests.get(eth_url).json()
bnb_response= requests.get(bnb_url).json()
xrp_response= requests.get(xrp_url).json()
luna_response = requests.get(luna_url).json()
cardano_response = requests.get(cardano_url).json()
avax_response = requests.get(avax_url).json()
dot_response= requests.get(dot_url).json()
doge_response = requests.get(doge_url).json()
shiba_response = requests.get(shiba_url).json()



In [47]:
alpaca_api_key= os.getenv('API_Key')
alpaca_secret_key = os.getenv('API_Secret_Key')


alpaca= tradeapi.REST(
alpaca_api_key,
alpaca_secret_key,
api_version= "v2")

In [48]:

# Filter the date index and close columns
tickers = ['BTC']
# Use the pct_change function to generate returns from close prices
timeframe = '1D'

limit_rows= 1000
# Drop all NaN values from the DataFrame

start_date = pd.Timestamp('2018-01-01', tz= "America/New_York").isoformat()
end_date = pd.Timestamp('2022-03-15', tz="America/New_York").isoformat()


In [49]:
start_date

'2018-01-01T00:00:00-05:00'

In [50]:
from alpaca_trade_api.rest import TimeFrame, TimeFrameUnit
prices_df = alpaca.get_bars(
tickers,
# timeframe,
TimeFrame(1, TimeFrameUnit.Day), 
# start=start_date,
# end=end_date,
start='2018-01-01', 
end='2022-03-15',
limit=limit_rows).df

prices_df.head(100)

Unnamed: 0_level_0,open,high,low,close,volume,trade_count,vwap,symbol
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-04-16 04:00:00+00:00,100.89,100.89,98.77,99.2447,4497,186,100.273227,BTC
2021-04-19 04:00:00+00:00,99.12,99.13,98.985,99.1012,4551,96,99.104105,BTC
2021-04-20 04:00:00+00:00,98.79,99.3,98.79,99.0441,3736,82,98.932931,BTC
2021-04-21 04:00:00+00:00,99.02,99.01,98.9,98.9705,997,42,98.950429,BTC
2021-04-22 04:00:00+00:00,99.04,99.04,98.86,99.001,1599,68,98.958802,BTC
2021-04-23 04:00:00+00:00,99.07,99.01,98.8801,98.98,1903,42,98.978855,BTC
2021-04-26 04:00:00+00:00,99.0,99.0,98.9082,98.97,2503,77,98.956571,BTC
2021-04-27 04:00:00+00:00,98.95,98.95,98.82,98.8326,1317,41,98.901892,BTC
2021-04-28 04:00:00+00:00,98.88,98.8864,98.76,98.8864,1001,58,98.833692,BTC
2021-04-29 04:00:00+00:00,98.81,98.9,98.8,98.855,2466,78,98.850832,BTC


In [51]:
project_df = prices_df.loc[:,["close"]]

In [52]:
project_df

Unnamed: 0_level_0,close
timestamp,Unnamed: 1_level_1
2021-04-16 04:00:00+00:00,99.2447
2021-04-19 04:00:00+00:00,99.1012
2021-04-20 04:00:00+00:00,99.0441
2021-04-21 04:00:00+00:00,98.9705
2021-04-22 04:00:00+00:00,99.001
2021-04-23 04:00:00+00:00,98.98
2021-04-26 04:00:00+00:00,98.97
2021-04-27 04:00:00+00:00,98.8326
2021-04-28 04:00:00+00:00,98.8864
2021-04-29 04:00:00+00:00,98.855


 #### Step 3: Split the data into training and testing datasets.

In [53]:
# Calculate the daily returns using the closing prices and the pct_change function
project_df["Daily Return"] = project_df["close"].pct_change()

# Display sample data
project_df.head()

Unnamed: 0_level_0,close,Daily Return
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-16 04:00:00+00:00,99.2447,
2021-04-19 04:00:00+00:00,99.1012,-0.001446
2021-04-20 04:00:00+00:00,99.0441,-0.000576
2021-04-21 04:00:00+00:00,98.9705,-0.000743
2021-04-22 04:00:00+00:00,99.001,0.000308


In [54]:
# Drop all NaN values from the DataFrame
project_df = project_df.dropna()

# Review the DataFrame
display(project_df.head())
display(project_df.tail())

Unnamed: 0_level_0,close,Daily Return
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-19 04:00:00+00:00,99.1012,-0.001446
2021-04-20 04:00:00+00:00,99.0441,-0.000576
2021-04-21 04:00:00+00:00,98.9705,-0.000743
2021-04-22 04:00:00+00:00,99.001,0.000308
2021-04-23 04:00:00+00:00,98.98,-0.000212


Unnamed: 0_level_0,close,Daily Return
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-05-04 04:00:00+00:00,98.985,0.000408
2021-05-05 04:00:00+00:00,99.06,0.000758
2021-05-06 04:00:00+00:00,99.075,0.000151
2021-05-07 04:00:00+00:00,99.1475,0.000732
2021-05-10 04:00:00+00:00,99.11,-0.000378


#### Step 5: Review the classification report associated with the SVC model predictions.

In [55]:
# Initialize investment signal tally

investment_signal = 0

# Initialize trade_type column for buys and sell
project_df["score"] = np.nan

# Initialize variable to hold the previous_price
previous_price = 0

# Loop through the Pandas DataFrame and initiate a score for each iteration


for index, row in project_df.iterrows():
    
    # add 1 if the current day price is greater than the previous day
    
    if row["close"] < previous_price:
        investment_signal = investment_signal + 2
        project_df.loc[index, "score"] = 1
        
    if row["close"] < previous_price:
        investment_signal = investment_signal + 1
        project_df.loc[index, "score"] = 1    
          
    # subtract 1 if the current day price is greater than the previous day
    if row["close"] > previous_price:
        investment_signal = investment_signal - 1
        project_df.loc[index, "score"] = 0    
    
    # set the previous_price variable to the close of current row
    previous_price = row["close"]

    # if the index is the last index of the Data
    if index == project_df.index[-1]:
        project_df.loc[index, "score"] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [56]:
project_df.fillna
project_df.head(50)


Unnamed: 0_level_0,close,Daily Return,score
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-04-19 04:00:00+00:00,99.1012,-0.001446,0.0
2021-04-20 04:00:00+00:00,99.0441,-0.000576,1.0
2021-04-21 04:00:00+00:00,98.9705,-0.000743,1.0
2021-04-22 04:00:00+00:00,99.001,0.000308,0.0
2021-04-23 04:00:00+00:00,98.98,-0.000212,1.0
2021-04-26 04:00:00+00:00,98.97,-0.000101,1.0
2021-04-27 04:00:00+00:00,98.8326,-0.001388,1.0
2021-04-28 04:00:00+00:00,98.8864,0.000544,0.0
2021-04-29 04:00:00+00:00,98.855,-0.000318,1.0
2021-04-30 04:00:00+00:00,98.8893,0.000347,0.0


In [57]:
investment_signal

12


#### Tune the Baseline Trading Algorithm


In [61]:
# Initialize trade_type column for buys and sell
project_df["trade_type"]= np.nan


# Loop through the Pandas DataFrame and initiate
for index, row in project_df.iterrows():
    
    # buy if the previous price is 0
    if previous_price == 0:
        project_df.loc[index, "trade_type"] = 1

    # buy if the current day price is less than previous close
    elif row["close"] < previous_price:
        project_df.loc[index, "trade_type"] = 1

    # hold if the current day price is greater than previous close
    elif row["close"] > previous_price:
        project_df.loc[index, "trade_type"] = 0

    # as well, hold if the current day price is equal to previous close
    else:
        project_df.loc[index, "trade_type"] = 0
    
    # set the previous_price variable to the close of current row
    previous_price = row["close"]

    # if the index is the last index of the Data
    if index == project_df.index[-1]:
        project_df.loc[index, "trade_type"] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [62]:
project_df

Unnamed: 0_level_0,close,Daily Return,score,trade_type
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-04-19 04:00:00+00:00,99.1012,-0.001446,0.0,1.0
2021-04-20 04:00:00+00:00,99.0441,-0.000576,1.0,1.0
2021-04-21 04:00:00+00:00,98.9705,-0.000743,1.0,1.0
2021-04-22 04:00:00+00:00,99.001,0.000308,0.0,0.0
2021-04-23 04:00:00+00:00,98.98,-0.000212,1.0,1.0
2021-04-26 04:00:00+00:00,98.97,-0.000101,1.0,1.0
2021-04-27 04:00:00+00:00,98.8326,-0.001388,1.0,1.0
2021-04-28 04:00:00+00:00,98.8864,0.000544,0.0,0.0
2021-04-29 04:00:00+00:00,98.855,-0.000318,1.0,1.0
2021-04-30 04:00:00+00:00,98.8893,0.000347,0.0,0.0


In [63]:
# Preview the DataFrame
project_df.head()

# Count how many firms are in each category
project_df['close'].value_counts()

99.0600    1
99.1475    1
99.0010    1
98.8326    1
98.8550    1
98.9700    1
98.9800    1
99.1012    1
99.1100    1
98.9705    1
99.0441    1
98.9850    1
98.9446    1
99.0750    1
98.8864    1
98.8893    1
Name: close, dtype: int64

In [64]:
project_df.dropna

<bound method DataFrame.dropna of                              close  Daily Return  score  trade_type
timestamp                                                          
2021-04-19 04:00:00+00:00  99.1012     -0.001446    0.0         1.0
2021-04-20 04:00:00+00:00  99.0441     -0.000576    1.0         1.0
2021-04-21 04:00:00+00:00  98.9705     -0.000743    1.0         1.0
2021-04-22 04:00:00+00:00  99.0010      0.000308    0.0         0.0
2021-04-23 04:00:00+00:00  98.9800     -0.000212    1.0         1.0
2021-04-26 04:00:00+00:00  98.9700     -0.000101    1.0         1.0
2021-04-27 04:00:00+00:00  98.8326     -0.001388    1.0         1.0
2021-04-28 04:00:00+00:00  98.8864      0.000544    0.0         0.0
2021-04-29 04:00:00+00:00  98.8550     -0.000318    1.0         1.0
2021-04-30 04:00:00+00:00  98.8893      0.000347    0.0         0.0
2021-05-03 04:00:00+00:00  98.9446      0.000559    0.0         0.0
2021-05-04 04:00:00+00:00  98.9850      0.000408    0.0         0.0
2021-05-05 04:

In [65]:
# Import module
from sklearn.model_selection import train_test_split

# Split training and testing sets
# Create X, or features DataFrame
features = project_df[['score']]

# Create y, or target DataFrame
target = project_df['trade_type']

# Use train_test_split to separate the data
training_features, testing_features, training_targets, testing_targets = train_test_split(features, target)

In [66]:
training_targets.head(50)

timestamp
2021-04-29 04:00:00+00:00    1.0
2021-04-27 04:00:00+00:00    1.0
2021-04-22 04:00:00+00:00    0.0
2021-04-30 04:00:00+00:00    0.0
2021-04-26 04:00:00+00:00    1.0
2021-04-21 04:00:00+00:00    1.0
2021-05-10 04:00:00+00:00    1.0
2021-05-04 04:00:00+00:00    0.0
2021-05-05 04:00:00+00:00    0.0
2021-04-28 04:00:00+00:00    0.0
2021-04-20 04:00:00+00:00    1.0
2021-05-03 04:00:00+00:00    0.0
Name: trade_type, dtype: float64

In [67]:
training_features

Unnamed: 0_level_0,score
timestamp,Unnamed: 1_level_1
2021-04-29 04:00:00+00:00,1.0
2021-04-27 04:00:00+00:00,1.0
2021-04-22 04:00:00+00:00,0.0
2021-04-30 04:00:00+00:00,0.0
2021-04-26 04:00:00+00:00,1.0
2021-04-21 04:00:00+00:00,1.0
2021-05-10 04:00:00+00:00,1.0
2021-05-04 04:00:00+00:00,0.0
2021-05-05 04:00:00+00:00,0.0
2021-04-28 04:00:00+00:00,0.0


In [68]:
logistic_regression_model = LogisticRegression()

In [69]:
# Fit the model
logistic_regression_model.fit(training_features, training_targets)

LogisticRegression()

In [70]:
# Fit the model
logistic_regression_model.fit(training_features, training_targets)

LogisticRegression()

In [72]:
# Generate predictions from the model we just fit
predictions = logistic_regression_model.predict(training_features)

# Convert those predictions (and actual values) to a DataFrame
results_df = pd.DataFrame({"Prediction": predictions, "Actual": training_targets})
results_df

Unnamed: 0_level_0,Prediction,Actual
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1
2021-04-29 04:00:00+00:00,1.0,1.0
2021-04-27 04:00:00+00:00,1.0,1.0
2021-04-22 04:00:00+00:00,0.0,0.0
2021-04-30 04:00:00+00:00,0.0,0.0
2021-04-26 04:00:00+00:00,1.0,1.0
2021-04-21 04:00:00+00:00,1.0,1.0
2021-05-10 04:00:00+00:00,1.0,1.0
2021-05-04 04:00:00+00:00,0.0,0.0
2021-05-05 04:00:00+00:00,0.0,0.0
2021-04-28 04:00:00+00:00,0.0,0.0
