In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import hvplot.pandas
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.preprocessing import StandardScaler
from pandas.tseries.offsets import DateOffset
from sklearn.metrics import classification_report

In [45]:
# Read the csv file 
amzn_df = pd.read_csv(Path("./Resources/amzn.csv"))

# Convert 'time' column from timestamp (seconds since epoch) to acutal time
amzn_df ['time'] = pd.to_datetime(
    amzn_df['time'],
    unit = 's',
    infer_datetime_format=True,
    utc=True
)    
# Convert timezone from UTC to Eastern Time
amzn_df['time'] = amzn_df['time'].dt.tz_convert('US/Eastern')

# Set 'time' column as the index 
amzn_df.set_index('time', inplace=True)
                      
# Review df
amzn_df.head()          

Unnamed: 0_level_0,open,high,low,close,VWAP,Upper Band #1,Lower Band #1,Upper Band #2,Lower Band #2,Upper Band #3,...,Volume,Volume MA,EMA,Smoothing Line,Developing Poc,Developing VA High,Developing VA Low,Developing Poc.1,Developing VA High.1,Developing VA Low.1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-03-14 09:30:00-04:00,9.1825,9.215995,9.057,9.112999,9.128665,9.128665,9.128665,9.128665,9.128665,9.128665,...,73994680,107206355.0,9.59791,9.60661,,,,,,
2012-03-15 09:30:00-04:00,9.101,9.2215,9.015,9.2215,9.152667,9.152667,9.152667,9.152667,9.152667,9.152667,...,83212340,103564913.0,9.594164,9.602464,,,,,,
2012-03-16 09:30:00-04:00,9.164,9.284,9.117501,9.2525,9.218,9.218,9.218,9.218,9.218,9.218,...,98696480,95934268.0,9.590765,9.598431,,,,,,
2012-03-19 09:30:00-04:00,9.1725,9.334,9.15,9.276,9.253333,9.253333,9.253333,9.253333,9.253333,9.253333,...,78080380,92419724.0,9.587633,9.594651,,,,,,
2012-03-20 09:30:00-04:00,9.244,9.7205,9.144,9.6165,9.493667,9.493667,9.493667,9.493667,9.493667,9.493667,...,183346180,94828072.0,9.58792,9.591678,,,,,,


In [46]:
#Droping NAN Values 
amzn_df = amzn_df.dropna(axis=1)
#Review the data frame
amzn_df.head()

Unnamed: 0_level_0,open,high,low,close,VWAP,Upper Band #1,Lower Band #1,Upper Band #2,Lower Band #2,Upper Band #3,Lower Band #3,Plot,Plot.1,Plot.2,Volume,Volume MA,EMA,Smoothing Line
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2012-03-14 09:30:00-04:00,9.1825,9.215995,9.057,9.112999,9.128665,9.128665,9.128665,9.128665,9.128665,9.128665,9.128665,9.215995,9.057,9.1825,73994680,107206355.0,9.59791,9.60661
2012-03-15 09:30:00-04:00,9.101,9.2215,9.015,9.2215,9.152667,9.152667,9.152667,9.152667,9.152667,9.152667,9.152667,9.2215,9.015,9.101,83212340,103564913.0,9.594164,9.602464
2012-03-16 09:30:00-04:00,9.164,9.284,9.117501,9.2525,9.218,9.218,9.218,9.218,9.218,9.218,9.218,9.284,9.117501,9.164,98696480,95934268.0,9.590765,9.598431
2012-03-19 09:30:00-04:00,9.1725,9.334,9.15,9.276,9.253333,9.253333,9.253333,9.253333,9.253333,9.253333,9.253333,9.334,9.15,9.1725,78080380,92419724.0,9.587633,9.594651
2012-03-20 09:30:00-04:00,9.244,9.7205,9.144,9.6165,9.493667,9.493667,9.493667,9.493667,9.493667,9.493667,9.493667,9.7205,9.144,9.244,183346180,94828072.0,9.58792,9.591678


In [47]:
# Filter column and only keep ones needed
amzn_df = amzn_df.drop(columns=["Upper Band #1", "Lower Band #1", "Upper Band #2", "Lower Band #2", "Upper Band #3", "Lower Band #3","Plot", "Plot.1", "Plot.2","Smoothing Line"], axis=1)

# Review the DataFrame
amzn_df.head()


Unnamed: 0_level_0,open,high,low,close,VWAP,Volume,Volume MA,EMA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-03-14 09:30:00-04:00,9.1825,9.215995,9.057,9.112999,9.128665,73994680,107206355.0,9.59791
2012-03-15 09:30:00-04:00,9.101,9.2215,9.015,9.2215,9.152667,83212340,103564913.0,9.594164
2012-03-16 09:30:00-04:00,9.164,9.284,9.117501,9.2525,9.218,98696480,95934268.0,9.590765
2012-03-19 09:30:00-04:00,9.1725,9.334,9.15,9.276,9.253333,78080380,92419724.0,9.587633
2012-03-20 09:30:00-04:00,9.244,9.7205,9.144,9.6165,9.493667,183346180,94828072.0,9.58792


In [53]:
# Read the csv file 
meta_df = pd.read_csv(Path("./Resources/meta.csv"))

# Convert 'time' column from timestamp (seconds since epoch) to acutal time
meta_df ['time'] = pd.to_datetime(
    meta_df['time'],
    unit = 's',
    infer_datetime_format=True,
    utc=True
)    
# Convert timezone from UTC to Eastern Time
meta_df['time'] = meta_df['time'].dt.tz_convert('US/Eastern')

# Set 'time' column as the index 
meta_df.set_index('time', inplace=True)
                      
# Review df
meta_df.head()   

Unnamed: 0_level_0,open,high,low,close,VWAP,Upper Band #1,Lower Band #1,Upper Band #2,Lower Band #2,Upper Band #3,...,Volume,Volume MA,EMA,Smoothing Line,Developing Poc,Developing VA High,Developing VA Low,Developing Poc.1,Developing VA High.1,Developing VA Low.1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-05-18 09:30:00-04:00,42.05,45.0,38.0,38.2318,40.4106,40.4106,40.4106,40.4106,40.4106,40.4106,...,580587776,,,,,,,,,
2012-05-21 09:30:00-04:00,36.53,36.66,33.0,34.03,34.563333,34.563333,34.563333,34.563333,34.563333,34.563333,...,168309808,,,,,,,,,
2012-05-22 09:30:00-04:00,32.61,33.59,30.94,31.0,31.843333,31.843333,31.843333,31.843333,31.843333,31.843333,...,102053808,,,,,,,,,
2012-05-23 09:30:00-04:00,31.37,32.5,31.36,32.0,31.953333,31.953333,31.953333,31.953333,31.953333,31.953333,...,73721120,,,,,,,,,
2012-05-24 09:30:00-04:00,32.95,33.21001,31.77,33.03,32.670003,32.670003,32.670003,32.670003,32.670003,32.670003,...,50275872,,,,,,,,,


In [54]:
#Droping NAN Values 
meta_df = meta_df.dropna(axis=1)
#Review the data frame
meta_df.head()

Unnamed: 0_level_0,open,high,low,close,VWAP,Upper Band #1,Lower Band #1,Upper Band #2,Lower Band #2,Upper Band #3,Lower Band #3,Plot,Plot.1,Plot.2,Volume
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2012-05-18 09:30:00-04:00,42.05,45.0,38.0,38.2318,40.4106,40.4106,40.4106,40.4106,40.4106,40.4106,40.4106,45.0,38.0,42.05,580587776
2012-05-21 09:30:00-04:00,36.53,36.66,33.0,34.03,34.563333,34.563333,34.563333,34.563333,34.563333,34.563333,34.563333,36.66,33.0,36.53,168309808
2012-05-22 09:30:00-04:00,32.61,33.59,30.94,31.0,31.843333,31.843333,31.843333,31.843333,31.843333,31.843333,31.843333,33.59,30.94,32.61,102053808
2012-05-23 09:30:00-04:00,31.37,32.5,31.36,32.0,31.953333,31.953333,31.953333,31.953333,31.953333,31.953333,31.953333,32.5,31.36,31.37,73721120
2012-05-24 09:30:00-04:00,32.95,33.21001,31.77,33.03,32.670003,32.670003,32.670003,32.670003,32.670003,32.670003,32.670003,33.21001,31.77,32.95,50275872


In [57]:
# Filter column and only keep ones needed
meta_df = meta_df.drop(columns=["Upper Band #1", "Lower Band #1", "Upper Band #2", "Lower Band #2", "Upper Band #3", "Lower Band #3","Plot", "Plot.1", "Plot.2","Smoothing Line"], axis=1)

# Review the DataFrame
meta_df.head()

KeyError: "['Smoothing Line'] not found in axis"

In [4]:
# Read the csv file 
tsla_df = pd.read_csv(Path("./Resources/tsla.csv"))

# Convert 'time' column from timestamp (seconds since epoch) to acutal time
tsla_df ['time'] = pd.to_datetime(
    tsla_df['time'],
    unit = 's',
    infer_datetime_format=True,
    utc=True
)    
# Convert timezone from UTC to Eastern Time
tsla_df['time'] = tsla_df['time'].dt.tz_convert('US/Eastern')

# Set 'time' column as the index 
tsla_df.set_index('time', inplace=True)
                      
# Review df
tsla_df.head()   

Unnamed: 0_level_0,open,high,low,close,VWAP,Upper Band #1,Lower Band #1,Upper Band #2,Lower Band #2,Upper Band #3,...,Volume,Volume MA,EMA,Smoothing Line,Developing Poc,Developing VA High,Developing VA Low,Developing Poc.1,Developing VA High.1,Developing VA Low.1
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2012-03-12 09:30:00-04:00,2.312664,2.41933,2.306665,2.400664,2.375553,2.375553,2.375553,2.375553,2.375553,2.375553,...,29467829,16848705.0,1.929784,1.921918,,,,,,
2012-03-13 09:30:00-04:00,2.433997,2.439331,2.366664,2.405998,2.403998,2.403998,2.403998,2.403998,2.403998,2.403998,...,15024660,16731543.65,1.934522,1.92575,,,,,,
2012-03-14 09:30:00-04:00,2.399998,2.399998,2.319998,2.352664,2.357553,2.357553,2.357553,2.357553,2.357553,2.357553,...,12771972,16011617.15,1.938683,1.929832,,,,,,
2012-03-15 09:30:00-04:00,2.351998,2.365331,2.318664,2.333331,2.339109,2.339109,2.339109,2.339109,2.339109,2.339109,...,8573948,14366955.5,1.94261,1.93413,,,,,,
2012-03-16 09:30:00-04:00,2.326664,2.392664,2.321998,2.354665,2.356442,2.356442,2.356442,2.356442,2.356442,2.356442,...,10938550,13249154.35,1.94671,1.938462,,,,,,


In [58]:
#Droping NAN Values 
tsla_df = tsla_df.dropna(axis=1)
#Review the data frame
tsla_df.head()

Unnamed: 0_level_0,open,high,low,close,VWAP,Upper Band #1,Lower Band #1,Upper Band #2,Lower Band #2,Upper Band #3,Lower Band #3,Plot,Plot.1,Plot.2,Volume,Volume MA,EMA,Smoothing Line
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2012-03-12 09:30:00-04:00,2.312664,2.41933,2.306665,2.400664,2.375553,2.375553,2.375553,2.375553,2.375553,2.375553,2.375553,2.41933,2.306665,2.312664,29467829,16848705.0,1.929784,1.921918
2012-03-13 09:30:00-04:00,2.433997,2.439331,2.366664,2.405998,2.403998,2.403998,2.403998,2.403998,2.403998,2.403998,2.403998,2.439331,2.366664,2.433997,15024660,16731543.65,1.934522,1.92575
2012-03-14 09:30:00-04:00,2.399998,2.399998,2.319998,2.352664,2.357553,2.357553,2.357553,2.357553,2.357553,2.357553,2.357553,2.399998,2.319998,2.399998,12771972,16011617.15,1.938683,1.929832
2012-03-15 09:30:00-04:00,2.351998,2.365331,2.318664,2.333331,2.339109,2.339109,2.339109,2.339109,2.339109,2.339109,2.339109,2.365331,2.318664,2.351998,8573948,14366955.5,1.94261,1.93413
2012-03-16 09:30:00-04:00,2.326664,2.392664,2.321998,2.354665,2.356442,2.356442,2.356442,2.356442,2.356442,2.356442,2.356442,2.392664,2.321998,2.326664,10938550,13249154.35,1.94671,1.938462


In [59]:
# Filter column and only keep ones needed
tsla_df = tsla_df.drop(columns=["Upper Band #1", "Lower Band #1", "Upper Band #2", "Lower Band #2", "Upper Band #3", "Lower Band #3","Plot", "Plot.1", "Plot.2","Smoothing Line"], axis=1)

# Review the DataFrame
tsla_df.head()

Unnamed: 0_level_0,open,high,low,close,VWAP,Volume,Volume MA,EMA
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-03-12 09:30:00-04:00,2.312664,2.41933,2.306665,2.400664,2.375553,29467829,16848705.0,1.929784
2012-03-13 09:30:00-04:00,2.433997,2.439331,2.366664,2.405998,2.403998,15024660,16731543.65,1.934522
2012-03-14 09:30:00-04:00,2.399998,2.399998,2.319998,2.352664,2.357553,12771972,16011617.15,1.938683
2012-03-15 09:30:00-04:00,2.351998,2.365331,2.318664,2.333331,2.339109,8573948,14366955.5,1.94261
2012-03-16 09:30:00-04:00,2.326664,2.392664,2.321998,2.354665,2.356442,10938550,13249154.35,1.94671
