# 1. Import required libraries

In [1]:
import datetime  
import pandas  as pd 
import numpy as np
import matplotlib.pyplot as plt 
import missingno as msno # Package to visualize missing values\
import seaborn as sns
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, RangeTool
from bokeh.models.widgets import Dropdown
from bokeh.io import output_notebook, curdoc,show
from bokeh.layouts import column
from bokeh.models import BooleanFilter, CDSView, Select, Range1d, HoverTool
from bokeh.palettes import Category20
from bokeh.models.formatters import NumeralTickFormatter
from bokeh.resources import INLINE
from ipywidgets import interact
%matplotlib inline
output_notebook()
import plotly.graph_objects as go
import cufflinks as cf
import mplfinance as fplt
import plotly.express as px
import chart_studio.plotly as py
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.offline import plot
import ipywidgets as widgets
import plotly.graph_objs as go
init_notebook_mode(connected=True)
cf.go_offline()
import seaborn as sns

# 2. Load the data using pandas method

In [2]:
# Read the full price history; the relative path is resolved against the
# notebook's working directory, so the CSV has to be present there.
csv_path = '11_all_stocks_5yr.csv'
stock = pd.read_csv(csv_path)
stock.head()

FileNotFoundError: [Errno 2] No such file or directory: '11_all_stocks_5yr.csv'

# 3. Exploring some basic information about the dataset

In [None]:
# Unpack the (rows, columns) shape of the loaded frame and report it.
nRow, nCol = stock.shape
print('There are {} rows and {} columns'.format(nRow, nCol))

In [None]:
# Print the column names, dtypes and non-null counts of the loaded frame.
stock.info()

The dataset has 7 columns in total: 5 numerical columns and 2 categorical features.

In our case `date` is stored as a categorical (string) column, so we need to convert it to a datetime type.

# 4. Check missing value

In [None]:
# Count the missing cells in each column (printed in ascending order),
# then draw the missingno bar chart for the same frame.
missing_counts = stock.isnull().sum()
print(missing_counts.sort_values())
msno.bar(stock)

The number of missing cells in the dataset is negligible, so the affected rows can be deleted without materially affecting the dataset.

In [None]:
# Delete missing values: drop every row that contains at least one missing cell,
# then show the resulting shape.
stock = stock.dropna(axis=0, how='any')
stock.shape

After deleting the rows that contain cells with missing data, we work with 619029 rows and 7 columns.

# 5. Transform to datetime column date

In [None]:
# Parse the `date` column into datetime values (the column keeps its position),
# then display the frame info to confirm the new dtype.
stock = stock.assign(date=pd.to_datetime(stock['date']))
stock.info()

# 6. Add a `Status` column describing each day: "Increase", "Decrease" or "Equal"

In [None]:
def inc_dec(close,open):
    """Label one trading day by comparing its closing price with its opening price.

    Parameters:
        close: closing price of the day.
        open: opening price of the day (the name shadows the built-in `open`
            inside this function; it is kept so keyword callers keep working).

    Returns:
        "Increase" when close > open, "Decrease" when close < open,
        and "Equal" in every other case.
    """
    if close > open:
        return "Increase"
    if close < open:
        return "Decrease"
    return "Equal"

# zip() yields (open, close) pairs in that order. Unpack them under matching names
# and hand them to inc_dec(close, open) the right way round, so a day that closes
# above its open is labelled "Increase" (the two values used to be swapped).
stock["Status"] = [
    inc_dec(close_price, open_price)
    for open_price, close_price in zip(stock.open, stock.close)
]
stock

The goal is to see at a glance on which days the stock price went up and on which days it went down.

# 5. Pick the tech stocks of the biggest companies in the US to compute statistics and visualize: Apple, Google, Facebook, Microsoft

In [None]:
# Keep only the rows of the four selected tech tickers and index them by trading date.
tech_names = ["FB", "AAPL", "MSFT", "GOOGL"]
is_tech = stock["Name"].isin(tech_names)
tech_stock = stock.loc[is_tech].set_index('date')
tech_stock

# 6. Computing statistics for each month and year, grouped by name

In [None]:
# Descriptive statistics computed separately for every ticker in the full dataset.
stock.groupby('Name').describe()

In [None]:
# Describe the 4 companies: the same per-ticker summary, restricted to the tech subset.
tech_stock.groupby('Name').describe()

In [None]:
# Box plot of the closing price, one box per ticker, to look for outliers.
fig = px.box(tech_stock, x="Name",y="close")
fig.show()

From the visualization above, it can clearly be seen that there are no unusual outliers.

# 7. Daily return percentage and analyzing distributions of daily return

In [None]:
# Daily return of each ticker (close / previous close - 1), stored in a column
# named after the ticker.
for ticker in ("AAPL", "MSFT", "FB", "GOOGL"):
    ticker_close = tech_stock.loc[(tech_stock["Name"] == ticker)]['close']
    tech_stock[ticker] = (ticker_close / ticker_close.shift(1)) - 1

In [None]:
# tech_stock holds one row per (date, ticker), and the return columns were aligned
# on the date index, so all rows of a date carry the same four return values.
# Keep a single row per date so each observation is plotted once instead of once per ticker.
return_columns = ["AAPL", "MSFT", "FB", "GOOGL"]
stocks_return = tech_stock.loc[~tech_stock.index.duplicated(), return_columns].sort_index()

# `markers` only has a meaning together with `hue`; a three-item list without hue is
# ignored or rejected depending on the seaborn version, so it is not passed.
sns.pairplot(stocks_return)
stocks_return.head()

In [None]:
# 2x2 grid with one daily-return line chart per ticker.
fig = plt.figure(figsize = (12,6))
for position, ticker in enumerate(['AAPL', 'FB', 'MSFT', 'GOOGL'], start=1):
    plt.subplot(2, 2, position)
    stocks_return[ticker].plot(legend = ticker)

In [None]:
def _ticker_frame(ticker):
    """Return an independent copy of one ticker's rows from `stock`, indexed by date.

    The explicit copy matters because later cells add columns to these frames;
    without it they are selections of `stock`, and pandas emits a
    SettingWithCopyWarning when a column is assigned.
    """
    frame = stock.loc[stock["Name"] == ticker].copy()
    frame.index = frame.date  # keep `date` as a column as well as the index
    return frame


AAPL = _ticker_frame("AAPL")
GOOGL = _ticker_frame("GOOGL")
FB = _ticker_frame("FB")
MSFT = _ticker_frame("MSFT")

In [None]:
# Histograms of the tech subset, drawn separately for each ticker group.
tech_stock.groupby("Name").hist(figsize=(12, 12))

# 8. Plotting closing prices over the years

In [None]:
# Disabled static (matplotlib) version of the per-company closing-price charts.
# Everything below is one bare string literal, so none of it is executed.
'''
## Visualization of APPL stock close price in year
fig = plt.figure(figsize = (20,10))
sns.set_theme(color_codes = True)
plt.subplot(2,2,1)
color_1 = 'tab:green'
plt.title("Apple stock in years from 2013-2018")
plt.xlabel("Month",color = color_1)
plt.ylabel("close",color = color_1)
plt.plot(tech_stock.loc[(tech_stock["Name"] == "AAPL")]["close"],label = "Apple",color = color_1)
plt.legend()

### FB ###################
sns.set_theme(color_codes = True) 
plt.subplot(2,2,2)
color_2 = 'tab:blue'
plt.xlabel("Month",color = color_1)
plt.ylabel("close",color = color_1)
plt.title("Facebook in years from 2013 to 2018") 
plt.plot(tech_stock.loc[(tech_stock["Name"] == "FB")]["close"],label = "Facebook",color = color_2)
plt.legend()
#########################
sns.set_theme(color_codes = True) 
plt.subplot(2,2,3)
color_2 = 'tab:orange'
plt.xlabel("Month",color = color_1)
plt.ylabel("close",color = color_1)
plt.title("Microsoft in 2013-2018") 
plt.plot(tech_stock.loc[(tech_stock["Name"] == "MSFT")]["close"],label = "Microsoft",color = color_2)
plt.legend()
#####################################
sns.set_theme(color_codes = True)
plt.subplot(2,2,4)
color_2 = 'tab:red'
plt.xlabel("Month",color = color_1)
plt.ylabel("close",color = color_1)
plt.title("Google in 2013-2018") 
plt.plot(tech_stock.loc[(tech_stock["Name"] == "GOOGL")]["close"],label = "Goolge",color = color_2)
plt.legend()

plt.show()
'''

In [None]:
# close price with menu and interactive
stocks_close = pd.DataFrame({
    'MSFT': MSFT['close'],
    'AAPL': AAPL['close'],
    'FB': FB['close'],
    'GOOGL': GOOGL['close'],
})

# Chart title shown when a single ticker is selected from the menu.
close_titles = {'MSFT': 'MICROSOFT', 'AAPL': 'APPLE', 'FB': 'FACEBOOK', 'GOOGL': 'GOOGLE'}
close_tickers = stocks_close.columns.to_list()

fig = go.Figure()

# The loop variable is deliberately not named `column`: that name is bound to
# bokeh.layouts.column by the imports cell and would be overwritten.
for ticker in close_tickers:
    fig.add_trace(
        go.Scatter(
            x = stocks_close.index,
            y = stocks_close[ticker],
            name = ticker
        )
    )

# Build the buttons from the same list that created the traces, so every
# visibility mask lines up with the trace order by construction.
close_buttons = [
    dict(label = 'All',
         method = 'update',
         args = [{'visible': [True] * len(close_tickers)},
                 {'title': 'All',
                  'showlegend': True}])
]
for ticker in close_tickers:
    close_buttons.append(
        dict(label = ticker,
             method = 'update',
             args = [{'visible': [name == ticker for name in close_tickers]},
                     {'title': close_titles[ticker],
                      'showlegend': True}])
    )

fig.update_layout(
    updatemenus=[go.layout.Updatemenu(
        active=0,  # index 0 is the 'All' button, matching the initial all-visible state
        buttons=close_buttons
    )]
)

fig.show()

The charts show the growth of US tech companies between 2013 and 2018. As you can see, all four stocks show an overall increase, with Facebook witnessing the fastest growth.

# 9. Plotting volume feature.

In [None]:
# Now let's plot the total volume of stock being traded each day
tech_list = ['AAPL', 'GOOGL', 'MSFT', 'FB']  # same order as company_list
company_list = [AAPL, GOOGL, MSFT, FB]
company_name = ["APPLE", "GOOGLE", "MICROSOFT", "FACEBOOK"]

# Draw with matplotlib on explicit axes. The cufflinks `.iplot` call used before
# renders separate plotly figures, which left these matplotlib subplots empty.
volume_fig, volume_axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))

for ax, ticker, company in zip(volume_axes.flat, tech_list, company_list):
    ax.bar(company.index, company['volume'], color='red')
    ax.set_ylabel('volume')
    ax.set_xlabel(None)
    ax.set_title(ticker)

# Let matplotlib space the panels instead of hand-tuned subplot margins.
volume_fig.tight_layout()

In [None]:
# volume with menu
stocks_volume = pd.DataFrame({
    'MSFT': MSFT['volume'],
    'AAPL': AAPL['volume'],
    'FB': FB['volume'],
    'GOOGL': GOOGL['volume'],
})

# Chart title shown for each ticker.
volume_titles = {'MSFT': 'MICROSOFT', 'AAPL': 'APPLE', 'FB': 'FACEBOOK', 'GOOGL': 'GOOGLE'}
volume_tickers = stocks_volume.columns.to_list()

fig = go.Figure()

# Only the first trace starts visible, so the initial view matches the button the
# menu marks as active (index 0). The loop variable is not named `column` because
# that name is bound to bokeh.layouts.column by the imports cell.
for position, ticker in enumerate(volume_tickers):
    fig.add_trace(
        go.Bar(
            x = stocks_volume.index,
            y = stocks_volume[ticker],
            name = ticker,
            marker_color='black',
            visible = (position == 0),
        )
    )

# One button per trace, built from the same list that created the traces so the
# index of True in each mask always aligns with the index of its trace.
volume_buttons = [
    dict(label = ticker,
         method = 'update',
         args = [{'visible': [name == ticker for name in volume_tickers]},
                 {'title': volume_titles[ticker],
                  'showlegend': True}])
    for ticker in volume_tickers
]

fig.update_layout(
    title=volume_titles[volume_tickers[0]],
    showlegend=True,
    updatemenus=[go.layout.Updatemenu(
        active=0,
        buttons=volume_buttons
    )]
)

fig.show()

Volume is one of the most important inputs for short-term investors and traders when deciding on a trade.
Because of the size of the dataset, I only plot selected stocks here; you can do the same for any other stock by filtering on its name.
Volume plays an important role in technical analysis and features prominently among some key technical indicators.

# 10. Simple moving average: 20, 50 and 200 days

In [None]:
ma_day = [ 20, 50,200]

# Attach one rolling-mean column of the close per window length to every company frame.
for ma in ma_day:
    column_name = f"SMA for {ma} days"
    for company in company_list:
        company[column_name] = company['close'].rolling(ma).mean()

fig, axes = plt.subplots(nrows=2, ncols=2)
fig.set_size_inches(12, 8)

# Close price plus the three moving averages, one panel per company.
sma_columns = ['close', 'SMA for 20 days', 'SMA for 50 days','SMA for 200 days']
sma_panels = [
    (AAPL, 'APPLE', axes[0,0]),
    (GOOGL, 'GOOGLE', axes[0,1]),
    (MSFT, 'MICROSOFT', axes[1,0]),
    (FB, 'FACEBOOK', axes[1,1]),
]
for panel_frame, panel_title, panel_ax in sma_panels:
    panel_frame[sma_columns].plot(ax=panel_ax)
    panel_ax.set_title(panel_title)

fig.tight_layout()

The moving average is one of the most widely used analytical tools in trading. Technical traders often use it to decide how to follow a trend, based on the trend that has already been established.

# 11. Bollinger band

In [None]:
# 20-day Bollinger bands (rolling mean +/- 2 rolling standard deviations of the
# close), drawn as one figure per ticker. The helper columns on tech_stock are
# overwritten on every pass, so after the loop they hold the last ticker's values.
bollinger_targets = [
    ("AAPL", "APPLE"),
    ("FB", "FACEBOOK"),
    ("MSFT", "MICROSOFT"),
    ("GOOGL", "GOOGLE"),
]
for ticker, label in bollinger_targets:
    ticker_rows = (tech_stock['Name'] == ticker)
    ticker_close = tech_stock.loc[ticker_rows]["close"]
    tech_stock['MA20'] = ticker_close.rolling(window = 20).mean()
    tech_stock['20dSTD'] = ticker_close.rolling(window = 20).std()
    tech_stock['Upper'] = tech_stock['MA20'] + (tech_stock['20dSTD']*2)
    tech_stock['Lower'] = tech_stock['MA20'] - (tech_stock['20dSTD']*2)
    bands = tech_stock.loc[ticker_rows][['close','MA20','Upper','Lower']]
    if ticker == "AAPL":
        aapl_bb = bands  # keep the Apple frame available under its own name
    bands.plot(figsize = (20,10),legend = True,title = f'{label} in years 2013-2018')
    plt.axis('tight')
    plt.ylabel('close')
    plt.grid(True)


- Bollinger bands show the volatility of stock prices and the potential for a reversal within a specific time period.
- Microsoft stock trading volume increased and decreased quite dramatically.

# 12. Candle Sticks

In [None]:
INCREASING_COLOR = 'green'
DECREASING_COLOR = 'red'
############
comp=GOOGL
# Candlestick trace drawn on the y2 axis. The legend label is read from the data
# itself, so it always names the ticker currently held in `comp` (it used to be a
# hard-coded 'GS' that did not match the plotted company).
data = [ dict(
    type = 'candlestick',
    x = comp.index, 
    open = comp["open"],
    close =comp["close"],
    low = comp["low"], 
    high = comp["high"],
    yaxis = 'y2',
    name = comp["Name"].iloc[0],
    increasing = dict( line = dict( color = INCREASING_COLOR ) ),
    decreasing = dict( line = dict( color = DECREASING_COLOR ) ),
) ]

In [None]:
# Figure described as a plain dict: `yaxis` occupies the bottom 20% of the plot
# height (tick labels hidden) and `yaxis2` the band from 20% to 80%.
layout=dict()
fig = dict( data=data, layout=layout )
fig['layout'] = dict(
    plot_bgcolor = 'rgb(250, 250,250)',
    xaxis = dict( rangeselector = dict( visible = True ) ),
    yaxis = dict( domain = [0, 0.2], showticklabels = False ),
    yaxis2 = dict( domain = [0.2, 0.8] ),
    legend = dict( orientation = 'h', y=0.9, x=0.3, yanchor='bottom' ),
    margin = dict( t=40, b=40, r=40, l=40 ),
)


In [None]:
# Range-selector buttons for the x axis: preset look-back windows plus "show all".
# The key is spelled `visible`; the earlier spelling `visibe` is not a rangeselector
# attribute and only went unnoticed because the figure is rendered with validate=False.
rangeselector=dict(
    visible = True,
    x = 0, y = 0.9,
    bgcolor = 'rgba(250, 200, 250, 0.4)',
    font = dict( size = 13 ),
    buttons=list([
        dict(count=1,
             label='reset',
             step='all'),
        dict(count=1,
             label='1yr',
             step='year',
             stepmode='backward'),
        dict(count=3,
            label='3 mo',
            step='month',
            stepmode='backward'),
        dict(count=1,
            label='1 mo',
            step='month',
            stepmode='backward'),
        dict(step='all')
    ]))

In [None]:
# Swap the x-axis range selector in the layout for the fully configured `rangeselector` dict.
fig['layout']['xaxis']['rangeselector'] = rangeselector
def movingaverage(interval, window_size=10):
    """Smooth `interval` with an equally weighted moving average.

    Parameters:
        interval: one-dimensional sequence of values to smooth.
        window_size: number of points in the averaging window (default 10).

    Returns:
        numpy array from `np.convolve(..., mode='same')`, i.e. the same length as
        the longer of the input and the window.
    """
    kernel = np.ones(int(window_size))
    kernel = kernel / float(window_size)
    return np.convolve(interval, kernel, mode='same')
# Moving average of the close using the helper's default window. Five points are
# clipped from each end, where the 'same'-mode convolution runs past the data.
edge = 5
mv_y = movingaverage(comp["close"])[edge:-edge]
mv_x = list(comp.index)[edge:-edge]

moving_average_trace = dict(
    x=mv_x, y=mv_y, type='scatter', mode='lines',
    line = dict( width = 1 ),
    marker = dict( color = 'orange' ),
    yaxis = 'y2', name='Moving Average',
)
fig['data'].append(moving_average_trace)

In [None]:
# set volume bar chart colors
# One colour per bar: the increasing colour when the close is above the previous
# day's close, the decreasing colour otherwise. The first day has no previous close
# (its comparison against NaN is False), so it gets the decreasing colour.
# Computing this for every row keeps colours and bars the same length and aligned;
# the earlier loop started at index 1 and produced one colour too few.
closes = comp["close"]
colors = np.where(closes > closes.shift(1), INCREASING_COLOR, DECREASING_COLOR).tolist()

# add volume bar chart
fig['data'].append( dict( x=comp.index, y=comp.volume,                         
                         marker=dict( color=colors ),
                         type='bar', yaxis='y', name='Volume' ) )

In [None]:
def bbands(price, window_size=10, num_of_std=5):
    """Compute a rolling mean with an upper and a lower band around it.

    Parameters:
        price: object exposing `.rolling(window=...)` with `.mean()` and `.std()`
            (the notebook passes a pandas Series of closing prices).
        window_size: number of observations in the rolling window (default 10).
        num_of_std: how many rolling standard deviations the bands sit away from
            the rolling mean (default 5).

    Returns:
        Tuple `(rolling_mean, upper_band, lower_band)`.
    """
    roller = price.rolling(window=window_size)
    center = roller.mean()
    spread = roller.std() * num_of_std
    return center, center + spread, center - spread

In [None]:
import plotly.graph_objs as go
# these two lines allow your code to show up in a notebook
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode()
import plotly

In [None]:
bb_avg, bb_upper, bb_lower = bbands(comp.close)

# Both band edges share the same styling and legend group; only the upper edge
# carries a legend name, the lower edge is hidden from the legend.
band_traces = (
    (bb_upper, dict(name='Bollinger Bands')),
    (bb_lower, dict(showlegend=False)),
)
for band_values, legend_options in band_traces:
    fig['data'].append( dict( x=comp.index, y=band_values, type='scatter', yaxis='y2',
                             line = dict( width = 1 ),
                             marker=dict(color='#ccc'), hoverinfo='none',
                             legendgroup='Bollinger Bands', **legend_options ) )

# Render the assembled figure dict inline in the notebook.
plotly.offline.init_notebook_mode()
plotly.offline.iplot(fig,filename = 'candlestick-test-3',validate = False )

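- Facebook's stock price has trended upward strongly. Between June 2013 and January 2014 the stock rose sharply, which we can observe from the Bollinger Bands and the trading volume. (Note: the candlestick cell above is configured with `comp = GOOGL`; set `comp = FB` to reproduce the Facebook chart discussed here.)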