In [1]:
# ! pip install kaggle
# ! kaggle datasets download -d dgawlik/nyse
# ! unzip nyse.zip -d data_nyse
# ! rm nyse.zip
# ! ls
# ! pip install  matplotlib
# ! pip install  seaborn
##https://www.kaggle.com/dgawlik/nyse##

In [2]:
# ! pip install mpld3

In [3]:
import pandas as pd
import numpy as np

import mpld3#zoomable graphs
mpld3.enable_notebook()

import matplotlib.pyplot as plt
import seaborn as sns
# Plot styling: apply seaborn's 'darkgrid' style, then matplotlib's "fivethirtyeight" style sheet.
sns.set_style('darkgrid')
plt.style.use("fivethirtyeight")
%matplotlib inline
# Passing None removes pandas' display limit for the given option.
pd.set_option('display.max_columns', None)#Display all columns
pd.set_option('display.max_rows', None)#Display all rows

Context
This dataset is a playground for fundamental and technical analysis. It is said that 30% of traffic on stocks is already generated by machines; can trading be fully automated? If not, there is still a lot to learn from historical data.

Content
The dataset consists of the following files:

*   **prices.csv**: raw, as-is daily prices. Most of the data spans from 2010 to the end of 2016; for companies newer to the stock market, the date range is shorter. There were approximately 140 stock splits in that period, and this file does not account for them.
*    **prices-split-adjusted.csv**: same as prices.csv, but with adjustments for stock splits applied.
*    **securities.csv**: general description of each company, broken down by sector.
*    **fundamentals.csv**: metrics extracted from annual SEC 10-K filings (2012-2016); should be enough to derive most of the popular fundamental indicators.
Acknowledgements
Prices were fetched from Yahoo Finance, fundamentals are from Nasdaq Financials, extended by some fields from EDGAR SEC databases.

Inspiration
Here are a couple of things one could try out with this data:

One day ahead prediction: Rolling Linear Regression, ARIMA, Neural Networks, LSTM
Momentum/Mean-Reversion Strategies
Security clustering, portfolio construction/hedging
Which company has the biggest chance of going bankrupt? Which one is undervalued (and how did prices behave afterwards)? What is the Return on Investment?

Import Data

In [4]:
# Each CSV from the dataset folder is loaded right next to the path it comes from.

# Raw daily prices.
prices_data_path = './data_nyse/prices.csv'
prices_data = pd.read_csv(prices_data_path)

# Per-company descriptions.
securities_data_path = './data_nyse/securities.csv'
securities_data = pd.read_csv(securities_data_path)

# Daily prices adjusted for stock splits.
prices_split_data_path = './data_nyse/prices-split-adjusted.csv'
prices_split_data = pd.read_csv(prices_split_data_path)

# Annual fundamentals.
fundamentals_data_path = './data_nyse/fundamentals.csv'
fundamentals_data = pd.read_csv(fundamentals_data_path)

Inspect Data

In [5]:
# Preview the first rows of the raw prices table.
prices_data.head()

Unnamed: 0,date,symbol,open,close,low,high,volume
0,2016-01-05 00:00:00,WLTW,123.43,125.839996,122.309998,126.25,2163600.0
1,2016-01-06 00:00:00,WLTW,125.239998,119.980003,119.940002,125.540001,2386400.0
2,2016-01-07 00:00:00,WLTW,116.379997,114.949997,114.93,119.739998,2489500.0
3,2016-01-08 00:00:00,WLTW,115.480003,116.620003,113.5,117.440002,2006300.0
4,2016-01-11 00:00:00,WLTW,117.010002,114.970001,114.089996,117.330002,1408600.0


In [6]:
# Preview the first rows only. head() must be the cell's last expression to be
# rendered; printing the whole frame would dump every row because
# display.max_rows is set to None in the configuration cell.
securities_data.head()

NameError: name 'securities' is not defined

In [None]:
# Preview the first rows of the split-adjusted prices table.
prices_split_data.head()

In [None]:
# Preview the first rows of the fundamentals table.
fundamentals_data.head()

Function to give us an idea of our Dataframes

In [None]:
def observe_data(dataframeName):
    """Print a quick structural and statistical overview of a DataFrame.

    Output, in order: the frame's ``info()`` report, its shape, its total
    element count, its row count, its column count, and finally the
    ``describe()`` summary table.

    Parameters
    ----------
    dataframeName : pandas.DataFrame
        The frame to summarise.
    """
    dataframeName.info()

    # Label/value pairs printed one per line, in this order.
    overview = (
        ("Dataframe Shape", dataframeName.shape),
        ("Dataframe Size", dataframeName.size),
        ("Number Of Rows", len(dataframeName)),
        ("Number Of Columns", len(dataframeName.columns)),
    )
    for label, value in overview:
        print(f"{label}: {value}")

    # Summary statistics (count, mean, std, percentiles, ...) of the frame.
    # A single column can be summarised the same way: dataframe.columnName.describe()
    statistics = dataframeName.describe()
    print(statistics)

In [None]:
# Summarise the split-adjusted prices table with observe_data.
observe_data(prices_split_data)

Function plotting historical closing values of a specific company from *prices_split_data* dataframe

In [None]:
# Distinct ticker symbols present in the split-adjusted prices table.
unique_symbols = prices_split_data.symbol.unique()
string = ', '.join(unique_symbols)
# Report the number of symbols actually found instead of a hard-coded "500".
print(f"The {len(unique_symbols)} companies are: " + string)

In [None]:
def plot_closing_values(company_name):
    """Plot the closing prices of one company against its trading dates.

    Rows are taken from the module-level ``prices_split_data`` frame.

    Parameters
    ----------
    company_name : str
        Ticker symbol to match against the ``symbol`` column, e.g. ``"GOOG"``.

    Raises
    ------
    ValueError
        If no row of ``prices_split_data`` has this symbol.
    """
    company_prices = prices_split_data.loc[prices_split_data["symbol"] == company_name]
    if company_prices.empty:
        raise ValueError(f"No price rows found for symbol {company_name!r}")

    # Index the closes by parsed date so the x-axis shows time instead of the
    # row labels the company's rows happen to have in the full table.
    closes = company_prices.set_index(pd.to_datetime(company_prices["date"]))["close"]
    ax = closes.plot()
    ax.set_title(company_name)
    ax.set_xlabel('Date')
    ax.set_ylabel('Close')

In [None]:
! pip install plotly

In [None]:
import plotly.graph_objects as go
def plot_candlestick_chart(company_name):
    """Show an interactive candlestick chart (open/high/low/close) for one company.

    Rows are taken from the module-level ``prices_split_data`` frame.

    Parameters
    ----------
    company_name : str
        Ticker symbol to match against the ``symbol`` column, e.g. ``"GOOG"``.

    Raises
    ------
    ValueError
        If no row of ``prices_split_data`` has this symbol.
    """
    df = prices_split_data.loc[prices_split_data["symbol"] == company_name]
    if df.empty:
        raise ValueError(f"No price rows found for symbol {company_name!r}")

    candles = go.Candlestick(
        x=df["date"],
        open=df["open"],
        high=df["high"],
        low=df["low"],
        close=df["close"],
    )
    fig = go.Figure(data=[candles])
    # Title and axis labels so the figure identifies the company on its own,
    # matching what plot_closing_values does for the line chart.
    fig.update_layout(title=company_name, xaxis_title='Date', yaxis_title='Price')
    fig.show()

In [None]:
# Line chart of the closing prices for the GOOG symbol.
plot_closing_values("GOOG")

In [None]:
# Candlestick chart for the GOOG symbol.
plot_candlestick_chart("GOOG")

A moving average (MA) is a widely used indicator in technical analysis that helps smooth out price action by filtering out the “noise” from random short-term price fluctuations.

In [None]:
def _plot_close_with_averages(dates, close, averages, company_name):
    """Draw the close series plus each labelled average on one figure and show it."""
    colors = ('orange', 'magenta', 'red')
    plt.plot(dates, close, label=str(company_name))
    for position, (label, series) in enumerate(averages.items()):
        # Colours repeat if more averages than colours are requested.
        plt.plot(dates, series, label=label, color=colors[position % len(colors)])
    plt.title(company_name)
    plt.ylabel('Close')
    plt.legend(loc='upper left')
    plt.show()


def moving_average(company_name, windows=(20, 50, 100)):
    """Plot a company's closing prices with simple and exponential moving averages.

    Three figures are shown in order: the closing prices alone, the closing
    prices with one simple moving average (SMA) per window, and the closing
    prices with one exponential moving average (EMA) per window.

    Rows are taken from the module-level ``prices_split_data`` frame.

    Parameters
    ----------
    company_name : str
        Ticker symbol to match against the ``symbol`` column, e.g. ``"GOOG"``.
    windows : sequence of int, optional
        Window lengths in rows, used as the rolling window for the SMAs and as
        the ``span`` for the EMAs. Defaults to ``(20, 50, 100)``.

    Raises
    ------
    ValueError
        If no row of ``prices_split_data`` has this symbol.
    """
    company_prices = prices_split_data.loc[prices_split_data["symbol"] == company_name]
    if company_prices.empty:
        raise ValueError(f"No price rows found for symbol {company_name!r}")

    close = company_prices["close"].reset_index(drop=True)
    # The CSV was read without date parsing; convert the column so matplotlib
    # draws a time axis rather than treating every date string as a category.
    dates = pd.to_datetime(company_prices["date"]).reset_index(drop=True)

    # Figure 1: closing prices only.
    plt.plot(dates, close)
    plt.title(company_name)
    plt.ylabel('Close')
    plt.show()

    # Figure 2: SMA - Simple Moving Average (NaN until a full window is available).
    simple_averages = {
        f'{company_name} {window} Day SMA': close.rolling(window).mean()
        for window in windows
    }
    _plot_close_with_averages(dates, close, simple_averages, company_name)

    # Figure 3: EMA - Exponential Moving Average.
    exponential_averages = {
        f'{company_name} {window} Day EMA': close.ewm(span=window, adjust=False).mean()
        for window in windows
    }
    _plot_close_with_averages(dates, close, exponential_averages, company_name)


In [None]:
# Closing prices with 20/50/100-row simple and exponential moving averages for GOOG.
moving_average("GOOG")

We've done some baseline analysis, let's go ahead and dive a little deeper. We're now going to analyze the risk of the stock. In order to do so we'll need to take a closer look at the daily changes of the stock, and not just its absolute value, using the *pct_change* function, which will return the percentage change between the current and a prior element.

In [None]:
def risk_computation(company_name):
    """Show the row-over-row percentage change of a company's closing price.

    Prints the first few returns, then draws two figures: the return series
    over time, and a histogram of the returns with a KDE overlay.

    Rows are taken from the module-level ``prices_split_data`` frame.

    Parameters
    ----------
    company_name : str
        Ticker symbol to match against the ``symbol`` column, e.g. ``"GOOG"``.

    Raises
    ------
    ValueError
        If no row of ``prices_split_data`` has this symbol.
    """
    company_prices = prices_split_data.loc[prices_split_data["symbol"] == company_name]
    if company_prices.empty:
        raise ValueError(f"No price rows found for symbol {company_name!r}")

    # Index by parsed date so the series plot has a time axis.
    closes = company_prices.set_index(pd.to_datetime(company_prices["date"]))["close"]
    daily_returns = closes.pct_change()
    print(daily_returns.head())

    # Figure 1: the return series itself.
    _, series_ax = plt.subplots()
    daily_returns.plot(ax=series_ax, legend=True, linestyle='--', marker='o')
    series_ax.set_title(company_name)
    series_ax.set_xlabel('Date')
    series_ax.set_ylabel('Daily Return')

    # Figure 2: Histogram & KDE plot.
    # The first return has no prior element and is NaN, hence dropna().
    _, hist_ax = plt.subplots(figsize=(12, 12))
    # histplot replaces the deprecated distplot; stat='density' keeps the
    # histogram on the same scale as the KDE curve.
    sns.histplot(daily_returns.dropna(), bins=100, color='purple',
                 kde=True, stat='density', ax=hist_ax)
    # Returns are the variable being binned, so they belong on the x-axis.
    hist_ax.set_xlabel('Daily Return')
    hist_ax.set_ylabel('Density')
    hist_ax.set_title(company_name)


In [None]:
# Percentage-change series and its histogram for GOOG.
risk_computation("GOOG")

Grab all the closing prices for the tech stock list into one DataFrame

In [None]:
# Symbol whose closing prices are selected below.
company_name = "GOOG"
# NOTE(review): despite the "_df" suffix this is the `close` column (a Series)
# of the rows for the single symbol above, not a multi-company DataFrame.
closing_df = prices_split_data.loc[prices_split_data["symbol"] == company_name].close

In [None]:
# Preview the first closing prices selected above.
closing_df.head()

Now that we have all the closing prices, let's go ahead and get the daily returns for a collection of companies