In [1]:
import ipywidgets as widgets
from IPython.display import display, HTML

# JavaScript call applied to the code cells for each toggle state.
javascript_functions = {
    False: "hide()",
    True: "show()",
}
# Label displayed on the toggle button for each toggle state.
button_descriptions = {
    False: "Show code",
    True: "Hide code",
}


def toggle_code(state):
    """
    Show or hide the ``div.input`` elements of the page.

    ``state`` is looked up in ``javascript_functions`` to pick the call
    (``show()`` for ``True``, ``hide()`` for ``False``); the call is wrapped
    in a ``<script>`` tag and rendered as HTML output.
    """
    js_call = javascript_functions[state]
    snippet = '<script>$("div.input").{}</script>'.format(js_call)
    display(HTML(snippet))


def button_action(value):
    """
    Observer callback for the toggle button.

    Reads the new state from the change object, applies it through
    ``toggle_code`` and relabels the owning widget to match.
    """
    new_state = value.new
    toggle_code(new_state)

    toggle_button = value.owner
    toggle_button.description = button_descriptions[new_state]

# Start with the code hidden, then render a button that toggles it.
state = False
toggle_code(state)

button = widgets.ToggleButton(
    value=state,
    description=button_descriptions[state],
)
# Re-run button_action whenever the button's "value" trait changes.
button.observe(button_action, names="value")

display(button)

ToggleButton(value=False, description='Show code')

# 1. Introduction

## 1.1 Understanding Stock Market

**1. Stock Market :**  
A stock market or share market is the aggregation of buyers and sellers of stocks, which represent ownership claims on businesses. Investment in the stock market is most often done via [stockbrokerages](https://en.wikipedia.org/wiki/Stockbroker) and [electronic trading platforms](https://en.wikipedia.org/wiki/Electronic_trading_platform). Investments are usually made with an investment strategy in mind.

Every investor looks for a profit by buying stocks at a low price and selling them at a high price. The price of a stock usually depends on the gap between supply and demand. The following points illustrate how the price of a stock changes.  
1. If the number of buyers who want to buy a share is greater than the number of sellers selling it, then sellers are in control and they can fix the price of the stock.
2. If the number of buyers is less than the number of sellers, then buyers are in control and they can fix the price.  

**2. Stock Index :**  
A stock index, or stock market index, is an index that measures a stock market or a subset of the stock market. It helps investors compare current price levels with past prices to calculate market performance.  

The major stock indices in the USA are:  
a) [Nasdaq Composite](https://en.wikipedia.org/wiki/NASDAQ_Composite)  
b) [S&P 500](https://en.wikipedia.org/wiki/S%26P_500_Index)  
c) [DJIA](https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average)  

The major stock market indices in India are:  
a) [BSE Sensex](https://en.wikipedia.org/wiki/BSE_SENSEX)  
b) [NSE Nifty](https://en.wikipedia.org/wiki/NIFTY_50)  

You can understand more about stock market [here](https://www.youtube.com/watch?v=ZCFkWDdmXG8).  


## 1.2 Problem Description  

Every stock market index dataset records the low, high, open and closing prices for each trading day.  
**Low:** Low indicates the lowest price of the stock on that day.  
**High:** High indicates the highest price of the stock on that day.  
**Open:** Open indicates the opening price of the stock on that day.  
**Close:** Close indicates the closing price of the stock on that day.  

If the closing price is greater than the opening price, it means sellers are in control. Otherwise, buyers are in control.  

**The main objective in my problem is to predict the closing price of a stock market index.**  



In [9]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from plotly.offline import plot
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import cufflinks as cf
import plotly.express as px

import ipywidgets as widgets
from ipywidgets import interact, interact_manual

# 2. Understanding the dataset

In [3]:
# Read the index history and convert the "Date" column to datetimes in one chain.
data_path = '../DataSet/'
raw_data = (
    pd.read_csv(data_path + 'nasdaq/finance.txt')
    .assign(Date=lambda frame: pd.to_datetime(frame["Date"]))
)
print("Top 5 rows of the dataset:")
raw_data.head()

Top 5 rows of the dataset:


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,1991-01-02,373.0,373.5,371.799988,372.200012,372.200012,92020000
1,1991-01-03,371.200012,371.799988,367.399994,367.5,367.5,108390000
2,1991-01-04,366.5,367.899994,365.899994,367.200012,367.200012,103830000
3,1991-01-07,363.5,365.799988,360.100006,360.200012,360.200012,109460000
4,1991-01-08,359.100006,360.5,358.200012,359.0,359.0,111730000


The above data is the stock index of the **NASDAQ Composite** from January 2nd, 1991 to June 19th, 2020.

In [4]:
# Show the dtype pandas holds for each column of raw_data.
raw_data.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [5]:
# Number of rows in the dataset.
print("number of data points:", len(raw_data))

number of data points: 7424


# Descriptive Analysis

In [6]:
print("Descriptive statistics of each column:")

@interact
def describe_each_column(column = ["Close", "Open", "High", "Low", "Volume"],
                         start_date=widgets.DatePicker(value=pd.to_datetime('1991-01-02')),
                         end_date=widgets.DatePicker(value=pd.to_datetime('2020-06-19'))
                        ):
    """
    Print summary statistics of one ``raw_data`` column over a date range.

    Parameters
    ----------
    column : str
        Name of the column to summarise (chosen from the dropdown).
    start_date, end_date : date-like or None
        Inclusive bounds applied to ``raw_data["Date"]``. A cleared date
        picker delivers ``None``; that bound then falls back to the earliest
        or latest date present in the data instead of raising.
    """
    dates = raw_data["Date"]
    start_date = dates.min() if start_date is None else pd.to_datetime(start_date)
    end_date = dates.max() if end_date is None else pd.to_datetime(end_date)
    new_df = raw_data[(dates >= start_date) & (dates <= end_date)]

    # "Volume" is not a price column, so it gets no " price" suffix.
    label = column if column == "Volume" else column + " price"
    print("statistics of " + label + " between " + str(start_date.date()) + " and " + str(end_date.date()) + ":")
    print(new_df[column].describe())

Descriptive statistics of each column:


interactive(children=(Dropdown(description='column', options=('Close', 'Open', 'High', 'Low', 'Volume'), value…

# Exploratory Analysis

## Exploratory analysis on raw data

In [7]:
print("Exploring the time-series data of different prices:")
@interact
def plot_series_prices(price_column = ["Close", "Open", "High", "Low"],
                      start_date=widgets.DatePicker(value=pd.to_datetime('1991-01-02')),
                      end_date=widgets.DatePicker(value=pd.to_datetime('2020-06-19')),
                      theme = ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]
                      ):
    """
    Draw a line chart of one price column against "Date".

    Rows of ``raw_data`` whose "Date" lies between ``start_date`` and
    ``end_date`` (both inclusive) are plotted using the plotly template
    named by ``theme``.
    """
    window_start = pd.to_datetime(start_date)
    window_end = pd.to_datetime(end_date)

    not_before = raw_data["Date"] >= window_start
    not_after = raw_data["Date"] <= window_end
    windowed = raw_data[not_before & not_after]

    axis_labels = {"Date": "Date", price_column: price_column + " price"}
    fig = px.line(
        windowed,
        x="Date",
        y=price_column,
        labels=axis_labels,
        template=theme,
        title=price_column + " price vs time",
    )
    fig.show()

Exploring the time-series data of different prices:


interactive(children=(Dropdown(description='price_column', options=('Close', 'Open', 'High', 'Low'), value='Cl…

In [14]:
print("scatter plot between two columns:")

@interact
def plot_scatter(start_date=widgets.DatePicker(value=pd.to_datetime('1991-01-02')),
                 end_date=widgets.DatePicker(value=pd.to_datetime('2020-06-19')),
                 x=list(raw_data.select_dtypes('number').columns),
                 y=list(raw_data.select_dtypes('number').columns)[1:],
                 
                 theme = ["plotly", "plotly_white", "plotly_dark", "ggplot2", "seaborn", "simple_white", "none"]
                ):
    """
    Draw a scatter plot of two numeric columns over a date range.

    Parameters
    ----------
    start_date, end_date : date-like or None
        Inclusive bounds applied to ``raw_data["Date"]``. A cleared date
        picker delivers ``None``; that bound then falls back to the earliest
        or latest date present in the data.
    x, y : str
        Names of the numeric columns plotted on the x and y axes.
    theme : str
        Name of the plotly template used for the figure.
    """
    # The date pickers were previously ignored, so the plot always showed
    # the whole dataset; restrict the rows to the selected window.
    dates = raw_data["Date"]
    start_date = dates.min() if start_date is None else pd.to_datetime(start_date)
    end_date = dates.max() if end_date is None else pd.to_datetime(end_date)
    new_df = raw_data[(dates >= start_date) & (dates <= end_date)]

    fig = px.scatter(new_df, x = x, y = y,
                     template = theme, title = y + " vs " + x)
    fig.show()

scatter plot between two columns:


interactive(children=(DatePicker(value=Timestamp('1991-01-02 00:00:00'), description='start_date'), DatePicker…

In [20]:
print("correlation between two columns:")

@interact
def plot_correlation(start_date=widgets.DatePicker(value=pd.to_datetime('1991-01-02')),
                     end_date=widgets.DatePicker(value=pd.to_datetime('2020-06-19')),
                     column1=list(raw_data.select_dtypes('number').columns),
                     column2=list(raw_data.select_dtypes('number').columns)
                     ):
    """
    Print the correlation between two numeric columns over a date range.

    Parameters
    ----------
    start_date, end_date : date-like or None
        Inclusive bounds applied to ``raw_data["Date"]``. A cleared date
        picker delivers ``None``; that bound then falls back to the earliest
        or latest date present in the data.
    column1, column2 : str
        Names of the numeric columns to correlate using ``Series.corr``.
    """
    # The date pickers were previously ignored, so the correlation was always
    # computed over every row; restrict it to the selected window.
    dates = raw_data["Date"]
    start_date = dates.min() if start_date is None else pd.to_datetime(start_date)
    end_date = dates.max() if end_date is None else pd.to_datetime(end_date)
    new_df = raw_data[(dates >= start_date) & (dates <= end_date)]

    print(f"correlation: {new_df[column1].corr(new_df[column2])}")

correlation between two columns:


interactive(children=(DatePicker(value=Timestamp('1991-01-02 00:00:00'), description='start_date'), DatePicker…