# Understanding Stock Markets using Data Science

### Importing all the required libraries

In [None]:
# For Data Manipulation & Visualization
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from datetime import date
import plotly.graph_objects as go

# Setting default figure size
from pylab import rcParams
rcParams['figure.figsize'] = 15,6

# For Implementing Trading Strategies
import bt

# For using Technical Indicators - https://mrjbq7.github.io/ta-lib/
import talib

# To get data for any script from NSE Exchange
from nsepy import get_history

# Filter Warnings
import warnings
warnings.filterwarnings('ignore')

There are two ways to start analysing any financial instrument
* Read it directly from an online data source (e.g. Yahoo Finance or the NSE exchange)
* Download the data in a csv file and read it using Pandas

We will use the first approach to read the data for any financial instrument directly into Python

In [None]:
# Ask for a symbol and pull its history from the NSE exchange,
# from 1 January 2015 up to today
symbol = input("Enter symbol of stock\n")
df = get_history(
    symbol=symbol,
    start=date(2015, 1, 1),
    end=date.today(),
    index=True,  # index=True is for index prices (NIFTY)
)
# NOTE(review): the prompt asks for a stock symbol while index=True targets
# index prices - confirm which one is intended

# Keep the index available as a regular 'Date' column too
df['Date'] = df.index
df.tail()

#### Let's Visualize the data

In [None]:
# Line chart of the closing price, drawn in red on the current axes
ax = plt.gca()
ax.plot(df['Close'], color='red')
ax.set_title("Daily close price")
plt.show()

In [None]:
# Collect the Open/High/Low/Close columns under the lower-case keyword
# names that go.Candlestick expects
ohlc = {name.lower(): df[name] for name in ('Open', 'High', 'Low', 'Close')}

# Build the candlestick trace against the DataFrame index
candlestick = go.Candlestick(x=df.index, **ohlc)

# Wrap the trace in a figure and render it
fig = go.Figure(data=[candlestick])
fig.show()

#### Let's Calculate the Daily Returns

In [None]:
# Row-over-row change of the closing price, expressed in percent
df['daily_return'] = df['Close'].pct_change().mul(100)

# Show the return series over time
plt.plot(df['daily_return'])
plt.show()

In [None]:
# Plot histogram of the daily percentage returns, split into 100 bins
df['daily_return'].hist(bins=100)

## Understanding Technical Indicators
* Trend Indicators - Moving Averages
* Momentum Indicator - RSI
* Volatility Indicator - Bollinger Bands

### 1. Trend Indicator - Moving Averages

In [None]:
# Add a short (10-period) and a long (50-period) simple moving average
# of the closing price as new columns
for column, window in (('SMA_short', 10), ('SMA_long', 50)):
    df[column] = talib.SMA(df['Close'], timeperiod=window)

# Show the last five rows
df.tail()

In [None]:
# Draw both SMAs and the closing price, each labelled with its column name
for column in ('SMA_short', 'SMA_long', 'Close'):
    plt.plot(df[column], label=column)

# Add the legend and the title
plt.legend()
plt.title('Simple Moving Averages')

In [None]:
# Add a short (10-period) and a long (50-period) exponential moving average
# of the closing price as new columns
for column, window in (('EMA_short', 10), ('EMA_long', 50)):
    df[column] = talib.EMA(df['Close'], timeperiod=window)

# Show the last five rows
df.tail()

In [None]:
# Draw both EMAs and the closing price, each labelled with its column name
for column in ('EMA_short', 'EMA_long', 'Close'):
    plt.plot(df[column], label=column)

# Add the legend and the title
plt.legend()
plt.title('Exponential Moving Averages')

EMA is more sensitive to the most recent price movements than SMA.

In [None]:
# Overlay the long SMA, the long EMA and the closing price for comparison
for column in ('SMA_long', 'EMA_long', 'Close'):
    plt.plot(df[column], label=column)

# Add the legend and the title
plt.legend()
plt.title('SMA vs EMA')

#### ADX - Average Directional Movement Index
* ADX <= 25 : Suggests there is no trend in the market
* ADX > 25 : Suggests market is trending
* ADX > 50 : Suggests there is a strong trend in the market 

In [None]:
# ADX takes the high, low and close series (in that order) plus a period
price_columns = [df[name] for name in ('High', 'Low', 'Close')]
df['ADX'] = talib.ADX(*price_columns, timeperiod=14)

# Show the last five rows
df.tail()

In [None]:
# Two stacked axes: price on top, ADX underneath
fig, (ax1, ax2) = plt.subplots(nrows=2)

# Upper panel: closing price
ax1.plot(df['Close'])
ax1.set_ylabel('Price')

# Lower panel: ADX
ax2.plot(df['ADX'])
ax2.set_ylabel('ADX')

# Title sits on the upper panel
ax1.set_title('Price and ADX')

### 2. Momentum Indicator - RSI (Relative Strength Index)
* RSI > 70
 - Suggests the asset is likely overbought and the price may soon reverse
* RSI < 30
 - Suggests the asset is likely oversold and the price may soon rally

In [None]:
# Store the 14-period RSI of the closing price as a new column
df = df.assign(RSI=talib.RSI(df['Close'], timeperiod=14))

# Show the last five rows
df.tail()

In [None]:
# Two stacked axes: price on top, RSI underneath
fig, (ax1, ax2) = plt.subplots(nrows=2)

# Upper panel: closing price
ax1.plot(df['Close'])
ax1.set_ylabel('Price')

# Lower panel: RSI
ax2.plot(df['RSI'])
ax2.set_ylabel('RSI')

# Title sits on the upper panel
ax1.set_title('Price and RSI')

### 3. Volatility Indicator - Bollinger Bands

In [None]:
# Bollinger Bands on the closing price: 20-period window with
# upper/lower deviation multipliers of 2
upper, mid, lower = talib.BBANDS(
    df['Close'],
    timeperiod=20,
    nbdevup=2,
    nbdevdn=2,
)

In [None]:
# Draw the price followed by the three bands, in legend order
band_series = (
    (df['Close'], 'Price'),
    (upper, 'Upper band'),
    (mid, 'Middle band'),
    (lower, 'Lower band'),
)
for series, label in band_series:
    plt.plot(series, label=label)

# Add the title and the legend
plt.title('Bollinger Bands')
plt.legend()

# Understanding the bt library & defining a Trading Strategy
### The bt process
* Step 1: Get the historical price data
* Step 2: Define the strategy
* Step 3: Backtest the strategy with the data
* Step 4: Evaluate the result

#### Step 1: Get the data

In [None]:
# Fetch prices for the three tickers from Yahoo Finance,
# from the start of 2019 up to today
bt_data = bt.get(
    'GOOG, SBIN.NS, EURUSD=X',
    start='2019-1-1',
    end=date.today(),
)
bt_data.head()

#### Step 2: Define the Strategy

In [None]:
# Weekly strategy: take every security in the data, weigh them equally
# and rebalance to those weights
bt_strategy = bt.Strategy(
    'Trade_Weekly',
    [
        bt.algos.RunWeekly(),     # trigger once per week
        bt.algos.SelectAll(),     # select all securities
        bt.algos.WeighEqually(),  # equal target weights
        bt.algos.Rebalance(),     # trade to the target weights
    ],
)

#### Step 3: Backtest the strategy

In [None]:
# Pair the strategy with the downloaded prices
bt_test = bt.Backtest(strategy=bt_strategy, data=bt_data)

# Execute the backtest and keep the result object
bt_res = bt.run(bt_test)

#### Step 4: Evaluate the result

In [None]:
# Plot the result of the weekly equal-weight backtest
bt_res.plot(title="Backtest result")

In [None]:
# Get trade details (the transactions recorded by the backtest)
bt_res.get_transactions()

# Trading Signals
* Trend Following
* Mean Reversion

### Trend Following Signals
* Above SMA
* EMA Crossover

#### Above SMA

In [None]:
# 'ticker' is the column label used in later cells to look this series up
# in price_data
ticker = 'nsei'

# Download prices for ^NSEI from 1 November 2019 up to today
price_data = bt.get(
    '^NSEI',
    start='2019-11-1',
    end=date.today(),
)

In [None]:
# Simple Moving Average: mean of the price over a rolling 20-row window
sma = price_data.rolling(window=20).mean()

In [None]:
# Signal-based strategy: select where the price is above its SMA,
# weigh the selection equally and rebalance
bt_strategy_abovesma = bt.Strategy(
    'AboveSMA',
    [
        bt.algos.SelectWhere(price_data > sma),
        bt.algos.WeighEqually(),
        bt.algos.Rebalance(),
    ],
)

In [None]:
# Bind the AboveSMA strategy to the price data, then execute it
bt_backtest_abovesma = bt.Backtest(strategy=bt_strategy_abovesma, data=price_data)
bt_result_abovesma = bt.run(bt_backtest_abovesma)

In [None]:
# Plot the backtest result of the AboveSMA strategy
bt_result_abovesma.plot(title='Backtest result')

In [None]:
# Show the first 15 transactions made by the AboveSMA backtest
bt_result_abovesma.get_transactions().head(15)

#### EMA Crossover

In [None]:
# Price series for the ticker; both EMAs are computed from it
close_series = price_data[ticker]

# Short (10-period) and long (40-period) EMAs, each as a one-column DataFrame
EMA_short = talib.EMA(close_series, timeperiod=10).to_frame()
EMA_long = talib.EMA(close_series, timeperiod=40).to_frame()

In [None]:
# Create the signal DataFrame: start from all zeros (same index and column as
# EMA_long) so that rows where an EMA is still NaN, or where the two EMAs are
# exactly equal, carry a neutral weight of 0. Starting from a copy of EMA_long
# would leave the raw EMA value in place as the target weight on exact ties.
signal = pd.DataFrame(0.0, index=EMA_long.index, columns=EMA_long.columns)

# Construct the signal: 1 when the short EMA is above the long EMA,
# -1 when it is below
signal[EMA_short > EMA_long] = 1
signal[EMA_short < EMA_long] = -1

# Rename series to ticker name - Not required if working with multiple tickers
signal.columns = [ticker]

In [None]:
# Plot the signal, price and MAs
# Merge the four frames side by side, then give the columns readable names
combined_df = bt.merge(signal, price_data, EMA_short, EMA_long)
combined_df.columns = ['Signal', 'Price', 'EMA_short', 'EMA_long']
# The signal is drawn against a secondary y-axis, separate from the price scale
combined_df.plot(secondary_y=['Signal'])

In [None]:
# Strategy driven by the crossover signal: the signal values are used
# as target weights, followed by a rebalance
bt_strategy_ema = bt.Strategy(
    'EMA_crossover',
    [
        bt.algos.WeighTarget(signal),
        bt.algos.Rebalance(),
    ],
)

In [None]:
# Bind the EMA crossover strategy to the price data, then execute it
bt_backtest_ema = bt.Backtest(strategy=bt_strategy_ema, data=price_data)
bt_result_ema = bt.run(bt_backtest_ema)

In [None]:
# Plot the backtest result of the EMA crossover strategy
bt_result_ema.plot(title='Backtest result')

### Mean Reversion Signals
RSI-based mean reversion strategy:
* Short signal: RSI > 70
 - Suggests the asset is likely overbought and the price may soon reverse
* Long signal: RSI < 30
 - Suggests the asset is likely oversold and the price may soon rally

In [None]:
# Calculate the RSI of the ticker's price series (talib's default period is
# used, since none is passed) and convert the result to a DataFrame
stock_rsi = talib.RSI(price_data[ticker]).to_frame()

In [None]:
# Start from a frame with the same structure as the RSI data
signal = stock_rsi.copy()

# Flag the two extremes once so the masks can be reused
oversold = stock_rsi < 30
overbought = stock_rsi > 70

# Everything that is neither oversold nor overbought is neutral; this also
# covers missing RSI values, because comparisons against NaN are False
signal[~(oversold | overbought)] = 0

# Long when oversold, short when overbought
signal[oversold] = 1
signal[overbought] = -1

# Rename series to ticker name - Not required if working with multiple tickers
signal.columns = [ticker]

In [None]:
# Draw the RSI series and title the axes it was drawn on
ax = stock_rsi.plot()
ax.set_title('RSI')

In [None]:
# Merge data into one DataFrame and give the two columns readable names
combined_df = bt.merge(signal, price_data)
combined_df.columns = ['Signal', 'Price']

# Plot the signal with price; the signal is drawn against a secondary y-axis
combined_df.plot(secondary_y = ['Signal'])

In [None]:
# Strategy driven by the RSI signal: the signal values are used
# as target weights, followed by a rebalance
bt_strategy_rsi = bt.Strategy(
    'RSI_MeanReversion',
    [
        bt.algos.WeighTarget(signal),
        bt.algos.Rebalance(),
    ],
)

In [None]:
# Bind the RSI strategy to the price data, then execute it
bt_backtest_rsi = bt.Backtest(strategy=bt_strategy_rsi, data=price_data)
bt_result_rsi = bt.run(bt_backtest_rsi)

In [None]:
# Plot the backtest result of the RSI mean-reversion strategy
bt_result_rsi.plot(title='Backtest result')

## Strategy Optimization and Benchmarking
#### How to decide the values of input parameters?
* Question:
Which is a better SMA lookback period to use in the signals?
* Solution: 
Strategy Optimization (Try a range of input parameter values in backtesting and compare the results)

In [None]:
def signal_strategy(ticker, period, name, start='2018-11-1', end='2020-12-1'):
    """Build an above-SMA backtest for one ticker and one lookback period.

    Args:
        ticker: Ticker string passed to ``bt.get``.
        period: Rolling-window length used for the simple moving average.
        name: Name given to the ``bt.Strategy``.
        start: Start of the price history requested from ``bt.get``.
        end: End of the price history requested from ``bt.get``.

    Returns:
        A ``bt.Backtest`` pairing the strategy with the downloaded prices.
        The backtest is not run here.
    """
    # Download the prices and derive the price-above-SMA selection mask
    prices = bt.get(ticker, start=start, end=end)
    moving_average = prices.rolling(period).mean()
    above_sma = prices > moving_average

    # Select where the mask holds, weigh equally and rebalance
    algos = [
        bt.algos.SelectWhere(above_sma),
        bt.algos.WeighEqually(),
        bt.algos.Rebalance(),
    ]
    return bt.Backtest(bt.Strategy(name, algos), prices)

In [None]:
ticker1 = '^NSEI'

# One above-SMA backtest per lookback period, named SMA<period>
sma20, sma50, sma100 = (
    signal_strategy(ticker1, period=lookback, name=f'SMA{lookback}')
    for lookback in (20, 50, 100)
)

# Run the three backtests together and compare them on one chart
bt_results = bt.run(sma20, sma50, sma100)
bt_results.plot(title='Strategy optimization')

#### Let's Benchmark

In [None]:
def buy_and_hold(ticker, name, start='2018-11-1', end='2020-12-1'):
    """Build a benchmark backtest that allocates once and then holds.

    Args:
        ticker: Ticker string passed to ``bt.get``.
        name: Name given to the ``bt.Strategy``.
        start: Start of the price history requested from ``bt.get``.
        end: End of the price history requested from ``bt.get``.

    Returns:
        A ``bt.Backtest`` pairing the benchmark strategy with the downloaded
        prices. The backtest is not run here.
    """
    prices = bt.get(ticker, start=start, end=end)

    # RunOnce makes the select / weigh / rebalance chain fire a single time
    algos = [
        bt.algos.RunOnce(),
        bt.algos.SelectAll(),
        bt.algos.WeighEqually(),
        bt.algos.Rebalance(),
    ]
    return bt.Backtest(bt.Strategy(name, algos), prices)

In [None]:
# Buy-and-hold backtest on the same ticker, used as the reference
benchmark = buy_and_hold(ticker=ticker1, name='benchmark')

# Run all backtests and plot the results
all_backtests = (sma20, sma50, sma100, benchmark)
bt_results = bt.run(*all_backtests)
bt_results.plot(title='Strategy benchmarking')

# Strategy Return Analysis

In [None]:
# Get all backtest stats for the EMA crossover result
resInfo = bt_result_ema.stats
# The index lists the names of the available statistics
print(resInfo.index)

In [None]:
# Print the daily, monthly and yearly mean-return statistics.
# float() makes explicit the conversion that '%' formatting does implicitly.
for label, stat in (('Daily', 'daily_mean'),
                    ('Monthly', 'monthly_mean'),
                    ('Yearly', 'yearly_mean')):
    print(f'{label} return: {float(resInfo.loc[stat]):.4f}')

In [None]:
# Print the compound annual growth rate from the stats table
cagr_value = float(resInfo.loc['cagr'])
print(f'Compound annual growth rate: {cagr_value:.4f}')

In [None]:
# Plot the weekly return histogram (freq='w') using 50 bins
bt_result_ema.plot_histograms(bins=50, freq = 'w')

In [None]:
# Get the lookback returns of the EMA crossover result and print them
lookback_returns = bt_result_ema.display_lookback_returns()
print(lookback_returns)

### Get Drawdown Results

In [None]:
resInfo = bt_result_ema.stats

# Pull the three drawdown statistics out of the stats table
max_drawdown = resInfo.loc['max_drawdown']
avg_drawdown = resInfo.loc['avg_drawdown']
avg_drawdown_days = resInfo.loc['avg_drawdown_days']

# Report them; float() makes explicit the conversion that '%' formatting
# does implicitly
print(f'Maximum drawdown: {float(max_drawdown):.2f}')
print(f'Average drawdown: {float(avg_drawdown):.2f}')
print(f'Average drawdown days: {float(avg_drawdown_days):.0f}')

### Get Calmar Ratio
The higher the Calmar ratio, the better a strategy performed on a risk-adjusted basis. Typically a Calmar ratio larger than 3 is considered excellent.

In [None]:
resInfo = bt_result_ema.stats

# Inputs for the ratio: compound annual growth rate and maximum drawdown
cagr = resInfo.loc['cagr']
max_drawdown = resInfo.loc['max_drawdown']

# Calculate the Calmar ratio manually: CAGR over max drawdown, sign flipped
calmar_calc = -(cagr / max_drawdown)
print(f'Calmar Ratio calculated: {float(calmar_calc):.2f}')

In [None]:
resInfo = bt_result_ema.stats

# Read the Calmar ratio straight from the backtest stats
calmar = resInfo.loc['calmar']
print(f'Calmar Ratio: {float(calmar):.2f}')

## Sharpe Ratio
The higher the Sharpe ratio, the more attractive the risk-adjusted return.

In [None]:
resInfo = bt_result_ema.stats

# Print the daily, monthly and yearly Sharpe ratios from the backtest stats;
# float() makes explicit the conversion that '%' formatting does implicitly
daily_sharpe = float(resInfo.loc['daily_sharpe'])
monthly_sharpe = float(resInfo.loc['monthly_sharpe'])
yearly_sharpe = float(resInfo.loc['yearly_sharpe'])
print(f'Sharpe ratio daily: {daily_sharpe:.2f}')
print(f'Sharpe ratio monthly {monthly_sharpe:.2f}')
print(f'Sharpe ratio annually {yearly_sharpe:.2f}')

## Sortino Ratio

In [None]:
resInfo = bt_result_ema.stats

# Print the daily, monthly and yearly Sortino ratios from the backtest stats;
# float() makes explicit the conversion that '%' formatting does implicitly
daily_sortino = float(resInfo.loc['daily_sortino'])
monthly_sortino = float(resInfo.loc['monthly_sortino'])
yearly_sortino = float(resInfo.loc['yearly_sortino'])
print(f'Sortino ratio daily: {daily_sortino:.2f}')
print(f'Sortino ratio monthly {monthly_sortino:.2f}')
print(f'Sortino ratio annually {yearly_sortino:.2f}')

<h2><center>Happy Learning!</center></h2>