<a href="https://colab.research.google.com/github/suriyakanth2711/fin/blob/main/pwfd_intro.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install yfinance

Collecting yfinance
  Downloading https://files.pythonhosted.org/packages/79/bd/d64719da8f5367f4d8b16e83507fa1d90942f433f748a4cf3ed7aa515d14/yfinance-0.1.63.tar.gz
Collecting lxml>=4.5.1
[?25l  Downloading https://files.pythonhosted.org/packages/30/c0/d0526314971fc661b083ab135747dc68446a3022686da8c16d25fcf6ef07/lxml-4.6.3-cp37-cp37m-manylinux2014_x86_64.whl (6.3MB)
[K     |████████████████████████████████| 6.3MB 32.3MB/s 
Building wheels for collected packages: yfinance
  Building wheel for yfinance (setup.py) ... [?25l[?25hdone
  Created wheel for yfinance: filename=yfinance-0.1.63-py2.py3-none-any.whl size=23919 sha256=c1237f8d4f15ed7f4a3bed060725e379996f433846da9110d924f64088455fc2
  Stored in directory: /root/.cache/pip/wheels/fe/a0/79/b73d4a0c535b421b88fc7b393936b371fabbfeaf979eca4050
Successfully built yfinance


In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Download daily GOOGL price data from Yahoo Finance.
# (yfinance is already imported as `yf` in the notebook's import cell.)
#
# - actions=True: boolean flag, also include dividend / stock-split columns.
# - auto_adjust=False: passed explicitly because the following cell selects the
#   'Adj Close' column.
#   NOTE(review): the default of auto_adjust reportedly differs between
#   yfinance releases — confirm against the installed version.
df = yf.download(
    'GOOGL',
    start='2010-10-10',
    end='2020-02-20',
    progress=False,
    actions=True,
    auto_adjust=False,
)

df.head()

In [None]:
# Keep only the adjusted close price, renamed to `adj_close`.
# rename() ignores labels that are absent, so this cell can be re-run after
# `df` has already been reduced (the in-place version raised KeyError on
# 'Adj Close' the second time it was executed).
df = df.rename(columns={'Adj Close': 'adj_close'})[['adj_close']]

# Add both return series; assign() returns a new frame instead of mutating.
df = df.assign(
    simple_rtn=df['adj_close'].pct_change(),                     # simple return
    log_rtn=np.log(df['adj_close'] / df['adj_close'].shift(1)),  # log return
)

df.head()

In [None]:
#historical volatility
def realized_volatility(x):
    """Return the square root of the sum of the squared values of ``x``.

    When ``x`` holds the log returns of a period, this is the realized
    volatility of that period.
    """
    squared = x ** 2
    return np.sqrt(np.sum(squared))

# Group the rows by month and apply realized_volatility to every group.
by_month = df.groupby(pd.Grouper(freq='M'))
df_rv = by_month.apply(realized_volatility).rename(columns={'log_rtn': 'rv'})

# Annualize the monthly figure: scale by sqrt(12).
df_rv['rv'] = df_rv['rv'] * np.sqrt(12)
df_rv.head()

In [None]:
# Plot the annualized monthly realized volatility with a title and axis labels
# so the figure can be read on its own; the trailing ';' hides the repr output.
fig, ax = plt.subplots(figsize=(17, 3))
df_rv['rv'].plot(ax=ax)
ax.set(title='GOOGL annualized realized volatility (monthly)',
       xlabel='Date', ylabel='Realized volatility');

In [None]:
# Three stacked panels sharing the date axis: price, simple returns, log returns.
fig, ax = plt.subplots(3, 1, figsize=(17, 10), sharex=True)

df['adj_close'].plot(ax=ax[0])
ax[0].set(title='GOOGL time series', ylabel='Stock price ($)')

# The return columns hold fractions (pct_change / log of the price ratio), not
# values multiplied by 100, so the axis labels carry no "(%)" unit.
df['simple_rtn'].plot(ax=ax[1])
ax[1].set(ylabel='Simple returns')

df['log_rtn'].plot(ax=ax[2])
ax[2].set(xlabel='Date', ylabel='Log returns');  # ';' suppresses the repr output

In [None]:
# Rolling mean and standard deviation of the simple returns over a 21-row
# window (presumably about one trading month — confirm).
returns_window = df[['simple_rtn']].rolling(window=21)
df_rolling = returns_window.agg(['mean', 'std'])

# agg() produces two-level column labels; drop the outer level so the columns
# are just 'mean' and 'std'.
df_rolling.columns = df_rolling.columns.droplevel(0)

# Attach the rolling statistics to the price/return data, aligned on the index.
df_outliers = df.join(df_rolling)

In [None]:
def indentify_outliers(row, n_sigmas=3):
    """Flag a row whose simple return lies outside mean +/- n_sigmas * std.

    Parameters
    ----------
    row : mapping-like
        Must provide the keys 'simple_rtn', 'mean' and 'std'.
    n_sigmas : int or float, default 3
        Half-width of the accepted band, in standard deviations.

    Returns
    -------
    int
        1 if the return is strictly outside the band, otherwise 0. A NaN mean
        or std makes both comparisons false, so such rows yield 0.
    """
    x = row['simple_rtn']
    mu = row['mean']
    sigma = row['std']
    # Use the n_sigmas argument; the threshold was previously hard-coded to 3,
    # so passing a different value had no effect.
    band = n_sigmas * sigma
    if x > mu + band or x < mu - band:
        return 1
    return 0
        
# Evaluate every row (axis=1) and store the 0/1 flag in an 'outlier' column.
outlier_flags = df_outliers.apply(indentify_outliers, axis=1)
df_outliers['outlier'] = outlier_flags

# Keep only the returns of the flagged rows.
is_outlier = df_outliers['outlier'] == 1
outliers = df_outliers.loc[is_outlier, ['simple_rtn']]

In [None]:
# Line of all simple returns with the flagged observations overlaid in red.
fig, ax = plt.subplots(figsize=(17, 5))
ax.plot(df_outliers.index, df_outliers['simple_rtn'], label='Normal', color='black')
ax.scatter(outliers.index, outliers['simple_rtn'], label='Anomaly', color='red')
ax.set(title="Google's stock returns")
ax.legend(loc='upper right')