In [2]:
# Required modules are imported
from pathlib import Path
import pandas as pd
import glob
from pandas.tseries.offsets import DateOffset

# All CSV file paths from the '5Stocks' folder in Downloads are collected
def locate(loc):
    return glob.glob(f'{loc}/*.csv')

# Each CSV file is read into a DataFrame
# 'Date' is used as the index and is parsed as a datetime object
# A dictionary of DataFrames is created, keyed by the filename
# All the individual DataFrames are concatenated into a single MultiIndex DataFrame
# The index levels are set as 'ticker' and 'Date'
def joiner(loc):
    sepdfs = {ind.split('/')[-1].split('.')[0]: pd.read_csv(ind, index_col='Date', parse_dates=True)for ind in locate(loc)}
    return pd.concat(sepdfs, names=['ticker'])

# The data is filtered to include only rows from the year 2007 onwards
def sieve(loc):
    dates = joiner(loc).index.get_level_values('Date')
    only10 = dates.year >= 2007
    return joiner(loc)[only10].sort_index()

# Missing 'Close' values are handled
# Short gaps (3 days or fewer) are interpolated linearly and remaining missing values (in longer gaps) are back-filled
def fillin(loc):
    stocks10y=sieve(loc)
    g = stocks10y['Close'].isna().cumsum().where(stocks10y['Close'].isna())
    stocks10y['Close'] = stocks10y['Close'].where(g.map(g.value_counts()) > 3,stocks10y['Close'].interpolate('linear'))
    stocks10y['Close'] = stocks10y['Close'].bfill()
    return stocks10y

# Markers like the 7-day moving average(closing price), 30-day moving average(closing price), Daily percentage return, 30-day rolling volatility (standard deviation of returns) of the stocks are calculated.
def markers(loc):
    onekey = (fillin(loc).reset_index().sort_values(by=['ticker', 'Date', 'Close']))
    onekey['7DMA'] = (onekey.groupby('ticker')['Close'].rolling(window=7, min_periods=0).mean().reset_index(level=0, drop=True))
    onekey['30DMA'] = (onekey.groupby('ticker')['Close'].rolling(window=30, min_periods=0).mean().reset_index(level=0, drop=True))
    onekey['Return'] = (onekey.groupby('ticker')['Close'].pct_change() * 100)
    onekey['RV30'] = (onekey.groupby('ticker')['Return'].rolling(window=30, min_periods=0).std().reset_index(level=0, drop=True))
    return onekey

# The DataFrame is converted back to a MultiIndexed format and things like the most volatile 30-day period, stock with the highest average return are identified and summary statistics are printed.
def results(loc):
    stocksf = markers(loc).set_index(['ticker', 'Date'])
    onekey=markers(loc)
    onlypost30days = onekey[onekey.groupby('ticker').cumcount() >= 29]
    maxvol = onlypost30days.loc[onlypost30days['RV30'].idxmax()]
    avret = onekey.groupby('ticker')['Return'].mean()
    retstock = avret.idxmax()
    maxavret = avret.max()
    print(f' Most Volatile Month for: {maxvol["ticker"]}\n',f'Most Volatile Month: {(maxvol["Date"] - pd.Timedelta(days=29)).date()} to {maxvol["Date"].date()}\n',f'Maximum Rolling Volatility: {maxvol["RV30"]}')
    print(f' Stock with Highest Average Return: {retstock}\n',f'Highest Average Return: {maxavret}')

print(results(Path.home() / 'Downloads' / '5Stocks'))
#The argument is where your 5 stock .csvs are stored in your computer.

 Most Volatile Month for: amzn
 Most Volatile Month: 2008-09-29 to 2008-10-28
 Maximum Rolling Volatility: 6.942334931099093
 Stock with Highest Average Return: amzn
 Highest Average Return: 0.15461187733581844
None
