# Histogram and Scatterplots

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt

def symbol_to_path(symbol, base_dir="Data"):
    """Build the path of the CSV file that holds data for a ticker symbol.

    Args:
        symbol: Ticker symbol; converted to ``str`` before use.
        base_dir: Directory that contains the CSV files.

    Returns:
        ``<base_dir>/<symbol>.csv`` joined with the platform path separator.
    """
    filename = str(symbol) + ".csv"
    return os.path.join(base_dir, filename)
def get_data(symbols, dates):
    """Read stock data (adjusted close) for given symbols from CSV files.

    Args:
        symbols: Iterable of ticker symbols. 'SPY' is always loaded as a
            reference and is placed first when it is not already listed.
            The caller's sequence is left untouched.
        dates: Index (e.g. a ``pd.date_range``) used as the row index of
            the frame the per-symbol data is joined onto.

    Returns:
        A DataFrame with one column per symbol (named after the symbol),
        restricted to the rows of ``dates`` where every symbol has a value.
    """
    # Work on a copy so adding SPY does not modify the caller's list.
    symbols = list(symbols)
    if 'SPY' not in symbols:
        # Add SPY for reference.
        symbols.insert(0, 'SPY')

    df = pd.DataFrame(index=dates)
    for symbol in symbols:
        df_temp = pd.read_csv(
            symbol_to_path(symbol),
            index_col="Date",
            parse_dates=True,
            usecols=['Date', 'Adj Close'],
            na_values=['nan'],
        )
        # Column names must be unique for the join, so name each by its symbol.
        df_temp = df_temp.rename(columns={'Adj Close': symbol})
        df = df.join(df_temp)

    # Dropping incomplete rows once after all joins yields the same rows as
    # dropping after every join, without the repeated work.
    return df.dropna()

def plot_data(df, title="Stock prices"):
    """Draw the frame as a line chart and show it.

    Args:
        df: Data to plot; its ``plot`` method is called directly.
        title: Chart title.

    The x axis is labelled "Date" and the y axis "Price".
    """
    axes = df.plot(title=title, fontsize=12)
    axes.set(xlabel="Date", ylabel="Price")
    plt.show()

def daily_returns(df):
    """Compute period-over-period returns for each column.

    Each value is ``current / previous - 1``. The first row has no previous
    value, so it is set to 0 instead of being left as NaN.

    Args:
        df: DataFrame (or Series) of prices ordered by time.

    Returns:
        An object of the same shape as ``df`` holding the returns. Empty
        input is returned as an empty result.
    """
    # Use a distinct local name so the function itself is not shadowed.
    returns = (df / df.shift(1)) - 1
    # ``iloc[0]`` addresses the first row of a DataFrame and the first element
    # of a Series alike; skip it for empty input, where there is no first row.
    if len(returns) > 0:
        returns.iloc[0] = 0
    return returns
    
def test_run():
    """Plot SPY's adjusted close together with its 20-day rolling mean."""
    # Define a date range
    start_date, end_date = '2020-01-30', '2020-07-30'
    dates = pd.date_range(start_date, end_date)

    # Choose stock symbols to read
    # NOTE(review): 'MFT' may be a typo for 'MSFT' — confirm that a matching
    # CSV file exists before changing it.
    symbols = ['AAPL', 'MFT', 'TSLA', 'GOOG']

    # Get stock data
    df = get_data(symbols, dates)

    # Plot SPY, keeping the axes so the rolling mean can share them
    ax = df['SPY'].plot(title="SPY rolling mean", label='SPY')

    # With a 20-row window the first 19 values of the rolling mean are NaN,
    # so the rolling-mean line starts later than the price line.
    window = 20
    rm_SPY = df['SPY'].rolling(window=window).mean()
    rm_SPY.plot(label="Rolling Mean", ax=ax)

    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()
    
def plot_selected(df, columns, start_index, end_index):
    """Plot chosen columns of the normalized frame over an index range.

    The whole frame is normalized first, so values are relative to the first
    row of ``df`` rather than to the first row of the selected range.

    Args:
        df: Frame of prices.
        columns: Column label(s) to plot.
        start_index: First index label of the range (label-based, inclusive).
        end_index: Last index label of the range (label-based, inclusive).
    """
    normalized = normalize_data(df)
    selection = normalized.loc[start_index:end_index, columns]
    plot_data(selection)

def normalize_data(df):
    """Scale every column by its value in the first row.

    Args:
        df: DataFrame of prices.

    Returns:
        A new DataFrame in which each column is divided by that column's
        first-row value, so the first row becomes 1.0 throughout.
    """
    baseline = df.iloc[0, :]
    return df.div(baseline, axis="columns")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    test_run()