In [2]:
import pandas as pd
import numpy as np

In [3]:
# Read Merged_DataFrame.csv into `df` and preview its first rows.
df = pd.read_csv(filepath_or_buffer="Merged_DataFrame.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Open,Close,High,Low,Volume,Symbol,tt1qrt_Date,tt1qrt_Open,...,tt1w_Close,tt1w_High,tt1w_Low,tt1w_Volume,tt2d_Date,tt2d_Open,tt2d_Close,tt2d_High,tt2d_Low,tt2d_Volume
0,0,19980102,41.09375,41.75,41.8125,40.875,2573000,MMM,19980102.0,41.09375,...,41.75,41.8125,40.875,2573000.0,19980102.0,41.09375,41.6875,42.625,40.875,5078600.0
1,1,19980105,41.5,41.6875,42.625,41.34375,2505600,MMM,19980102.0,41.09375,...,41.3125,42.875,41.0,18555000.0,19980102.0,41.09375,41.6875,42.625,40.875,5078600.0
2,2,19980106,41.625,41.8125,42.875,41.5,3620200,MMM,19980102.0,41.09375,...,41.3125,42.875,41.0,18555000.0,19980106.0,41.625,42.15625,42.875,41.375,6115000.0
3,3,19980107,41.90625,42.15625,42.3125,41.375,2494800,MMM,19980102.0,41.09375,...,41.3125,42.875,41.0,18555000.0,19980106.0,41.625,42.15625,42.875,41.375,6115000.0
4,4,19980108,42.0,41.65625,42.0,41.125,1951400,MMM,19980102.0,41.09375,...,41.3125,42.875,41.0,18555000.0,19980108.0,42.0,41.3125,42.0,41.0,9934400.0


In [29]:
# Simple Moving Average
def SimpleMovingAvgClassicUpdater(data, price, length, starting_index = 0):
    """Compute a per-symbol simple moving average of ``data[price]``.

    Rules applied to each symbol's rows, taken in the order they appear:

    * The average for a row is the mean of the ``length`` prices *before*
      it (the row's own price is not part of the window).
    * The first ``length`` rows of a symbol, and rows whose price is NaN,
      get NaN.
    * If a row's price equals the previous row's price, the previous row's
      SMA is reused instead of recomputing.

    Args:
        data: DataFrame with a ``"Symbol"`` column and the ``price`` column.
        price: Name of the column holding the prices to average.
        length: Window size; must be at least 1.
        starting_index: Positional row in ``data`` from which results are
            wanted. Rows before it are not computed. Negative values are
            treated as 0.

    Returns:
        A list with one float per row of ``data`` from ``starting_index``
        onward, in row order (so ``len(data) - starting_index`` items for a
        non-negative ``starting_index`` within range).

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be a positive integer")

    start = max(int(starting_index), 0)
    prices = data[price].to_numpy()
    # One slot per row of `data`, so every symbol's results land on its own
    # rows instead of only the last symbol's list surviving the loop.
    out = np.full(len(prices), np.nan, dtype=float)

    for symbol in data["Symbol"].unique():
        rows = np.flatnonzero((data["Symbol"] == symbol).to_numpy())
        values = prices[rows]
        previous = None  # SMA of this symbol's previous row, once computed
        for i, row in enumerate(rows):
            if row < start:
                continue
            if i < length or pd.isna(values[i]):
                current = np.nan
            elif values[i] == values[i - 1]:
                if previous is None:
                    # First requested row of an incremental run: the previous
                    # row's SMA was never computed here, so rebuild it rather
                    # than indexing into a result list that does not hold it.
                    previous = _sma_for_row(values, i - 1, length)
                current = previous
            else:
                current = sum(values[i - length:i]) / length
            out[row] = current
            previous = current

    return out[start:].tolist()


def _sma_for_row(values, i, length):
    """Return the SMA a pass starting at row 0 would assign to row ``i`` of one symbol."""
    if i < length or pd.isna(values[i]):
        return np.nan
    # A run of identical prices shares the SMA of the run's first row, so
    # walk back to that row.
    first = i
    while first >= length and values[first] == values[first - 1]:
        first -= 1
    if first < length:
        # The run reaches back into the warm-up rows, which carry NaN.
        return np.nan
    return sum(values[first - length:first]) / length

# Original updater (superseded: the later definitions of SimpleMovingAvgUpdater in this cell rebind the name)
def SimpleMovingAvgUpdater(data, price, length, range_of_values, column="SimpleMovingAvgClassic"):
    """Refresh the SMA of the last ``range_of_values`` rows of ``data`` in place.

    The averages come from ``SimpleMovingAvgClassicUpdater`` run over the
    whole frame, so the refreshed rows carry the same values a full pass
    would give them.

    Args:
        data: DataFrame holding the ``price`` column (and ``"Symbol"``,
            which the classic function reads).
        price: Name of the price column to average.
        length: Window size passed to the classic function.
        range_of_values: Number of trailing rows to refresh.
        column: Column receiving the averages; created (filled with NaN)
            when it does not exist yet. The price column itself is never
            overwritten.

    Returns:
        The same ``data`` object.

    Raises:
        ValueError: If the classic function does not return one value per
            row of ``data``.
    """
    if len(data) == 0 or range_of_values <= 0:
        return data

    # Work with positions so the index does not have to be a 0-based range.
    start_pos = max(len(data) - range_of_values, 0)

    sma = SimpleMovingAvgClassicUpdater(data, price, length)
    if len(sma) != len(data):
        raise ValueError("expected one moving-average value per row of data")

    if column not in data.columns:
        data[column] = np.nan
    data.iloc[start_pos:, data.columns.get_loc(column)] = sma[start_pos:]
    return data

# Alternative updater that does not call the classic function (also superseded by the definition below)
def SimpleMovingAvgUpdater(data, price, length, column):
    """Fill the empty cells of ``data[column]`` with a simple moving average.

    For every row whose ``column`` cell is null and that has at least
    ``length`` rows before it, the cell is set to the mean of the ``length``
    prices preceding that row (the row's own price is not included). Cells
    that already hold a value are left untouched.

    Args:
        data: DataFrame containing ``price`` and ``column``; modified in place.
        price: Name of the price column to average.
        length: Window size; must be at least 1.
        column: Name of the column whose null cells are filled.

    Returns:
        The same ``data`` object.

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError("length must be a positive integer")

    prices = data[price].to_numpy()
    column_pos = data.columns.get_loc(column)
    # Positions are used instead of labels so the index does not have to be a
    # 0-based range, and so an empty frame simply yields no iterations.
    for pos, is_empty in enumerate(data[column].isna().to_numpy()):
        if not is_empty or pos < length:
            continue
        # Half-open slice: exactly `length` prices, ending just before `pos`.
        data.iloc[pos, column_pos] = sum(prices[pos - length:pos]) / length
    return data


# Preferred version: it calls the classic function. It is defined last, so this is the SimpleMovingAvgUpdater that stays bound after the cell runs.
def SimpleMovingAvgUpdater(data, price, length, column):
    """Fill the empty cells of ``data[column]`` using the classic SMA function.

    Rows whose ``column`` cell is null and that have at least ``length`` rows
    before them receive the value ``SimpleMovingAvgClassicUpdater`` computes
    for that row. Cells that already hold a value are left untouched.

    Args:
        data: DataFrame containing ``price``, ``column`` and the ``"Symbol"``
            column read by the classic function; modified in place.
        price: Name of the price column to average.
        length: Window size passed to the classic function.
        column: Name of the column whose null cells are filled.

    Returns:
        The same ``data`` object.

    Raises:
        ValueError: If the classic function does not return one value per
            row of ``data``.
    """
    empty_rows = np.flatnonzero(data[column].isna().to_numpy())
    # Rows inside the first `length` positions never get a value.
    empty_rows = empty_rows[empty_rows >= length]
    if len(empty_rows) == 0:
        return data

    # Run the classic function over the whole frame rather than a short
    # slice: its repeated-price rule depends on earlier rows, so only a full
    # pass reproduces the values the column would have had.
    sma = SimpleMovingAvgClassicUpdater(data, price, length)
    if len(sma) != len(data):
        raise ValueError("expected one moving-average value per row of data")

    column_pos = data.columns.get_loc(column)
    data.iloc[empty_rows, column_pos] = np.asarray(sma, dtype=float)[empty_rows]
    return data

### SMA computed over all data

In [30]:
# Full pass over every row of df.
# Arguments: the DataFrame, the price column the moving average is applied to,
# and the window length (PercentDLength).
df["SimpleMovingAvgClassic"] = SimpleMovingAvgClassicUpdater(data=df, price="Close", length=5)
df["SimpleMovingAvgClassic"]

0           NaN
1           NaN
2           NaN
3           NaN
4           NaN
         ...   
5945    201.030
5946    201.090
5947    200.078
5948    198.864
5949    197.580
Name: SimpleMovingAvgClassic, Length: 5950, dtype: float64

In [31]:
# Intentionally blank the last 5 moving-average values so the updater can be
# called to fill them back in. The rows are taken from the end of the index
# instead of hard-coded labels, so this keeps working if the row count changes.
df.loc[df.index[-5:], "SimpleMovingAvgClassic"] = np.nan
df["SimpleMovingAvgClassic"]

0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
5945   NaN
5946   NaN
5947   NaN
5948   NaN
5949   NaN
Name: SimpleMovingAvgClassic, Length: 5950, dtype: float64

### Updater: SMA recomputed only for the range of new rows (optimized version)

In [32]:
# Incremental pass: recompute only the trailing `range_of_values` rows.
# Arguments: the DataFrame, the price column the moving average is applied to,
# the window length (PercentDLength), and the first row to recompute.
range_of_values = 5
df_last_index = df.index[-1]
starting_index = df_last_index - range_of_values + 1
df.loc[starting_index:df_last_index, "SimpleMovingAvgClassic"] = SimpleMovingAvgClassicUpdater(
    data=df, price="Close", length=5, starting_index=starting_index
)
df["SimpleMovingAvgClassic"]

0           NaN
1           NaN
2           NaN
3           NaN
4           NaN
         ...   
5945    201.030
5946    201.090
5947    200.078
5948    198.864
5949    197.580
Name: SimpleMovingAvgClassic, Length: 5950, dtype: float64

RangeIndex(start=0, stop=5950, step=1)