# Filling Missing Data

In [1]:
import pandas as pd
import numpy as np

# Ten consecutive daily observations; three of the prices are missing (NaN)
dates = pd.date_range(start="2024-01-01", periods=10, freq='D')
data = {
    'Date': dates,
    'Stock_Price': [100, 102, np.nan, 105, 107, np.nan, 110, 112, 113, np.nan],
}
df = pd.DataFrame(data).set_index('Date')

# Two gap-filling strategies, kept side by side with the raw column for comparison:
#   - forward fill carries the last observed price into each gap
#   - interpolation (pandas default method) fills each gap from its neighbours
df = df.assign(
    Stock_Price_Filled=df['Stock_Price'].ffill(),
    Stock_Price_Interpolated=df['Stock_Price'].interpolate(),
)

print(df)


            Stock_Price  Stock_Price_Filled  Stock_Price_Interpolated
Date                                                                 
2024-01-01        100.0               100.0                     100.0
2024-01-02        102.0               102.0                     102.0
2024-01-03          NaN               102.0                     103.5
2024-01-04        105.0               105.0                     105.0
2024-01-05        107.0               107.0                     107.0
2024-01-06          NaN               107.0                     108.5
2024-01-07        110.0               110.0                     110.0
2024-01-08        112.0               112.0                     112.0
2024-01-09        113.0               113.0                     113.0
2024-01-10          NaN               113.0                     113.0


# Creating Lag Features

In [2]:
# Lag_k holds the forward-filled price from k rows earlier; the first k rows
# have no predecessor, so shift() leaves them as NaN.
for lag in (1, 2, 3):
    df[f'Lag_{lag}'] = df['Stock_Price_Filled'].shift(lag)
print(df)

            Stock_Price  Stock_Price_Filled  Stock_Price_Interpolated  Lag_1  \
Date                                                                           
2024-01-01        100.0               100.0                     100.0    NaN   
2024-01-02        102.0               102.0                     102.0  100.0   
2024-01-03          NaN               102.0                     103.5  102.0   
2024-01-04        105.0               105.0                     105.0  102.0   
2024-01-05        107.0               107.0                     107.0  105.0   
2024-01-06          NaN               107.0                     108.5  107.0   
2024-01-07        110.0               110.0                     110.0  107.0   
2024-01-08        112.0               112.0                     112.0  110.0   
2024-01-09        113.0               113.0                     113.0  112.0   
2024-01-10          NaN               113.0                     113.0  113.0   

            Lag_2  Lag_3  
Date        

# Calculating Moving Averages and Standard Deviation

In [3]:
# Build the 3-row rolling window over the forward-filled prices once and
# derive both summary statistics from it.
price_window = df['Stock_Price_Filled'].rolling(window=3)
df['Rolling_Mean_3'] = price_window.mean()
df['Rolling_Std_3'] = price_window.std()
print(df)

            Stock_Price  Stock_Price_Filled  Stock_Price_Interpolated  Lag_1  \
Date                                                                           
2024-01-01        100.0               100.0                     100.0    NaN   
2024-01-02        102.0               102.0                     102.0  100.0   
2024-01-03          NaN               102.0                     103.5  102.0   
2024-01-04        105.0               105.0                     105.0  102.0   
2024-01-05        107.0               107.0                     107.0  105.0   
2024-01-06          NaN               107.0                     108.5  107.0   
2024-01-07        110.0               110.0                     110.0  107.0   
2024-01-08        112.0               112.0                     112.0  110.0   
2024-01-09        113.0               113.0                     113.0  112.0   
2024-01-10          NaN               113.0                     113.0  113.0   

            Lag_2  Lag_3  Rolling_Mean_

# Differencing the Series

In [4]:
# First-order difference: each row minus the previous row of the forward-filled
# price. The first row has no predecessor and is therefore NaN.
df['Differenced'] = df['Stock_Price_Filled'].diff(periods=1)
print(df)

            Stock_Price  Stock_Price_Filled  Stock_Price_Interpolated  Lag_1  \
Date                                                                           
2024-01-01        100.0               100.0                     100.0    NaN   
2024-01-02        102.0               102.0                     102.0  100.0   
2024-01-03          NaN               102.0                     103.5  102.0   
2024-01-04        105.0               105.0                     105.0  102.0   
2024-01-05        107.0               107.0                     107.0  105.0   
2024-01-06          NaN               107.0                     108.5  107.0   
2024-01-07        110.0               110.0                     110.0  107.0   
2024-01-08        112.0               112.0                     112.0  110.0   
2024-01-09        113.0               113.0                     113.0  112.0   
2024-01-10          NaN               113.0                     113.0  113.0   

            Lag_2  Lag_3  Rolling_Mean_

# Scaling Time-Series Data

In [5]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# scikit-learn scalers expect 2-D input, hence the double-bracket column selection.
price_frame = df[['Stock_Price_Filled']]

# Min-max scaling: fit on the forward-filled prices, then transform the same values
scaler_minmax = MinMaxScaler()
df['Scaled_MinMax'] = scaler_minmax.fit(price_frame).transform(price_frame)

# Standardisation: same fit-then-transform sequence with StandardScaler
scaler_standard = StandardScaler()
df['Scaled_Standard'] = scaler_standard.fit(price_frame).transform(price_frame)
print(df)

            Stock_Price  Stock_Price_Filled  Stock_Price_Interpolated  Lag_1  \
Date                                                                           
2024-01-01        100.0               100.0                     100.0    NaN   
2024-01-02        102.0               102.0                     102.0  100.0   
2024-01-03          NaN               102.0                     103.5  102.0   
2024-01-04        105.0               105.0                     105.0  102.0   
2024-01-05        107.0               107.0                     107.0  105.0   
2024-01-06          NaN               107.0                     108.5  107.0   
2024-01-07        110.0               110.0                     110.0  107.0   
2024-01-08        112.0               112.0                     112.0  110.0   
2024-01-09        113.0               113.0                     113.0  112.0   
2024-01-10          NaN               113.0                     113.0  113.0   

            Lag_2  Lag_3  Rolling_Mean_

# Creating Supervised Data

In [6]:
def create_supervised(df, target_col, n_lags=3):
    """Return a copy of ``df`` extended with lagged copies of ``target_col``.

    Columns ``Lag_1`` .. ``Lag_{n_lags}`` are added (or overwritten if they
    already exist), where ``Lag_k`` is ``target_col`` shifted down by ``k`` rows.
    The input frame itself is not modified.

    Note: after the lag columns are added, every row containing a NaN in *any*
    column is dropped -- not only rows whose lag values are missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame containing ``target_col``.
    target_col : str
        Name of the column to build the lag columns from.
    n_lags : int, default 3
        Number of lag columns to create.

    Returns
    -------
    pandas.DataFrame
        New frame with the lag columns and without any NaN-containing rows.
    """
    target = df[target_col]
    lagged_columns = {f'Lag_{k}': target.shift(k) for k in range(1, n_lags + 1)}
    return df.assign(**lagged_columns).dropna()

# Build the supervised frame from the forward-filled prices with five lags.
# Lag_1..Lag_3 are recomputed in the returned copy and Lag_4/Lag_5 are added.
supervised_data = create_supervised(df, target_col='Stock_Price_Filled', n_lags=5)
print(supervised_data)

            Stock_Price  Stock_Price_Filled  Stock_Price_Interpolated  Lag_1  \
Date                                                                           
2024-01-07        110.0               110.0                     110.0  107.0   
2024-01-08        112.0               112.0                     112.0  110.0   
2024-01-09        113.0               113.0                     113.0  112.0   

            Lag_2  Lag_3  Rolling_Mean_3  Rolling_Std_3  Differenced  \
Date                                                                   
2024-01-07  107.0  105.0      108.000000       1.732051          3.0   
2024-01-08  107.0  107.0      109.666667       2.516611          2.0   
2024-01-09  110.0  107.0      111.666667       1.527525          1.0   

            Scaled_MinMax  Scaled_Standard  Lag_4  Lag_5  
Date                                                      
2024-01-07       0.769231         0.634496  102.0  102.0  
2024-01-08       0.923077         1.072079  105.0  102.0  
20

# Adding Time-Based Features

In [7]:
# Derive calendar features from the index; each pair maps a new column name
# to the index attribute it is read from.
for column, attribute in (('Day_of_Week', 'dayofweek'), ('Month', 'month'), ('Year', 'year')):
    df[column] = getattr(df.index, attribute)
print(df[['Day_of_Week', 'Month', 'Year']])

            Day_of_Week  Month  Year
Date                                
2024-01-01            0      1  2024
2024-01-02            1      1  2024
2024-01-03            2      1  2024
2024-01-04            3      1  2024
2024-01-05            4      1  2024
2024-01-06            5      1  2024
2024-01-07            6      1  2024
2024-01-08            0      1  2024
2024-01-09            1      1  2024
2024-01-10            2      1  2024
