In [18]:
from droughts_modelling.data import DataFunctions
from droughts_modelling.window_gen import WindowGenerator
from droughts_modelling.updated_DL_trainer import DeepLearning2
import tensorflow
from tensorflow.keras import models, layers
import os

In [3]:
# Load the raw CSVs through DataFunctions and build the weekly, per-county
# aggregate of the test split (weekly means, county lat/long and the rounded
# weekly max score; rows with missing values are dropped).
test_data = DataFunctions().light_weekly_aggregate_test()

In [None]:
# Build one windowed dataset per county and chain them together, so that no
# window ever spans two different counties.
# Counties are visited in ascending `fips_` order via groupby instead of seeding
# with a hard-coded county (1001) and iterating an unordered set: this is
# deterministic, works when 1001 is absent, and avoids re-filtering the whole
# frame once per county.
window_kwargs = dict(input_width=6, label_width=6, shift=1, label_columns=["score_max"])

metawindow = None
for _, county_df in test_data.groupby('fips_', sort=True):
    county_window = WindowGenerator(county_df, **window_kwargs).make_dataset()
    if metawindow is None:
        metawindow = county_window
    else:
        metawindow = metawindow.concatenate(county_window)

metawindow

In [None]:
metawindow

In [None]:
def initialize_model():
    """Build and compile a small LSTM classifier for the weekly drought score.

    The network emits one prediction per time step (``return_sequences=True``)
    as a probability distribution over the six score classes.

    The previous head was a single sigmoid unit trained with binary
    cross-entropy, which is only valid for targets in [0, 1]; the label here is
    `score_max`, which the rest of this notebook treats as six classes
    (`score_max_0` .. `score_max_5`). A softmax head with sparse categorical
    cross-entropy matches that.
    NOTE(review): assumes the windowed labels are the integer class ids 0..5
    stored in the `score_max` column -- confirm against WindowGenerator.

    Returns:
        A compiled ``Sequential`` model (optimizer ``rmsprop``, metric
        ``accuracy``).
    """
    model = models.Sequential()
    model.add(layers.LSTM(32, return_sequences=True, activation='tanh'))
    # One probability per drought class at every time step.
    model.add(layers.Dense(6, activation='softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
    return model

In [None]:
# Fresh, untrained model instance; re-run this cell to reset the weights.
model = initialize_model()

In [None]:
# `metawindow` is a tf.data.Dataset that already yields batches, so the batch
# size is fixed where the windows are built. Keras rejects an explicit
# `batch_size` for dataset inputs (ValueError), hence it is not passed here.
model.fit(metawindow, epochs=1, verbose=1)

In [15]:
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras import models,layers
from sklearn.preprocessing import OneHotEncoder
from droughts_modelling.data import DataFunctions
from droughts_modelling.window_gen import WindowGenerator
import numpy as np

class DeepLearning2():
    """Scale, one-hot encode, window and train an LSTM on the weekly drought data.

    The stages chain themselves: ``window()`` calls ``ohe()``, which calls
    ``robust()``; ``train_evaluate_model()`` runs the whole pipeline.
    """

    # Identifier / target columns that must not be scaled.
    _NON_FEATURE_COLUMNS = ['fips_', 'year_', 'week_num_', 'score_max']
    # One indicator column per drought score class 0..5.
    _LABEL_COLUMNS = ["score_max_0", "score_max_1", "score_max_2",
                      "score_max_3", "score_max_4", "score_max_5"]

    def __init__(self):
        """Load the weekly train/test aggregates and record the feature columns."""
        # A single DataFunctions instance is enough: building two read every
        # raw CSV from disk twice.
        data = DataFunctions()
        self.train_data = data.light_weekly_aggregate_train()
        self.test_data = data.light_weekly_aggregate_test()
        self.features = self.train_data.drop(columns=self._NON_FEATURE_COLUMNS).columns

    #Data Scaling: Train and Test
    def robust(self):
        """Robust-scale every feature as (x - median) / IQR.

        Median and IQR are computed on the training data only and applied to
        both splits. A feature whose training IQR is zero is centred but not
        rescaled (scale 1.0), which avoids dividing by zero and filling the
        column with inf/NaN.

        Sets ``self.train_df_robust`` and ``self.test_df_robust``; the input
        frames are left untouched.
        """
        train_df = self.train_data.copy()
        test_df = self.test_data.copy()
        for f in self.features:
            # Statistics must be taken before the training column is overwritten.
            train_median = np.median(train_df[f])
            q75, q25 = np.percentile(train_df[f], [75, 25])
            train_iqr = q75 - q25
            if train_iqr == 0:
                train_iqr = 1.0
            train_df[f] = (train_df[f] - train_median) / train_iqr
            test_df[f] = (test_df[f] - train_median) / train_iqr

        self.train_df_robust = train_df
        self.test_df_robust = test_df
        print('robust done')

    def _one_hot_scores(self, df):
        """Return a copy of ``df`` with `score_max` replaced by six indicator columns.

        The class set is fixed to 0..5, so train and test always get the same
        six columns in the same order even when a split is missing a class.
        A score outside 0..5 produces an all-zero row.
        """
        scores = df['score_max'].to_numpy()
        classes = np.arange(len(self._LABEL_COLUMNS))
        indicators = (scores[:, None] == classes).astype(float)
        encoded = df.drop(columns=['score_max'])
        for position, column in enumerate(self._LABEL_COLUMNS):
            encoded[column] = indicators[:, position]
        return encoded

    #One Hot Encoding: Train and Test
    def ohe(self):
        """Run ``robust()`` and one-hot encode `score_max` in both splits.

        Previously a separate encoder was fitted on each split, so a split
        lacking one of the six classes either failed on the six-way unpacking
        or produced misaligned label columns.

        Sets ``self.train_df_robust_ohe`` and ``self.test_df_robust_ohe``.
        """
        self.robust()
        self.train_df_robust_ohe = self._one_hot_scores(self.train_df_robust)
        self.test_df_robust_ohe = self._one_hot_scores(self.test_df_robust)
        print('ohe_done')

    def _windows_by_county(self, df):
        """Window each county separately and concatenate the resulting datasets.

        Counties are visited in ascending `fips_` order. Returns ``None`` when
        ``df`` has no rows.
        """
        combined = None
        for _, county_df in df.groupby('fips_', sort=True):
            county_window = WindowGenerator(
                county_df,
                input_width=6,
                label_width=6,
                shift=1,
                label_columns=list(self._LABEL_COLUMNS),
            ).make_dataset()
            combined = county_window if combined is None else combined.concatenate(county_window)
        return combined

    #Generating Windows: Train and Test
    def window(self):
        """Run ``ohe()`` and build the windowed train and test datasets.

        Sets ``self.train_metawindow`` and ``self.test_metawindow``.

        Returns:
            The test dataset (``self.test_metawindow``).
        """
        self.ohe()
        self.train_metawindow = self._windows_by_county(self.train_df_robust_ohe)
        self.test_metawindow = self._windows_by_county(self.test_df_robust_ohe)
        return self.test_metawindow

    #Model + evaluation
    def initialize_model(self):
        """Create and compile the two-layer LSTM classifier into ``self.model``."""
        self.model = models.Sequential()
        self.model.add(layers.LSTM(32, return_sequences=True, activation='tanh'))
        self.model.add(layers.LSTM(32, return_sequences=True, activation='tanh'))
        self.model.add(layers.Dense(20, activation='relu'))
        # Six-way softmax to match the six one-hot label columns.
        self.model.add(layers.Dense(6, activation='softmax'))
        self.model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    def train_evaluate_model(self):
        """Train for one epoch on the train windows and evaluate on the test windows.

        Returns:
            Whatever ``self.model.evaluate`` returns for the test dataset
            (previously this value was discarded).
        """
        self.initialize_model()
        self.window()
        # The windows are an already-batched tf.data.Dataset; Keras raises a
        # ValueError if `batch_size` is also given, so it is not passed.
        self.model.fit(self.train_metawindow, epochs=1, verbose=0)
        return self.model.evaluate(self.test_metawindow, verbose=0)

In [16]:
# Smoke check of the preprocessing pipeline: window() runs the scaling and
# one-hot encoding steps first and returns the windowed test dataset, which is
# displayed as this cell's output.
DeepLearning2().window()

robust done
ohe_done


<ConcatenateDataset shapes: ((None, 6, 29), (None, 6, 6)), types: (tf.float32, tf.float32)>

In [19]:
# Climb three directory levels up from the current working directory.
file_path = os.getcwd()
for _ in range(3):
    file_path = os.path.dirname(file_path)

# All raw CSVs sit in the same folder below that root.
raw_data_parts = (file_path, 'realGhostFoxx', 'droughts_modelling', 'raw_data')
train_full_path = os.path.join(*raw_data_parts, 'train_timeseries.csv')
validate_full_path = os.path.join(*raw_data_parts, 'validation_timeseries.csv')
test_full_path = os.path.join(*raw_data_parts, 'test_timeseries.csv')

In [20]:
file_path

'/Users/hughlupson/code'

In [21]:
train_full_path

'/Users/hughlupson/code/realGhostFoxx/droughts_modelling/raw_data/train_timeseries.csv'

In [22]:
validate_full_path

'/Users/hughlupson/code/realGhostFoxx/droughts_modelling/raw_data/validation_timeseries.csv'

In [23]:
test_full_path

'/Users/hughlupson/code/realGhostFoxx/droughts_modelling/raw_data/test_timeseries.csv'

In [24]:
# Root folder: three directory levels above the current working directory.
working_dir = os.getcwd()
file_path = os.path.dirname(os.path.dirname(os.path.dirname(working_dir)))

# Location of the training CSV below that root.
train_relative = ('realGhostFoxx', 'droughts_modelling', 'raw_data', 'train_timeseries.csv')
train_full_path = os.path.join(file_path, *train_relative)


In [25]:
train_full_path

'/Users/hughlupson/code/realGhostFoxx/droughts_modelling/raw_data/train_timeseries.csv'

In [None]:
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras import models,layers
from sklearn.preprocessing import OneHotEncoder
from droughts_modelling.data import DataFunctions
from droughts_modelling.window_gen import WindowGenerator
import numpy as np

class DeepLearning2():
    """Scale, one-hot encode, window and train an LSTM on the weekly drought data.

    The stages chain themselves: ``window()`` calls ``ohe()``, which calls
    ``robust()``; ``train_evaluate_model()`` runs the whole pipeline.

    NOTE(review): a class with this name is also defined in an earlier cell of
    this notebook and imported from `droughts_modelling.updated_DL_trainer`;
    whichever definition runs last is the one in effect.
    """

    # Identifier / target columns that must not be scaled.
    _NON_FEATURE_COLUMNS = ['fips_', 'year_', 'week_num_', 'score_max']
    # One indicator column per drought score class 0..5.
    _LABEL_COLUMNS = ["score_max_0", "score_max_1", "score_max_2",
                      "score_max_3", "score_max_4", "score_max_5"]

    def __init__(self):
        """Load the weekly train/test aggregates and record the feature columns."""
        # A single DataFunctions instance is enough: building two read every
        # raw CSV from disk twice.
        data = DataFunctions()
        self.train_data = data.light_weekly_aggregate_train()
        self.test_data = data.light_weekly_aggregate_test()
        self.features = self.train_data.drop(columns=self._NON_FEATURE_COLUMNS).columns

    #Data Scaling: Train and Test
    def robust(self):
        """Robust-scale every feature as (x - median) / IQR.

        Median and IQR are computed on the training data only and applied to
        both splits. A feature whose training IQR is zero is centred but not
        rescaled (scale 1.0), which avoids dividing by zero and filling the
        column with inf/NaN.

        Sets ``self.train_df_robust`` and ``self.test_df_robust``; the input
        frames are left untouched.
        """
        train_df = self.train_data.copy()
        test_df = self.test_data.copy()
        for f in self.features:
            # Statistics must be taken before the training column is overwritten.
            train_median = np.median(train_df[f])
            q75, q25 = np.percentile(train_df[f], [75, 25])
            train_iqr = q75 - q25
            if train_iqr == 0:
                train_iqr = 1.0
            train_df[f] = (train_df[f] - train_median) / train_iqr
            test_df[f] = (test_df[f] - train_median) / train_iqr

        self.train_df_robust = train_df
        self.test_df_robust = test_df

    def _one_hot_scores(self, df):
        """Return a copy of ``df`` with `score_max` replaced by six indicator columns.

        The class set is fixed to 0..5, so train and test always get the same
        six columns in the same order even when a split is missing a class.
        A score outside 0..5 produces an all-zero row.
        """
        scores = df['score_max'].to_numpy()
        classes = np.arange(len(self._LABEL_COLUMNS))
        indicators = (scores[:, None] == classes).astype(float)
        encoded = df.drop(columns=['score_max'])
        for position, column in enumerate(self._LABEL_COLUMNS):
            encoded[column] = indicators[:, position]
        return encoded

    #One Hot Encoding: Train and Test
    def ohe(self):
        """Run ``robust()`` and one-hot encode `score_max` in both splits.

        Previously a separate encoder was fitted on each split, so a split
        lacking one of the six classes either failed on the six-way unpacking
        or produced misaligned label columns.

        Sets ``self.train_df_robust_ohe`` and ``self.test_df_robust_ohe``.
        """
        self.robust()
        self.train_df_robust_ohe = self._one_hot_scores(self.train_df_robust)
        self.test_df_robust_ohe = self._one_hot_scores(self.test_df_robust)

    def _windows_by_county(self, df):
        """Window each county separately and concatenate the resulting datasets.

        Counties are visited in ascending `fips_` order. Returns ``None`` when
        ``df`` has no rows.
        """
        combined = None
        for _, county_df in df.groupby('fips_', sort=True):
            county_window = WindowGenerator(
                county_df,
                input_width=6,
                label_width=6,
                shift=1,
                label_columns=list(self._LABEL_COLUMNS),
            ).make_dataset()
            combined = county_window if combined is None else combined.concatenate(county_window)
        return combined

    #Generating Windows: Train and Test
    def window(self):
        """Run ``ohe()`` and build the windowed train and test datasets.

        Sets ``self.train_metawindow`` and ``self.test_metawindow``.

        Returns:
            The test dataset (``self.test_metawindow``).
        """
        self.ohe()
        self.train_metawindow = self._windows_by_county(self.train_df_robust_ohe)
        self.test_metawindow = self._windows_by_county(self.test_df_robust_ohe)
        return self.test_metawindow

    #Model + evaluation
    def initialize_model(self):
        """Create and compile the two-layer LSTM classifier into ``self.model``."""
        self.model = models.Sequential()
        self.model.add(layers.LSTM(32, return_sequences=True, activation='tanh'))
        self.model.add(layers.LSTM(32, return_sequences=True, activation='tanh'))
        self.model.add(layers.Dense(20, activation='relu'))
        # Six-way softmax to match the six one-hot label columns.
        self.model.add(layers.Dense(6, activation='softmax'))
        self.model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

    def train_evaluate_model(self):
        """Train for one epoch on the train windows and evaluate on the test windows.

        Returns:
            Whatever ``self.model.evaluate`` returns for the test dataset
            (previously this value was discarded).
        """
        self.initialize_model()
        self.window()
        # The windows are an already-batched tf.data.Dataset; Keras raises a
        # ValueError if `batch_size` is also given, so it is not passed.
        self.model.fit(self.train_metawindow, epochs=1, verbose=0)
        return self.model.evaluate(self.test_metawindow, verbose=0)
        
# Entry point when this code is executed directly (as a script or a notebook
# cell): build the pipeline, train the model and evaluate it on the test windows.
if __name__ == '__main__':
    DeepLearning2().train_evaluate_model()

In [None]:
import os
import pandas as pd
import numpy as np
import datetime as dt
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.tree import DecisionTreeClassifier
import ast

class DataFunctions:
    """Load the raw drought time series and derive weekly, per-county tables.

    The raw frames are read once in ``__init__``; the aggregation methods work
    on copies and never modify ``self.train_data``, ``self.validation_data``,
    ``self.test_data`` or ``self.fips_dict``.
    """

    # Daily meteorological columns that are aggregated per county and ISO week.
    _WEATHER_COLUMNS = ['PRECTOT', 'PS', 'QV2M', 'T2M', 'T2MDEW', 'T2MWET',
                        'T2M_MAX', 'T2M_MIN', 'T2M_RANGE', 'TS', 'WS10M',
                        'WS10M_MAX', 'WS10M_MIN', 'WS10M_RANGE', 'WS50M',
                        'WS50M_MAX', 'WS50M_MIN', 'WS50M_RANGE']
    # Names of the group keys after the aggregated column index is flattened.
    _ID_COLUMNS = ['fips_', 'year_', 'week_num_']

    def __init__(self, raw_data_dir=None):
        """Read the train/validation/test CSVs and the county lookup table.

        Args:
            raw_data_dir: Folder containing `train_timeseries.csv`,
                `validation_timeseries.csv`, `test_timeseries.csv` and
                `fips_dict.csv`. Defaults to
                `<cwd/../../..>/realGhostFoxx/droughts_modelling/raw_data`.
        """
        if raw_data_dir is None:
            file_path = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
            raw_data_dir = os.path.join(file_path, 'realGhostFoxx', 'droughts_modelling', 'raw_data')

        # NOTE(review): the leading rows dropped here (2 / 1 / 6) presumably make
        # each file start on a week boundary -- confirm against the raw data.
        self.train_data = pd.read_csv(os.path.join(raw_data_dir, 'train_timeseries.csv'))[2:]
        self.validation_data = pd.read_csv(os.path.join(raw_data_dir, 'validation_timeseries.csv'))[1:]
        self.test_data = pd.read_csv(os.path.join(raw_data_dir, 'test_timeseries.csv'))[6:]

        self.fips_path = os.path.join(raw_data_dir, 'fips_dict.csv')
        self.fips_dict = pd.read_csv(self.fips_path, index_col=[0])

    def train_last_2_years(self):
        """Return the training rows dated 2015-01-01 or later.

        The returned frame has `date` parsed to datetime; the stored training
        frame is not modified.
        """
        df = self.train_data.assign(date=pd.to_datetime(self.train_data['date']))
        return df[df['date'] >= '2015-01-01']

    def _aggregate_by_week(self, daily_df, stats):
        """Aggregate daily rows per (fips, ISO year, ISO week).

        Args:
            daily_df: Daily frame with `fips`, `date`, `score` and the weather
                columns.
            stats: Aggregations applied to every weather column, e.g.
                ``('mean',)``.

        Returns:
            Frame sorted by county, year and week with flat column names
            (`fips_`, `year_`, `week_num_`, `<COLUMN>_<stat>`, `score_max`),
            where `score_max` is the weekly maximum score rounded to the
            nearest integer. Rows with missing values are kept.
        """
        iso = pd.to_datetime(daily_df['date']).dt.isocalendar()
        # assign() returns a new frame, so the caller's data is left untouched.
        with_keys = daily_df.assign(year=iso['year'], week_num=iso['week'])

        agg_spec = {column: list(stats) for column in self._WEATHER_COLUMNS}
        agg_spec['score'] = 'max'
        weekly = with_keys.groupby(['fips', 'year', 'week_num']).agg(agg_spec).reset_index()

        # Flatten the two-level column index: ('PS', 'mean') -> 'PS_mean' and
        # the group keys ('fips', '') -> 'fips_'.
        weekly.columns = ['_'.join(col) for col in weekly.columns.values]
        weekly = weekly.sort_values(self._ID_COLUMNS)
        weekly['score_max'] = weekly['score_max'].round()
        return weekly

    def _county_coordinates(self):
        """Return a `fips_` / `lat` / `long` lookup built from ``self.fips_dict``.

        `lat_long` is parsed with ``ast.literal_eval`` and split by position, so
        the result does not depend on the index of ``self.fips_dict``.
        """
        coords = self.fips_dict.drop(columns=['COUNTYNAME', "STATE", 'geom']).rename(columns={'fips': 'fips_'})
        parsed = [ast.literal_eval(value) for value in coords["lat_long"]]
        coords["lat"] = [pair[0] for pair in parsed]
        coords["long"] = [pair[1] for pair in parsed]
        return coords.drop(columns=["lat_long"])

    def _light_weekly_aggregate(self, daily_df):
        """Weekly means per county plus county lat/long and the rounded max score.

        Counties missing from the lookup table are dropped (inner merge), as
        are rows with any missing value.
        """
        weekly = self._aggregate_by_week(daily_df, ('mean',))
        weekly = pd.merge(weekly, self._county_coordinates(), on=["fips_"], how="inner")
        ordered = (self._ID_COLUMNS
                   + [f'{column}_mean' for column in self._WEATHER_COLUMNS]
                   + ['lat', 'long', 'score_max'])
        return weekly[ordered].dropna()

    def weekly_aggregate(self):
        """Weekly min/mean/std of every weather column for the training data.

        Returns one row per county and ISO week with the rounded weekly max
        score; rows with any missing value are dropped.
        """
        return self._aggregate_by_week(self.train_data, ('min', 'mean', 'std')).dropna()

    def light_weekly_aggregate_train(self):
        """Light weekly aggregate (means only) of the training split."""
        return self._light_weekly_aggregate(self.train_data)

    def light_weekly_aggregate_validate(self):
        """Light weekly aggregate (means only) of the validation split."""
        return self._light_weekly_aggregate(self.validation_data)

    def light_weekly_aggregate_test(self):
        """Light weekly aggregate (means only) of the test split."""
        return self._light_weekly_aggregate(self.test_data)

    def light_weekly_aggregate(self):
        """Light weekly aggregate used by the feature-analysis helpers.

        Those helpers call this name, which did not exist and made them raise
        AttributeError.
        NOTE(review): assumed to mean the training split -- confirm.
        """
        return self.light_weekly_aggregate_train()

    def k_best_features(self):
        """Rank the features of the light weekly aggregate by ANOVA F-value.

        Returns:
            Frame with `features`, `ANOVA F-value` and `pValue`, sorted by
            descending F-value.
        """
        df = self.light_weekly_aggregate()

        y = round(df['score_max'])
        X = df.drop(columns=['fips_', 'week_num_', 'score_max'])

        k_best_f = SelectKBest(f_classif, k=10).fit(X, y)
        df_scores = pd.DataFrame({'features': X.columns, 'ANOVA F-value': k_best_f.scores_, 'pValue': k_best_f.pvalues_})

        return df_scores.sort_values('ANOVA F-value', ascending=False).reset_index()

    def tree_feature_importance(self):
        """Rank features by the importance a depth-6 decision tree assigns them.

        Returns:
            The 20 most important features with their `Feature Importance`.
        """
        df = self.light_weekly_aggregate()

        y = round(df['score_max'])
        X = df.drop(columns=['fips_', 'week_num_', 'score_max'])

        tree_clf = DecisionTreeClassifier(max_depth=6, random_state=2)
        tree_clf.fit(X, y)

        return pd.DataFrame({'features': X.columns, 'Feature Importance': tree_clf.feature_importances_})\
            .sort_values('Feature Importance', ascending=False).iloc[:20]

    def return_lagged_function(self, weeks_back=5):
        """Add per-county lagged copies of the top features.

        For every top feature, columns `'<feature> - <i>'` are added for
        ``i = 1 .. weeks_back - 1``, holding that feature's value ``i`` rows
        earlier within the same county.

        Args:
            weeks_back: Exclusive upper bound on the lag; the default of 5
                yields lags 1 to 4.

        Returns:
            Frame with the id columns, the top features and their lags
            (previously the return statement was commented out and nothing
            was returned).
        """
        df = self.light_weekly_aggregate()

        top_features = ['T2M_RANGE_mean', 'PS_mean', 'T2M_MAX_mean', 'TS_mean',
                        'T2MDEW_mean', 'QV2M_mean', 'WS10M_MAX_mean', 'PRECTOT_mean']

        all_features = [i for i in df.columns if i in top_features or i in ['fips_', 'year_', 'week_num_']]

        # Explicit copy so the new columns are not written into a slice of `df`.
        df_processed = df[all_features].copy()

        for e in top_features:
            for i in range(1, weeks_back):
                df_processed[f'{e} - {i}'] = df_processed.groupby(['fips_'])[e].shift(i)

        return df_processed

