# Processing & Visualization Functions

In [5]:
def get_codes_data(df, codes):
    """
    Filter DataFrame to the selected zipcodes, excluding data from 2020.
    
    Arguments:
    df -- cleaned Pandas DataFrame in Long format.  Must contain the columns
          'zipcode', 'time' (datetime-like, the '.dt' accessor is used) and 'value'.
    codes -- list of zipcodes to keep in output DataFrame.
    
    Return:
    New Pandas DataFrame indexed by 'time' with the columns 'zipcode' and 'value',
    containing only rows whose zipcode is in 'codes' and whose year is not 2020.
    The input DataFrame is not modified.
    """
    #Both conditions must hold for a row to be kept.  They are combined into one
    #mask so that the 2020 filter cannot accidentally discard the zipcode filter.
    in_codes = df['zipcode'].isin(codes)
    not_2020 = df['time'].dt.year != 2020
    
    #Select the matching rows and only the needed columns in one step.
    top_df = df.loc[in_codes & not_2020, ['time', 'zipcode', 'value']]
    
    #Set time as index (non-inplace, so no filtered slice is mutated)
    return top_df.set_index('time')

In [6]:
def split_data_by_code(top_df, codes):
    """
    Break one multi-zipcode DataFrame into a list of single-zipcode DataFrames.
    
    Arguments:
    top_df -- DataFrame with a 'zipcode' column, holding the data to separate.
    codes -- list of zipcodes.  One DataFrame is produced per item, in the same order.
    
    Return:
    List of DataFrames.  Each holds the rows of 'top_df' whose zipcode equals the
    corresponding item of 'codes', with the 'zipcode' column removed.
    """
    #One filtered frame per requested zipcode; the zipcode column is redundant
    #once the rows are separated, so it is dropped.
    return [
        top_df[top_df['zipcode'] == code].drop(columns='zipcode')
        for code in codes
    ]

In [7]:
def ts_train_test_split(df_list, train_percent):
    """
    Divide every DataFrame in df_list into a leading train part and a trailing test part.
    
    Arguments:
    df_list -- list of DataFrames, each containing data for one zipcode
    train_percent -- float giving the fraction of rows that goes to the training set.
    
    Return:
    train_list -- list of DataFrames, each containing training data for one zipcode
    test_list -- list of DataFrames, each containing test data for one zipcode
    """
    train_list, test_list = [], []
    
    for frame in df_list:
        #Row position where the training rows end and the test rows begin.
        cutoff = round(frame.shape[0] * train_percent)
        
        train_list.append(frame[:cutoff])
        test_list.append(frame[cutoff:])
    
    return train_list, test_list

In [8]:
def plot_trends(df_list, codes):
    """
    Draw one line per zipcode from the 'value' column of each DataFrame.
    
    Arguments:
    df_list -- list of DataFrames, each containing data for one zipcode
    codes -- list of zipcodes, each corresponding to one element in 'df_list'.
             Used as the legend labels.
    
    Return:
    No return value.  Plots a line graph.
    """
    #NOTE(review): 'plt' is not defined in this function; presumably
    #matplotlib.pyplot imported elsewhere in the notebook -- confirm.
    for position, code in enumerate(codes):
        values = df_list[position]['value']
        values.plot(label=code, figsize=(15, 6))
        
        #Legend is refreshed after every line so each new label is included.
        plt.legend()