In [1]:
import numpy as np
import pandas as pd
import pickle

In [2]:
def read_data(data_path):
    '''Load a CSV file and normalise its column names to snake_case.

    Column names are lower-cased, any parenthesised part (together with the
    whitespace around it) is removed, and remaining spaces are replaced by
    underscores, e.g. "Temperature (C)" -> "temperature".

    Parameters
    ----------
    data_path : str or path-like
        Location of the CSV file; passed straight to ``pd.read_csv``.

    Returns
    -------
    pd.DataFrame
        The loaded data with the renamed columns.
    '''
    ref_data = pd.read_csv(data_path)

    # Raw string on purpose: "\s" and "\(" are invalid escape sequences in a
    # normal string literal and emit a warning on recent Python versions.
    ref_data.columns = (
        ref_data.columns.str.lower()
        .str.replace(r"\s*\(.*\)\s*", "", regex=True)
        .str.replace(' ', '_', regex=False)
    )
    return ref_data

In [3]:
def features_engineering(data: pd.DataFrame):
    '''Derive calendar features and keep the columns selected for modelling.

    Two columns are derived from the string column ``date`` (parsed
    day-first):

    * ``day_number`` - consecutive integers 1, 2, 3, ... assigned to the
      distinct dates in order of first appearance in ``data``.
    * ``weekday`` - the weekday name produced by ``strftime('%A')``.

    The input frame is not modified; a new DataFrame is returned.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain ``date`` plus the columns ``temperature``, ``humidity``,
        ``hour``, ``rainfall``, ``seasons`` and ``rented_bike_count``.

    Returns
    -------
    pd.DataFrame
        Frame with the columns ``temperature``, ``humidity``, ``hour``,
        ``day_number``, ``rainfall``, ``seasons``, ``weekday`` and
        ``rented_bike_count``, in that order.
    '''
    # Parse the date strings once and reuse the result for both features.
    dates = pd.to_datetime(data['date'], dayfirst=True)

    # 1-based number of days elapsed since the earliest date in the frame.
    day_offsets = (dates - dates.min()).dt.days + 1
    # Re-number the distinct offsets 1, 2, 3, ... in order of first appearance,
    # so gaps in the calendar do not leave gaps in day_number.
    ordinal_by_offset = {
        offset: position + 1
        for position, offset in enumerate(day_offsets.unique())
    }
    day_number = day_offsets.map(ordinal_by_offset)

    # assign() builds a new frame, leaving the caller's DataFrame untouched.
    enriched = data.assign(
        day_number=day_number,
        weekday=dates.dt.strftime('%A'),
    )

    # qualitative manual feature selection
    selected_columns = ['temperature', 'humidity', 'hour', 'day_number',
                        'rainfall', 'seasons', 'weekday', 'rented_bike_count']
    return enriched[selected_columns]

In [4]:
def main(interim_dir='../data/interim', processed_dir='../data/processed'):
    '''
    Process the reference and current datasets and save the results.

    Reads ``ref_data.csv`` and ``curr_data.csv`` from ``interim_dir``, runs
    each through ``read_data`` and ``features_engineering``, and writes the
    processed frames under the same file names into ``processed_dir``
    (created if it does not exist yet).

    Parameters
    ----------
    interim_dir : str or path-like
        Directory holding the input CSV files.
    processed_dir : str or path-like
        Directory receiving the processed CSV files.

    Returns
    -------
    tuple
        ``(curr_data_processed.shape, ref_data_processed.shape)``.
    '''
    from pathlib import Path

    interim_dir = Path(interim_dir)
    processed_dir = Path(processed_dir)
    file_names = ('ref_data.csv', 'curr_data.csv')

    # Process both datasets before writing anything, so a failure while
    # reading or transforming one of them leaves no partial output behind.
    processed = {
        file_name: features_engineering(read_data(interim_dir / file_name))
        for file_name in file_names
    }

    processed_dir.mkdir(parents=True, exist_ok=True)
    for file_name, frame in processed.items():
        frame.to_csv(processed_dir / file_name, index=False)

    return processed['curr_data.csv'].shape, processed['ref_data.csv'].shape

In [5]:
# Run the pipeline; the cell output is the (curr shape, ref shape) tuple returned by main().
main()

((2184, 8), (6576, 8))