In [1]:
import pandas as pd
import numpy as np # We need numpy for the next steps

# Read the previously generated market price data and convert the 'date'
# column from text to real datetimes in one chained expression.
df = pd.read_csv('data/market_prices.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)


def create_features(df):
    """
    Build lag, calendar and supply features for every market/commodity series.

    Rows are ordered by market, commodity and date, and every lag or rolling
    statistic is computed inside its own (market_name, commodity) group, so
    values never carry over from one series into another.

    Args:
        df: DataFrame with the columns 'market_name', 'commodity', 'date',
            'modal_price' and 'arrivals_qty'. 'date' must already be a
            datetime column because the ``.dt`` accessor is used on it.

    Returns:
        A new DataFrame (the argument is not modified) sorted by
        market_name, commodity and date, with these columns added:
        'price_yesterday', 'price_last_week', 'price_avg_7days',
        'day_of_week', 'month', 'is_weekend' and 'arrivals_yesterday'.
        Rows with a missing value in ANY column are dropped; this removes at
        least the first 7 rows of each group, which have no full history.

    Note:
        Lags are row based: "yesterday" is the previous row of the group and
        "last week" is 7 rows back. They only match calendar days if each
        group has exactly one row per day with no gaps -- TODO confirm
        against the dataset.
    """
    group_keys = ['market_name', 'commodity']
    one_day, one_week = 1, 7  # lags expressed in rows within a group

    # sort_values returns a new frame, so the caller's DataFrame is untouched.
    df = df.sort_values(group_keys + ['date'])

    # Build the grouped views once, before any column is added.
    grouped = df.groupby(group_keys)
    prices = grouped['modal_price']
    arrivals = grouped['arrivals_qty']

    # Price history features (what happened before?)
    df['price_yesterday'] = prices.shift(one_day)
    df['price_last_week'] = prices.shift(one_week)
    # Mean of the 7 prices before the current row. The shift and the rolling
    # window are both evaluated inside the group; rolling over the flat
    # shifted column would only avoid mixing neighbouring groups by accident
    # (thanks to the NaN that shift leaves at each group start).
    df['price_avg_7days'] = prices.transform(
        lambda series: series.shift(one_day).rolling(one_week).mean()
    )

    # Calendar features (when is it?)
    df['day_of_week'] = df['date'].dt.dayofweek  # Monday=0, Sunday=6
    df['month'] = df['date'].dt.month
    df['is_weekend'] = df['day_of_week'].isin([5, 6]).astype(int)

    # Supply features (how much is available?)
    df['arrivals_yesterday'] = arrivals.shift(one_day)

    # Drop rows left incomplete by the lags above. NOTE: dropna() also removes
    # rows that were already missing a value in any of the original columns.
    df = df.dropna()

    return df

# Run the feature builder on a copy so the loaded frame is left as it was
df_features = create_features(df.copy())

# List every column that exists in the enhanced frame but not in the raw one
added_mask = ~df_features.columns.isin(df.columns)
new_cols = df_features.columns[added_mask].tolist()
print("Features created! New columns:", new_cols, sep="\n")

# Preview the first few rows of the enhanced dataframe
print("\nData with new features:", df_features.head(), sep="\n")

Features created! New columns:
['price_yesterday', 'price_last_week', 'price_avg_7days', 'day_of_week', 'month', 'is_weekend', 'arrivals_yesterday']

Data with new features:
          date  market_name  market_lat  market_lon commodity  modal_price  \
64  2023-01-08  Delhi_Mandi        28.6        77.2     Onion  2421.360880   
73  2023-01-09  Delhi_Mandi        28.6        77.2     Onion  1850.567540   
82  2023-01-10  Delhi_Mandi        28.6        77.2     Onion  2170.927819   
91  2023-01-11  Delhi_Mandi        28.6        77.2     Onion  2117.609937   
100 2023-01-12  Delhi_Mandi        28.6        77.2     Onion  1746.751721   

     arrivals_qty  price_yesterday  price_last_week  price_avg_7days  \
64            336      1894.768124      2307.649023      1942.218552   
73            917      2421.360880      1908.182963      1958.463103   
82            174      1850.567540      1669.136138      1950.232328   
91            210      2170.927819      2416.706548      2021.916854 