## <span style="color:#ff5f27">📝 Imports </span>

In [None]:
import datetime
from features.price import generate_historical_data, to_wide_format, plot_historical_id
from features.averages import calculate_second_order_features

import great_expectations as ge
from great_expectations.core import ExpectationSuite, ExpectationConfiguration

import warnings
# Suppress all Python warnings for the rest of the kernel session. This keeps
# cell output uncluttered, but it also hides warnings that may be actionable.
warnings.filterwarnings('ignore')

## <span style="color:#ff5f27">⚙️ Data Generation </span>

Let's define the `START_DATE` variable (format: %Y-%m-%d) which will indicate the start date for data generation.

In [None]:
# First day of the synthetic price history: January 1, 2024.
START_DATE = datetime.date(year=2024, month=1, day=1)

In [None]:
# Build the synthetic price history, starting at START_DATE.
# NOTE(review): the end of the generated range is decided inside
# `generate_historical_data` (presumably the current date) — confirm there.
prices_data_generated = generate_historical_data(START_DATE)

# Preview the first three rows.
prices_data_generated.head(3)

Let's look at the historical values for IDs 1 and 2.

In [None]:
# Plot the generated price history for ids 1 and 2.
plot_historical_id([1, 2], prices_data_generated)

## <span style="color:#ff5f27"> 👮🏻‍♂️ Great Expectations </span>

In [None]:
# Wrap the generated prices in a Great Expectations dataset so that an
# expectation suite can be obtained from it.
ge_price_df = ge.from_pandas(prices_data_generated)

# Take the dataset's expectation suite and label it "price_suite".
expectation_suite_price = ge_price_df.get_expectation_suite()
expectation_suite_price.expectation_suite_name = "price_suite"

In [None]:
# Range expectations: every value of the column must lie within
# [min_value, max_value].
#   - 'id'    : 0 .. 5000
#   - 'price' : 0 .. 1000
for column, (min_value, max_value) in (("id", (0, 5000)), ("price", (0, 1000))):
    expectation_suite_price.add_expectation(
        ExpectationConfiguration(
            expectation_type="expect_column_values_to_be_between",
            kwargs={
                "column": column,
                "min_value": min_value,
                "max_value": max_value,
            }
        )
    )

# Completeness expectations: none of the key columns may contain nulls.
# The previous form, `expect_column_values_to_be_null` with `mostly=0.0`,
# only required that at least 0% of values be null, which is always true,
# so it never caught missing data.
for column in ['date', 'id', 'price']:
    expectation_suite_price.add_expectation(
        ExpectationConfiguration(
            expectation_type="expect_column_values_to_not_be_null",
            kwargs={
                "column": column,
                "mostly": 1.0,  # every row must be non-null
            }
        )
    )


## <span style="color:#ff5f27">⚙️ Feature Engineering  </span>

We will engineer the following features:

- `ma_7`: This feature represents the 7-day moving average of the 'price' data, providing a smoothed representation of short-term price trends.

- `ma_14`: This feature represents the 14-day moving average of the 'price' data, offering a slightly longer-term smoothed price trend.

- `ma_30`: This feature represents the 30-day moving average of the 'price' data, providing a longer-term smoothed representation of price trends.

- `daily_rate_of_change`: This feature calculates the daily rate of change in prices as a percentage change, indicating how much the price has changed from the previous day.

- `volatility_30_day`: This feature measures the volatility of prices over a 30-day window using the standard deviation. Higher values indicate greater price fluctuations.

- `ema_02`: This feature calculates the exponential moving average (EMA) of 'price' with a smoothing factor of 0.2, giving more weight to recent data points in the calculation.

- `ema_05`: Similar to ema_02, this feature calculates the EMA of 'price' with a smoothing factor of 0.5, providing a different degree of responsiveness to recent data.

- `rsi`: The Relative Strength Index (RSI) is a momentum oscillator that measures the speed and change of price movements. It ranges from 0 to 100, with values above 70 indicating overbought conditions and values below 30 indicating oversold conditions.

In [None]:
# Derive the second-order features from the generated price history.
# NOTE(review): the exact output columns are defined inside
# `calculate_second_order_features`; the monitoring cells below expect at
# least `ma_7`, `volatility_30_day`, `rsi` and `daily_rate_of_change`.
averages_df = calculate_second_order_features(prices_data_generated)

# Preview the first three rows of the engineered features.
averages_df.head(3)

## <span style="color:#ff5f27">🔮 Connect to Hopsworks Feature Store </span>

In [None]:
import hopsworks

# Log in to Hopsworks and keep a handle on the project.
project = hopsworks.login()

# Feature store handle used by every feature group cell below.
fs = project.get_feature_store()

## <span style="color:#ff5f27">🪄 Prices Feature Group </span>

In [None]:
# Get or create version 1 of the 'prices' feature group: keyed by 'id', with
# 'date' as the event time, online-enabled, and with the "price_suite"
# expectation suite attached.
prices_fg = fs.get_or_create_feature_group(
    name='prices',
    description='Price Data',
    version=1,
    primary_key=['id'],
    event_time='date',
    online_enabled=True,
    expectation_suite=expectation_suite_price,
)

# Insert the generated price data into the feature group.
prices_fg.insert(prices_data_generated)
print('✅ Done!')

## <span style="color:#ff5f27">🪄 Averages Feature Group </span>

In [None]:
# Get or create version 1 of the 'averages' feature group: keyed by 'id',
# with 'date' as the event time, online-enabled, and declaring the prices
# feature group as its parent.
averages_fg = fs.get_or_create_feature_group(
    name='averages',
    description='Calculated second order features',
    version=1,
    primary_key=['id'],
    event_time='date',
    online_enabled=True,
    parents=[prices_fg],
)

# Insert the engineered features, passing wait=True to the insert call.
averages_fg.insert(averages_df, wait=True)

## <span style="color:#ff5f27">🕵🏻‍♂️ Feature monitoring</span>


In [None]:
# Raw price monitoring: compare the mean of `price` in the detection window
# against the mean in the reference window.
# Cron fields: second, minute, hour, day, month, day-of-week, year
# -> runs daily at midnight.
# NOTE(review): threshold=0.05 is presumably an absolute difference of means
# unless `relative=True` is passed to compare_on — confirm the intended scale.
price_basic_monitoring = (
    prices_fg.create_feature_monitoring(
        name="price_basic_monitoring",
        feature_name="price",
        description="Monitor daily price statistics and detect sudden changes",
        cron_expression="0 0 0 * * ? *",
    )
    # Detection window: offset "1d", using all rows.
    .with_detection_window(time_offset="1d", row_percentage=1.0)
    # Reference window: offset "2d", length "1d", using all rows.
    .with_reference_window(time_offset="2d", window_length="1d", row_percentage=1.0)
    .compare_on(metric="mean", threshold=0.05)
    .save()
)

In [None]:
# Moving-average monitoring: compare the mean of `ma_7` in the detection
# window against its mean over a 30-day reference window. Runs daily at
# midnight.
# NOTE(review): despite the "crossover" wording, only `ma_7` is referenced
# here; no second moving average takes part in the comparison.
ma_cross_monitoring = (
    averages_fg.create_feature_monitoring(
        name="ma_cross_monitoring",
        description="Monitor crossovers between short and long-term moving averages",
        feature_name="ma_7",
        cron_expression="0 0 0 * * ? *",
    )
    # Detection window: offset "1d", using all rows.
    .with_detection_window(time_offset="1d", row_percentage=1.0)
    # Reference window: offset "31d", length "30d", using all rows.
    .with_reference_window(time_offset="31d", window_length="30d", row_percentage=1.0)
    .compare_on(metric="mean", threshold=0.02)
    .save()
)

In [None]:
# Volatility monitoring: compare the mean of `volatility_30_day` in the
# detection window against its mean over a 7-day reference window. Runs
# daily at midnight.
volatility_monitoring = (
    averages_fg.create_feature_monitoring(
        name="volatility_monitoring",
        feature_name="volatility_30_day",
        description="Monitor significant changes in 30-day volatility",
        cron_expression="0 0 0 * * ? *",
    )
    # Detection window: offset "1d", using all rows.
    .with_detection_window(time_offset="1d", row_percentage=1.0)
    # Reference window: offset "8d", length "7d", using all rows.
    .with_reference_window(time_offset="8d", window_length="7d", row_percentage=1.0)
    .compare_on(metric="mean", threshold=0.15)
    .save()
)

In [None]:
# RSI monitoring: compare the mean of `rsi` in the detection window against
# the fixed reference value 50 with a threshold of 20. Runs daily at midnight.
# NOTE(review): if the threshold is an absolute difference, this flags a mean
# RSI beyond roughly 30/70 — confirm against compare_on's defaults.
rsi_monitoring = (
    averages_fg.create_feature_monitoring(
        name="rsi_monitoring",
        feature_name="rsi",
        description="Monitor RSI for overbought/oversold conditions",
        cron_expression="0 0 0 * * ? *",
    )
    # Detection window: offset "1d", using all rows.
    .with_detection_window(time_offset="1d", row_percentage=1.0)
    # Constant baseline instead of a reference window.
    .with_reference_value(value=50)
    .compare_on(metric="mean", threshold=20)
    .save()
)

In [None]:
# Rate-of-change monitoring: compare the mean of `daily_rate_of_change` in
# the detection window against its mean over a 7-day reference window. Runs
# daily at midnight.
# NOTE(review): threshold=1.0 is meant as a "1% average change"; that only
# holds if `daily_rate_of_change` is stored in percent units — confirm.
roc_monitoring = (
    averages_fg.create_feature_monitoring(
        name="rate_of_change_monitoring",
        feature_name="daily_rate_of_change",
        description="Monitor for abnormal average price changes",
        cron_expression="0 0 0 * * ? *",
    )
    # Detection window: offset "1d", using all rows.
    .with_detection_window(time_offset="1d", row_percentage=1.0)
    # Reference window: offset "8d", length "7d", using all rows.
    .with_reference_window(time_offset="8d", window_length="7d", row_percentage=1.0)
    .compare_on(metric="mean", threshold=1.0)
    .save()
)

---