### Cell 7: Outlier Detection and CBF Scores

**Markdown Explanation:**

This cell defines two functions:
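1. `detect_and_print_outliers`: Detects and logs outliers using the Interquartile Range (IQR) method, then removes them from the scores and from the matching user and item IDs.
2. `calculate_cbf_scores`: Calculates Content-Based Filtering (CBF) scores for every user–item pair, removes outlier scores, and normalizes the remaining scores.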

In [None]:
def detect_and_print_outliers(scores, user_ids, item_ids):
    """
    Detect, log, and remove outliers using the IQR method.

    A score counts as an outlier when it lies more than 1.5 * IQR below the
    first quartile or above the third quartile. Up to ten outliers are logged,
    and every outlier is dropped from the returned scores and IDs.

    Parameters:
        scores (np.array or list): Scores to screen.
        user_ids (list): User IDs, positionally aligned with scores.
        item_ids (list): Item IDs, positionally aligned with scores.

    Returns:
        tuple: Filtered scores (np.array), filtered user IDs (list), and
        filtered item IDs (list). Empty input is returned unchanged.
    """
    if len(scores) == 0:
        return scores, user_ids, item_ids

    # The element-wise comparisons and boolean-mask indexing below require an
    # ndarray; converting here lets callers pass a plain list of scores too.
    scores = np.asarray(scores)
    # Build the ID arrays once; they are indexed by both the mask and its inverse.
    user_id_array = np.array(user_ids)
    item_id_array = np.array(item_ids)

    q1, q3 = np.percentile(scores, [25, 75])
    iqr = q3 - q1
    lower_bound = q1 - 1.5 * iqr
    upper_bound = q3 + 1.5 * iqr
    outlier_mask = (scores < lower_bound) | (scores > upper_bound)
    keep_mask = ~outlier_mask

    outliers = scores[outlier_mask]
    if len(outliers) > 0:
        outlier_user_ids = user_id_array[outlier_mask].tolist()
        outlier_item_ids = item_id_array[outlier_mask].tolist()
        logging.warning("Outliers detected (first 10):")
        for score, user_id, item_id in zip(outliers[:10], outlier_user_ids[:10], outlier_item_ids[:10]):
            logging.warning(f"User ID: {user_id}, Item ID: {item_id}, CBF Score: {score}")
    else:
        logging.warning("No outliers detected.")

    filtered_scores = scores[keep_mask]
    filtered_user_ids = user_id_array[keep_mask].tolist()
    filtered_item_ids = item_id_array[keep_mask].tolist()

    return filtered_scores, filtered_user_ids, filtered_item_ids

def calculate_cbf_scores(merged_df, item_features, user_preferences):
    """
    Calculate CBF (Content-Based Filtering) scores for all users and items.

    For every (user, item) pair the score is the dot product of the item's
    feature vector and the user's preference vector, matched by column
    position. Outlier scores are then removed with detect_and_print_outliers,
    and the remaining scores are passed to normalize_scores (expected to be
    defined elsewhere in the notebook).

    Parameters:
        merged_df (pd.DataFrame): DataFrame containing merged movies and ratings data.
            Not used by the current implementation; kept for interface compatibility.
        item_features (pd.DataFrame): DataFrame of item features, indexed by item ID.
        user_preferences (pd.DataFrame): DataFrame of user preferences, indexed by user ID.

    Returns:
        tuple: Outlier-filtered CBF scores (np.array), the result of
        normalize_scores on those scores, and the user IDs and item IDs
        aligned with the filtered scores.
    """
    cbf_scores = []
    user_ids = []
    item_ids = []

    # Pull the underlying arrays out once: a label-based .loc lookup per
    # (user, item) pair dominates the runtime of the nested loop, and the
    # user's vector does not change across the inner loop.
    item_index = list(item_features.index)
    item_matrix = item_features.values
    user_matrix = user_preferences.values

    for user_id, user_preference_vector in zip(user_preferences.index, user_matrix):
        for item_id, item_feature_vector in zip(item_index, item_matrix):
            if len(item_feature_vector) != len(user_preference_vector):
                logging.error(f"Feature length mismatch for item {item_id}: {len(item_feature_vector)} vs. {len(user_preference_vector)}")
                continue

            content_score = np.dot(item_feature_vector, user_preference_vector)
            cbf_scores.append(content_score)
            user_ids.append(user_id)
            item_ids.append(item_id)

    logging.warning("Detecting and removing outliers from CBF scores...")
    cbf_scores, user_ids, item_ids = detect_and_print_outliers(np.array(cbf_scores), user_ids, item_ids)

    logging.warning("Normalizing CBF scores...")
    normalized_cbf_scores = normalize_scores(cbf_scores)
    logging.warning(f"CBF scores after normalization (first 10 scores): {normalized_cbf_scores[:10]}")

    return cbf_scores, normalized_cbf_scores, user_ids, item_ids
