In [1]:
# Remove any existing Evidently build so the pinned install in the next cell starts clean.
# `%pip` (rather than `! pip`) runs pip for the interpreter backing this kernel,
# so the package is removed from the environment the notebook actually imports from.
%pip uninstall -y evidently


Found existing installation: evidently 0.7.7
Uninstalling evidently-0.7.7:
  Successfully uninstalled evidently-0.7.7


In [2]:
# Install the exact Evidently release used by the cells below.
# `%pip` (rather than `! pip`) installs into the interpreter backing this kernel,
# so the subsequent `import evidently` sees the pinned version.
%pip install evidently==0.7.7


Collecting evidently==0.7.7
  Using cached evidently-0.7.7-py3-none-any.whl.metadata (10 kB)
Using cached evidently-0.7.7-py3-none-any.whl (5.2 MB)
Installing collected packages: evidently
Successfully installed evidently-0.7.7


In [None]:
# Sanity check: print the version of Evidently that this kernel imports.
import evidently
print(evidently.__version__)  


0.7.7


In [None]:
import pandas as pd
from evidently import Dataset, DataDefinition, Report
from evidently.descriptors import (
    Contains,
    DoesNotContain,
    IncludesWords,
    ExcludesWords,
    ContainsLink,
    ItemMatch,
    ItemNoMatch,
    WordMatch,
    WordNoMatch
)
 
# Content-check demo: attach Evidently text descriptors to each Q&A row and
# inspect the resulting per-row columns. The two static keyword checks in this
# cell (`Contains`, `IncludesWords`) are configured with mode="any".

# Location of the Q&A export.
# NOTE(review): machine-specific absolute path — point it at your own copy of the CSV.
DATA_PATH = "C:\\new\\health_and_fitness_qna.csv"

# Load dataset
df = pd.read_csv(DATA_PATH)

# Ensure we have a 'Text' column for analysis (combine Question + Answer).
# Missing values are replaced with "" first so the concatenation never yields NaN.
df["Text"] = df["Question"].fillna("") + " " + df["Answer"].fillna("")

# Simulate row-level item/word expectations (for demo purposes).
# Build a fresh list per row: `[[...]] * len(df)` would store the *same* list
# object in every row, so mutating one row's expectations would change them all.
df["Expected_items"] = [["fitness", "diet"] for _ in range(len(df))]
df["Forbidden_items"] = [["injury", "risk"] for _ in range(len(df))]
df["Expected_words"] = [["exercise", "healthy"] for _ in range(len(df))]
df["Forbidden_words"] = [["junk", "drugs"] for _ in range(len(df))]

# Define text column
data_def = DataDefinition(text_columns=["Text"])

# Build dataset with content-check descriptors; each descriptor's `alias`
# is the name of the column it contributes to the result frame.
eval_dataset = Dataset.from_pandas(
    df,
    data_definition=data_def,
    descriptors=[
        # Static list checks: the terms are fixed for every row.
        Contains(column_name="Text", items=["fitness", "health"], mode="any", case_sensitive=False, alias="has_health_keywords"),
        DoesNotContain(column_name="Text", items=["illegal", "fake"], case_sensitive=False, alias="no_illegal_terms"),
        IncludesWords(column_name="Text", words_list=["exercise", "wellness"], mode="any", lemmatize=True, alias="includes_wellness_terms"),
        ExcludesWords(column_name="Text", words_list=["scam", "spam"], alias="excludes_bad_words"),
        ContainsLink(column_name="Text", alias="has_links"),

        # Row-level dynamic list checks: the terms come from a list column on the same row.
        ItemMatch(columns=["Text", "Expected_items"], mode="all", case_sensitive=False, alias="matches_expected_items"),
        ItemNoMatch(columns=["Text", "Forbidden_items"], alias="no_forbidden_items"),
        WordMatch(columns=["Text", "Expected_words"], mode="all", lemmatize=True, alias="matches_expected_words"),
        WordNoMatch(columns=["Text", "Forbidden_words"], mode="all", lemmatize=True, alias="no_forbidden_words"),
    ],
)

# Run the report.
# NOTE(review): no metrics are configured and the values shown below are read
# from `eval_dataset`, not from this report — confirm whether this step is still needed.
report = Report(metrics=[])
report.run(eval_dataset)

# Columns to inspect: the source text plus one column per descriptor alias.
RESULT_COLUMNS = [
    "Text",
    "has_health_keywords",
    "no_illegal_terms",
    "includes_wellness_terms",
    "excludes_bad_words",
    "has_links",
    "matches_expected_items",
    "no_forbidden_items",
    "matches_expected_words",
    "no_forbidden_words",
]

# View result DataFrame. Leaving it as the cell's last expression lets the
# notebook render it as a table instead of line-wrapped plain text from print().
results_df = eval_dataset.as_dataframe()
results_df[RESULT_COLUMNS]
 
 

                                                 Text  has_health_keywords  \
0   Should I eat breakfast before or after exercis...                False   
1   What are some healthy snacks I can eat during ...                 True   
2   Is it okay to have coffee or tea before a work...                False   
3   I don’t like drinking plain water. What are so...                False   
4   Can I replace water with sports drinks during ...                False   
5   I’m 40 and have never exercised before. Where ...                False   
6   How many times a week should I do strength tra...                 True   
7   How does intermittent fasting work? How do I k...                False   
8   How do I stay motivated to work out consistent...                 True   
9   How long should I rest between workout session...                False   
10  Can I build muscle without going to the gym an...                False   
11  How can I exercise at home if I can’t go to th...           

In [6]:
import pandas as pd
from evidently import Dataset, DataDefinition, Report
from evidently.descriptors import (
    Contains,
    DoesNotContain,
    IncludesWords,
    ExcludesWords,
    ContainsLink,
    ItemMatch,
    ItemNoMatch,
    WordMatch,
    WordNoMatch
)
 
# Content-check demo, strict variant: same descriptors as the previous cell, but
# the two static keyword checks (`Contains`, `IncludesWords`) use mode="all"
# instead of mode="any".

# Location of the Q&A export.
# NOTE(review): machine-specific absolute path — point it at your own copy of the CSV.
DATA_PATH = "C:\\new\\health_and_fitness_qna.csv"

# Load dataset
df = pd.read_csv(DATA_PATH)

# Ensure we have a 'Text' column for analysis (combine Question + Answer).
# Missing values are replaced with "" first so the concatenation never yields NaN.
df["Text"] = df["Question"].fillna("") + " " + df["Answer"].fillna("")

# Simulate row-level item/word expectations (for demo purposes).
# Build a fresh list per row: `[[...]] * len(df)` would store the *same* list
# object in every row, so mutating one row's expectations would change them all.
df["Expected_items"] = [["fitness", "diet"] for _ in range(len(df))]
df["Forbidden_items"] = [["injury", "risk"] for _ in range(len(df))]
df["Expected_words"] = [["exercise", "healthy"] for _ in range(len(df))]
df["Forbidden_words"] = [["junk", "drugs"] for _ in range(len(df))]

# Define text column
data_def = DataDefinition(text_columns=["Text"])

# Build dataset with content-check descriptors; each descriptor's `alias`
# is the name of the column it contributes to the result frame.
eval_dataset = Dataset.from_pandas(
    df,
    data_definition=data_def,
    descriptors=[
        # Static list checks: the terms are fixed for every row.
        Contains(column_name="Text", items=["fitness", "health"], mode="all", case_sensitive=False, alias="has_health_keywords"),
        DoesNotContain(column_name="Text", items=["illegal", "fake"], case_sensitive=False, alias="no_illegal_terms"),
        IncludesWords(column_name="Text", words_list=["exercise", "wellness"], mode="all", lemmatize=True, alias="includes_wellness_terms"),
        ExcludesWords(column_name="Text", words_list=["scam", "spam"], alias="excludes_bad_words"),
        ContainsLink(column_name="Text", alias="has_links"),

        # Row-level dynamic list checks: the terms come from a list column on the same row.
        ItemMatch(columns=["Text", "Expected_items"], mode="all", case_sensitive=False, alias="matches_expected_items"),
        ItemNoMatch(columns=["Text", "Forbidden_items"], alias="no_forbidden_items"),
        WordMatch(columns=["Text", "Expected_words"], mode="all", lemmatize=True, alias="matches_expected_words"),
        WordNoMatch(columns=["Text", "Forbidden_words"], mode="all", lemmatize=True, alias="no_forbidden_words"),
    ],
)

# Run the report.
# NOTE(review): no metrics are configured and the values shown below are read
# from `eval_dataset`, not from this report — confirm whether this step is still needed.
report = Report(metrics=[])
report.run(eval_dataset)

# Columns to inspect: the source text plus one column per descriptor alias.
RESULT_COLUMNS = [
    "Text",
    "has_health_keywords",
    "no_illegal_terms",
    "includes_wellness_terms",
    "excludes_bad_words",
    "has_links",
    "matches_expected_items",
    "no_forbidden_items",
    "matches_expected_words",
    "no_forbidden_words",
]

# View result DataFrame. Leaving it as the cell's last expression lets the
# notebook render it as a table instead of line-wrapped plain text from print().
results_df = eval_dataset.as_dataframe()
results_df[RESULT_COLUMNS]
 
 

                                                 Text  has_health_keywords  \
0   Should I eat breakfast before or after exercis...                False   
1   What are some healthy snacks I can eat during ...                False   
2   Is it okay to have coffee or tea before a work...                False   
3   I don’t like drinking plain water. What are so...                False   
4   Can I replace water with sports drinks during ...                False   
5   I’m 40 and have never exercised before. Where ...                False   
6   How many times a week should I do strength tra...                False   
7   How does intermittent fasting work? How do I k...                False   
8   How do I stay motivated to work out consistent...                False   
9   How long should I rest between workout session...                False   
10  Can I build muscle without going to the gym an...                False   
11  How can I exercise at home if I can’t go to th...           