# FALLACY CLASSIFIER TRAINING ~

# Dependencies and prep env

In [0]:
!pip install mlflow transformers torch scikit-learn nltk
%restart_python

[43mNote: you may need to restart the kernel using %restart_python or dbutils.library.restartPython() to use updated packages.[0m


In [0]:
import nltk
import torch
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import classification_report

# Unified Data Loader

In [0]:
%python
import nltk
import random
nltk.download('punkt')

def load_fallacy_data(fallacy_name: str):
    try:
        # Try with BIGINT/INT comparison
        examples_df = spark.sql(f"""
            SELECT text 
            FROM logical_fallacy_data.{fallacy_name}
            WHERE label = 1
        """)

        non_examples_df = spark.sql(f"""
            SELECT text 
            FROM logical_fallacy_data.{fallacy_name}
            WHERE label = 0
        """)

    except Exception as e:
        try:
            # Try with BOOLEAN comparison
            examples_df = spark.sql(f"""
                SELECT text 
                FROM logical_fallacy_data.{fallacy_name}
                WHERE label = TRUE
            """)

            non_examples_df = spark.sql(f"""
                SELECT text 
                FROM logical_fallacy_data.{fallacy_name}
                WHERE label = FALSE
            """)

        except Exception as e:
            print(f"Error with BOOLEAN comparison for {fallacy_name}: {e}")
            return [], []

    # Convert to Pandas (safe in Databricks serverless)
    examples = examples_df.toPandas()['text'].tolist()
    non_examples = non_examples_df.toPandas()['text'].tolist()
    # Randomly select 50 examples and 50 non-examples
    examples = random.sample(examples, min(len(examples), 25))
    non_examples = random.sample(non_examples, min(len(non_examples), 75))
    conversation_examples = [
        "Hi Dana, how are you doing today?",
        "Hello John, it's nice to see you again!",
        "Good morning, Sarah. Did you sleep well?",
        "Hey Mike, what are your plans for the weekend?",
        "Hi Emma, have you finished your project?",
        "Hello Alex, how was your vacation?",
        "Good afternoon, Lisa. How's work going?",
        "Hey Tom, did you watch the game last night?",
        "Hi Rachel, are you free for lunch today?",
        "Hello Sam, how's your family doing?",
        "Good evening, Chris. Any plans for tonight?",
        "Hey Jessica, did you hear about the new restaurant?",
        "Hi David, how's your new job?",
        "Hello Megan, are you coming to the party?",
        "Good morning, James. Ready for the meeting?",
        "Hey Laura, how was your weekend?",
        "Hi Kevin, have you seen the latest movie?",
        "Hello Anna, how's your day going?",
        "Good afternoon, Brian. Any updates on the project?",
        "Hey Olivia, did you get my message?"
    ]

    for ex in conversation_examples:
        non_examples.append(ex)
    print(f"[{fallacy_name}] examples: {examples}")
    print(f"[{fallacy_name}] non-examples: {non_examples}")

    return examples, non_examples

fallacies = [
    "red_herring",
    "straw_man",
    "slippery_slope",
    "ad_hominem",
    "hasty_generalization",
    "ignorance",
    "hypocrisy",
    "stacking_deck"
]

fallacy_data = {}

for name in fallacies:
    examples, non_examples = load_fallacy_data(name)
    fallacy_data[name] = {
        "examples": examples,
        "non_examples": non_examples
    }


[nltk_data] Downloading package punkt to
[nltk_data]     /home/spark-77f5d428-456b-4581-88f9-18/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


[red_herring] examples: ['The fingerprints found at the scene don’t match anyone in the database, but what if the culprit is from out of town?', 'She brought up bullying in schools, and he pivoted to how school lunches used to be cheaper in the 90s.', 'Sure, I didn’t clean the kitchen, but you never take the trash out!', 'I know I made a mistake at work, but think of all the good things I’ve done.', 'The car was parked nearby, but I think we should focus on the suspicious construction workers in the area.', 'The room was locked from the inside, but I’ve been thinking about the victim’s past relationships.', 'We were analyzing the budget, but he insisted on discussing the origin of nachos instead.', 'I wanted to know why the internet bill is so high, and they told me about their cat’s new diet.', 'We need to fix flooding infrastructure, but did anyone hear that new ringtone trend?', 'The victim’s friend has an alibi, but the weather that night was absolutely horrific.', 'You were talkin

# Unified Training Function

In [0]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import os

def train_fallacy_model(fallacy_name, examples, non_examples, output_dir="models", epochs=2, batch_size=16, lr=2e-5):
    if not examples or not non_examples:
        print(f"⚠️ Skipping {fallacy_name}: empty examples or non-examples.")
        return
    # Prepare training data
    texts = examples + non_examples
    labels = [1] * len(examples) + [0] * len(non_examples)

    # Load tokenizer and encode
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    encodings = tokenizer(texts, truncation=True, padding=True, return_tensors='pt')
    inputs = encodings['input_ids']
    masks = encodings['attention_mask']
    labels_tensor = torch.tensor(labels)

    # Build dataset and split
    dataset = TensorDataset(inputs, masks, labels_tensor)
    train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size)

    # Load model
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    
    scaler = torch.cuda.amp.GradScaler()

    # Train loop
    model.train()
    for epoch in range(epochs):
        print(f"Epoch {epoch+1}/{epochs}")
        for batch in train_loader:
            b_input_ids, b_input_mask, b_labels = batch
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                outputs = model(b_input_ids, attention_mask=b_input_mask, labels=b_labels)
                loss = outputs.loss
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

    # Evaluate
    model.eval()
    predictions, true_labels = [], []
    with torch.no_grad():
        for batch in test_loader:
            b_input_ids, b_input_mask, b_labels = batch
            outputs = model(b_input_ids, attention_mask=b_input_mask)
            logits = outputs.logits
            preds = torch.argmax(logits, dim=1)
            predictions.extend(preds.tolist())
            true_labels.extend(b_labels.tolist())

    print(f"\n[Evaluation for {fallacy_name}]")
    print(classification_report(true_labels, predictions))

    # Save model/tokenizer
    model_dir = os.path.join(output_dir, fallacy_name)
    os.makedirs(model_dir, exist_ok=True)
    model.save_pretrained(model_dir)
    tokenizer.save_pretrained(model_dir)
    print(f"✅ Model saved to {model_dir}")

# Load fallacy data and train

In [0]:
%python
# Safe temporary local storage
local_model_dir = "/local_disk0/tmp/fallacy_models"
os.makedirs(local_model_dir, exist_ok=True)

fallacies = [
    "red_herring",
    "straw_man",
    "slippery_slope",
    "ad_hominem",
    "hasty_generalization",
    "ignorance",
    "hypocrisy",
    "stacking_deck"
]

for fallacy_name in fallacies:
    examples, non_examples = load_fallacy_data(fallacy_name)
    train_fallacy_model(fallacy_name, examples, non_examples)
    if not examples or not non_examples:
        print(f"❌ No data for: {fallacy_name}")

[red_herring] examples: ['The victim’s friend has an alibi, but the weather that night was absolutely horrific.', 'They were debating surveillance laws, but someone interrupted with a story about a haunted coffee mug.', "The security footage shows someone wearing a mask, but we should first explore the victim's financial troubles.", 'You asked why the bridge collapsed, and he responded with concerns about dogs wearing sweaters.', 'Sure, oil prices matter, but do you know the history of paperclips?', 'There were multiple witnesses, but it’s worth considering the recent local protests happening around that time.', "You're asking about public safety, but nobody wants to talk about how pigeons are ruining the parks.", 'He criticized the CEO’s ethics, and she replied with how fun the company picnic was last year.', 'There’s a trail of footprints leading away from the scene, but let’s not forget the local gang activity.', 'Discussing healthcare costs? I think the bigger issue is how people a

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for red_herring]
              precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       0.00      0.00      0.00         5

    accuracy                           0.79        24
   macro avg       0.40      0.50      0.44        24
weighted avg       0.63      0.79      0.70        24



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


✅ Model saved to models/red_herring
[straw_man] examples: ["I think these brave female journalists in Spain can do plenty by protesting in the streets, it's important to get the word out about injustices to the common folk. I also am skeptical about your expert, I have never heard of this 'famous' Dr. John Doe. It honestly sounds like a very generic and made up name and only serves to denigrate your argument. I would hope that next time you would impart some of the qualifications of this Doctor, it would greatly improve the believability of his inclusion. Either way, Spain needs to close this pay gap and up the representation of women in the media. I am hopeful that almost all experts and commoners could find agreement in this!", "Just because Dana believes in education reform, doesn't mean we should create chaos. What next, dismantle the police?", "I think anything that empowers women like this is a good thing. It's also peaceful and no one is harmed in the process", 'It seems like th

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for straw_man]
              precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       0.00      0.00      0.00         5

    accuracy                           0.79        24
   macro avg       0.40      0.50      0.44        24
weighted avg       0.63      0.79      0.70        24



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


✅ Model saved to models/straw_man
[slippery_slope] examples: ["The legalization of pepper spray for use by women for self-protection sounds like a good idea, sure, but Jamaica must be careful. Taking this line can lead to unforeseen consequences, as things can get out of control very quickly. First, they will be given the right to use pepper spray, but what comes after that? Perhaps blackjacks. And then? Maybe tasers. And after that? Probably guns. And then there would be a troubling new layer of violence in Jamaica. This kind of approach to solving problems can evolve rapidly and get away from you, eventually creating a whole new set of problems. I'm not saying pepper spray is a bad idea, just that caution is highly advised.", 'If we allow flexible grading, students will stop striving for excellence.', 'If we allow one exception to the dress code, eventually no one will follow any rules and professionalism will vanish.', "If we continue to turn a deaf ear, we are indirectly encouragin

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for slippery_slope]
              precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       0.00      0.00      0.00         5

    accuracy                           0.79        24
   macro avg       0.40      0.50      0.44        24
weighted avg       0.63      0.79      0.70        24



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


✅ Model saved to models/slippery_slope
[ad_hominem] examples: ["His opinion on taxes doesn't count; he cheated on his own.", 'Why should we listen to him? He’s never held a real job.', 'She’s only saying that because she wants to sound progressive.', "His argument is invalid because he's just a bartender.", "Don't believe her argument about the environment; she flies everywhere.", 'I’m not taking moral advice from someone who’s been divorced three times.', "Don't listen to her views on nutrition—she eats fast food every week.", "His argument is invalid because he's just a janitor.", "He's just bitter because he wasn’t invited to the summit.", 'She doesn’t care about privacy; she shares her whole life on TikTok.', 'Of course she wants universal income—she’s lazy.', "You don’t have enough life experience to understand what's really going on.", "Don't take his advice on health; he's overweight.", 'I’d believe him if he didn’t look like a conspiracy theorist.', 'He’s the kind of person who

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for ad_hominem]
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        19
           1       1.00      0.80      0.89         5

    accuracy                           0.96        24
   macro avg       0.97      0.90      0.93        24
weighted avg       0.96      0.96      0.96        24

✅ Model saved to models/ad_hominem
[hasty_generalization] examples: ['I tripped on a sidewalk crack this morning, so clearly every sidewalk in the city is a safety hazard.', 'The drone I bought last year malfunctioned after a month. Drones are just unreliable trash.', 'This just seems to be another typical result of colonialism. Is it ever any different? I feel like all the news I see suggests not. The impacts of colonial rule are always just that of suffering and injustice.', "I have some friends with family in the country and they tell me everything that happens involves this kind of dirty, shady corruption. They s

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for hasty_generalization]
              precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       0.00      0.00      0.00         5

    accuracy                           0.79        24
   macro avg       0.40      0.50      0.44        24
weighted avg       0.63      0.79      0.70        24



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


✅ Model saved to models/hasty_generalization
[ignorance] examples: ['We haven’t found life on Mars, so there must not be any.', 'It hasn’t been investigated, so it must be true.', "There's no proof disproving my claim, so it's valid.", 'Since we can’t see black holes with our eyes, they might not exist.', "You can't prove they are trustworthy, so it must be false.", 'There’s no conclusive proof he isn’t cheating, so he probably is.', 'You can’t prove that unicorns don’t exist, therefore they do.', 'There’s no evidence against it, so I’ll keep believing it.', "You can't disprove that the product works, so it must.", "There's no evidence that eating chocolate at night isn't good for memory, so it is.", 'Because no one has shown dragons never existed, they might have.', "No one has shown that reincarnation being real isn't true, so it must be.", 'No one has proved this species is extinct, so it’s still alive.', 'Since we haven’t found the creature yet, it’s still out there.', 'No evidence

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for ignorance]
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        19
           1       1.00      0.80      0.89         5

    accuracy                           0.96        24
   macro avg       0.97      0.90      0.93        24
weighted avg       0.96      0.96      0.96        24

✅ Model saved to models/ignorance
[hypocrisy] examples: ['She thinks eating meat is unethical, but wears leather boots.', 'You want me to be more organized? Your room looks like a tornado hit it.', "You can't tell me to be humble—you brag all the time.", 'You say I should save money, but you’re constantly buying expensive gadgets.', 'You’re acting like a saint now, but remember Vegas last summer?', 'You always say we should be calm under pressure, yet you freaked out yesterday.', "You say I should be more focused, but you can't even finish a sentence without checking your phone.", "Don't tell me to avoid gossip—you sp

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for hypocrisy]
              precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       0.00      0.00      0.00         5

    accuracy                           0.79        24
   macro avg       0.40      0.50      0.44        24
weighted avg       0.63      0.79      0.70        24



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


✅ Model saved to models/hypocrisy
[stacking_deck] examples: ['This treatment is a success, based on the few patients who got better, without considering the ones it didn’t work for.', 'The documentary praised the system without giving critics a voice.', 'I compiled a list of all the benefits of smoking cigars — no, I didn’t include the health effects.', 'We’re proving that our approach works by showcasing only the positive outcomes, without addressing the negative results.', 'Everyone agrees that this is the top way to go, so we shouldn’t even consider alternatives.', 'All the promotional material features only the best moments from the trip.', 'Every article in our pamphlet is pro-growth. Anti-growth ones are unproductive.', 'You’ve only heard the good things because the bad stuff isn’t worth discussing.', 'Clearly, just fools would think differently.', 'You should date him — I listed all his good traits and none of the bad ones.', 'The report only considered the companies that thrive

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Epoch 1/2
Epoch 2/2

[Evaluation for stacking_deck]
              precision    recall  f1-score   support

           0       0.79      1.00      0.88        19
           1       0.00      0.00      0.00         5

    accuracy                           0.79        24
   macro avg       0.40      0.50      0.44        24
weighted avg       0.63      0.79      0.70        24



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


✅ Model saved to models/stacking_deck


# Move raw inference models

In [0]:
import os
import shutil

# Safe temporary local storage
local_model_dir = "/local_disk0/tmp/fallacy_models"
os.makedirs(local_model_dir, exist_ok=True)

# Copy only trained models into safe path
for fallacy in fallacies:
    src = f"models/{fallacy}"
    dst = f"{local_model_dir}/{fallacy}"
    if os.path.exists(src):
        shutil.copytree(src, dst, dirs_exist_ok=True)
    else:
        print(f"Model for {fallacy} does not exist at {src}")

# Serve via MLflow

In [0]:
%python
import os
import shutil
import mlflow.pyfunc
from transformers import BertTokenizer, BertForSequenceClassification

class FallacyEnsembleModel(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        self.fallacy_types = [
            "straw_man", "red_herring", "ad_hominem",
            "hasty_generalization", "appeal_to_ignorance",
            "hypocrisy", "stacking_deck"
        ]
        self.model_dir = context.artifacts["model_dir"]
        self.fallacy_models = {}

        for fallacy in self.fallacy_types:
            model_path = os.path.join(self.model_dir, fallacy)
            if not os.path.exists(model_path):
                print(f"Skipping {fallacy}: model not found at {model_path}")
                continue
            try:
                model = BertForSequenceClassification.from_pretrained(model_path)
                tokenizer = BertTokenizer.from_pretrained(model_path)
                model.eval()
                self.fallacy_models[fallacy] = (tokenizer, model)
            except Exception as e:
                print(f"Error loading {fallacy}: {e}")

    def predict(self, context, model_input):
        texts = model_input["text"].tolist()
        results = []
        for text in texts:
            fallacy_scores = []
            for fallacy, (tokenizer, model) in self.fallacy_models.items():
                inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
                with torch.no_grad():
                    logits = model(**inputs).logits
                    probs = torch.softmax(logits, dim=1)
                    fallacy_prob = probs[0][1].item()
                fallacy_scores.append((fallacy, fallacy_prob))
            fallacy_scores.sort(key=lambda x: x[1], reverse=True)
            results.append(fallacy_scores)
        return results

# Path to save the MLflow model
save_path = "/local_disk0/tmp/fallacy_ensemble_model_v1"
shutil.rmtree(save_path, ignore_errors=True)
run_id = ""
# Save the model using MLflow
with mlflow.start_run() as run:
    mlflow.pyfunc.save_model(
        path=save_path,
        python_model=FallacyEnsembleModel(),     # Class instance
        artifacts={"model_dir": "/local_disk0/tmp/fallacy_models"}
    )
    run_id = run.info.run_id



Skipping appeal_to_ignorance: model not found at /local_disk0/tmp/fallacy_models/appeal_to_ignorance


Downloading artifacts:   0%|          | 0/40 [00:00<?, ?it/s]

In [0]:
%python
mlflow.end_run()


In [0]:
%python
import os
import shutil
import pandas as pd
import mlflow.pyfunc
from transformers import BertTokenizer, BertForSequenceClassification
from mlflow.models.signature import ModelSignature
from mlflow.types.schema import Schema, ColSpec
from mlflow.types import DataType

import torch
from typing import List, Any

class FallacyEnsembleModel(mlflow.pyfunc.PythonModel):
    def load_context(self, context):
        self.fallacy_types = [
            "red_herring",
            "straw_man",
            "slippery_slope",
            "ad_hominem",
            "hasty_generalization",
            "ignorance",
            "hypocrisy",
            "stacking_deck"
        ]
        self.model_dir = context.artifacts["model_dir"]
        self.fallacy_models = {}

        for fallacy in self.fallacy_types:
            model_path = os.path.join(self.model_dir, fallacy)
            if not os.path.exists(model_path):
                print(f"Skipping {fallacy}: model not found at {model_path}")
                continue
            try:
                model = BertForSequenceClassification.from_pretrained(model_path)
                tokenizer = BertTokenizer.from_pretrained(model_path)
                model.eval()
                self.fallacy_models[fallacy] = (tokenizer, model)
            except Exception as e:
                print(f"Error loading {fallacy}: {e}")

    def predict(self, context: mlflow.pyfunc.PythonModelContext, model_input: pd.DataFrame) -> List[Any]:
        texts = model_input["text"].tolist()
        results = []
        for text in texts:
            fallacy_scores = []
            for fallacy, (tokenizer, model) in self.fallacy_models.items():
                inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True)
                with torch.no_grad():
                    logits = model(**inputs).logits
                    probs = torch.softmax(logits, dim=1)
                    fallacy_prob = probs[0][1].item()
                fallacy_scores.append((fallacy, fallacy_prob))
            fallacy_scores.sort(key=lambda x: x[1], reverse=True)
            results.append(fallacy_scores)
        return results

# Define input_data
input_data = pd.DataFrame({"text": ["Example text 1", "Example text 2"]})

# Create an instance of the model
fallacy_model = FallacyEnsembleModel()

# Dummy context for infer_signature
dummy_context = mlflow.pyfunc.PythonModelContext(
    artifacts={"model_dir": "/local_disk0/tmp/fallacy_models"},
    model_config={}
)

# Load context to initialize fallacy_models
fallacy_model.load_context(dummy_context)

# Define the input and output schema
input_schema = Schema([ColSpec("string", "text")])
output_schema = Schema([ColSpec(DataType.string, "predictions")])
signature = ModelSignature(inputs=input_schema, outputs=output_schema)


# Start MLflow run and log the model
run_id = ""
with mlflow.start_run() as run:
    mlflow.pyfunc.log_model(
        artifact_path="model",  # name of the logged artifact
        python_model=fallacy_model,
        artifacts={"model_dir": "/local_disk0/tmp/fallacy_models"},
        signature=signature,
        input_example=input_data
    )
    run_id = run.info.run_id
    model_uri = f"runs:/{run_id}/model"
    print("✅ Model logged at:", model_uri)


2025/05/07 02:10:07 INFO mlflow.pyfunc: Validating input example against model signature


Downloading artifacts:   0%|          | 0/40 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/51 [00:00<?, ?it/s]

✅ Model logged at: runs:/5e3de9dbab6c4ecb9208b9e463488ebd/model


# Register model to unity catalog
weird databricks specific thing with respect to mlflow model registries

In [0]:
%sql
SHOW CATALOGS;

catalog
ml_models
samples
system
workspace


In [0]:
import mlflow
from mlflow.models import infer_signature
import pandas as pd

print(f"RUN_ID = {run_id}")
# Set the registry URI to Unity Catalog
mlflow.set_registry_uri("databricks-uc")
# Example input data as a string
input_text = ["You're just a student."]

# Create a DataFrame with the input text
input_data = pd.DataFrame({"text": input_text})
# Define the registered model name
registered_model_name = "default.fallacy_ensemble_uc"

# Register the model in Unity Catalog
result = mlflow.register_model(f"runs:/{run_id}/model", registered_model_name)
version = result.version
print(f"Model registered with version: {version}")

# Define the model artifact path
model_artifact_path = f"models:/{registered_model_name}/{version}"

# Verify the model artifact path
destination_path = "/local_disk0/tmp/model"  # Use a local path
mlflow.artifacts.download_artifacts(
    artifact_uri=model_artifact_path,
    dst_path=destination_path
)

RUN_ID = 5e3de9dbab6c4ecb9208b9e463488ebd


Registered model 'default.fallacy_ensemble_uc' already exists. Creating a new version of this model...


Downloading artifacts:   0%|          | 0/51 [00:00<?, ?it/s]

Uploading artifacts:   0%|          | 0/51 [00:00<?, ?it/s]

Created version '4' of model 'workspace.default.fallacy_ensemble_uc'.


Model registered with version: 4


Downloading artifacts:   0%|          | 0/51 [00:00<?, ?it/s]

'/local_disk0/tmp/model/'

# Example Usage

In [0]:
model = mlflow.pyfunc.load_model(save_path)

import pandas as pd
df = pd.DataFrame({"text": ["Your just a student, you have no idea what yo're talking about"]})
predictions = model.predict(df)
print(predictions)


Skipping appeal_to_ignorance: model not found at /local_disk0/tmp/fallacy_ensemble_model_v1/artifacts/fallacy_models/appeal_to_ignorance
[[('ad_hominem', 0.48073244094848633), ('hasty_generalization', 0.3367263078689575), ('hypocrisy', 0.28081604838371277), ('red_herring', 0.2566094398498535), ('straw_man', 0.20217767357826233), ('stacking_deck', 0.1539662927389145)]]
