# Welcome!

This notebook will allow you to customize prompts with different language models on your data.

# 1. Install Prerequisite Libraries
The below code will depend on three Python "libraries" (software collections). Run the below cell once to install them.

In [None]:
%pip install requests pandas scikit-learn

# 2. Establish Your Working Directory

For our projects this semester we will upload a .csv file that has a "text" column. This will be our input to the language model.

First establish your working directory. Create a folder called "Jupyter" and put it in your Documents folder. Then run this cell.

In [None]:
# Import the libraries we need
from pathlib import Path  # This helps us work with file paths
import os                # This lets us change directories

def use_jupyter_folder():
    """Switch the working directory to the Jupyter folder inside Documents.

    Prints a confirmation with the new working directory when the folder
    exists; otherwise prints a reminder to create it and changes nothing.
    """
    target = Path.home().joinpath('Documents', 'Jupyter')

    # Guard clause: without the folder there is nothing to switch to
    if not target.exists():
        print("❌ Couldn't find the Jupyter folder in Documents.")
        print("Please make sure you've created it first.")
        return

    os.chdir(target)
    print("✅ Now using your Jupyter folder!")
    print(f"Current working directory: {Path.cwd()}")

# Run this to switch to the Jupyter folder
use_jupyter_folder()

# 3. Upload Your Data

Next, upload a .csv file of your choosing. Paste the filename where indicated at the top of the cell below (`FILENAME`). This cell will output the column names.

In [None]:
########## CONFIGURATION VARIABLES ###########
FILENAME = "NarraDetect_Scalar.csv"  # Your CSV filename here

## Define Function
import pandas as pd

def load_csv(filename):
    """Read a CSV file into a DataFrame and print a short summary.

    On success the row/column counts and every column name are printed and
    the DataFrame is returned. On failure an error message is printed and
    None is returned.
    """
    try:
        table = pd.read_csv(filename)
        n_rows, n_cols = table.shape
        print(f"✅ Successfully loaded {filename}")
        print(f"Shape: {n_rows} rows, {n_cols} columns")
        print("\nColumns in this dataset:")
        for column_name in table.columns:
            print(f"- {column_name}")
        return table
    except FileNotFoundError:
        # Tell the user where we looked so they can move the file there
        print(f"❌ Could not find {filename} in {Path.cwd()}")
    except Exception as err:
        print(f"❌ Error loading file: {err}")
    return None

## Run function
df = load_csv(FILENAME)


# 4. Inspect Your Data

This cell will give you brief summary statistics on the input text column. This is the column you will use as part of your prompting.

In [None]:
########## CONFIGURATION VARIABLES ###########
TEXT_COLUMN = 'TEXT'    # Column containing text data
NUM_EXAMPLES = 2        # Number of example texts to display

########## FUNCTION DEFINITION ###########
def text_stats(df, text_column=TEXT_COLUMN, num_examples=NUM_EXAMPLES):
    """Display text statistics and examples

    Parameters:
    df (pandas.DataFrame): Your dataset
    text_column (str): Name of the column that holds the text passages
    num_examples (int): Maximum number of example texts to print. It is
        capped at the number of rows, so a small dataset no longer raises
        an IndexError.
    """
    # Word count per row (whitespace-separated tokens). Rows with missing
    # text give NaN here, which the summary statistics below ignore.
    word_counts = df[text_column].str.split().str.len()
    total_words = word_counts.sum()

    print("📊 Dataset Overview:")
    print(f"Total number of texts: {len(df)}")

    print("\n📝 Text Length Statistics:")
    print(f"Shortest text: {word_counts.min()} words")
    print(f"Longest text: {word_counts.max()} words")
    print(f"Average length: {word_counts.mean():.1f} words")
    print(f"Median length: {word_counts.median():.1f} words")
    print(f"Total words in dataset: {total_words:,} words")

    # Never ask for more examples than there are rows
    num_shown = max(0, min(num_examples, len(df)))

    print(f"\n📚 Here are {num_shown} example texts from your data:")
    for i in range(num_shown):
        # Positional lookup: also works when index labels are repeated
        text = df[text_column].iloc[i]
        # A missing cell is not a string and has no .split(); count 0 words
        length = len(text.split()) if isinstance(text, str) else 0
        print(f"Example {i+1}:")
        print(f"Length: {length} words")
        print(f"Text: {text}")

# Calculate statistics and show examples
text_stats(df)

# 5. Define your Ollama model

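Each model only has to be downloaded once for the semester; after that it stays on your computer. To download a new model, uncomment the `ollama pull` line the first time you use it.
You do need to run this cell at the start of every session, and again every time you want to test a different model, because it sets the `model` variable that the later cells use.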

In [None]:
# Name of the Ollama model to use. The Prompt Testing cell copies this value
# into MODEL_NAME, so rerun that cell after changing the model here.
model = "llama3:8b"  # Change this to your model name, e.g. "mistral", "codellama", etc.
#model = "deepseek-r1:7b"
# Uncomment the next line to download the model with `ollama pull`.
#!ollama pull {model}
# Printed unconditionally: with the pull line commented out, this cell only
# sets the `model` variable and does not download anything.
print("Done!")

# 6. Prompt Testing

In this cell you define your various parameters. These include your model, the column that has text passages, your prompt, and whether you want to use a structured output.

In [None]:
##### INPUT YOUR PARAMETERS HERE #####
# Model chosen in the "Define your Ollama model" cell. This name is also used
# as the name of the output column when the prompt is run on the sample.
MODEL_NAME = model 
COLUMN_NAME = "TEXT"   # Change dataframe column name here
# {text} is replaced by the passage when the prompt is sent to the model.
PROMPT_TEMPLATE = "Is this passage from a story? Answer 1 for yes or 0 for no {text}" #Change your prompt here
# True  = ask Ollama for a JSON object whose "label" must be one of LABELS
# False = accept the model's free-text answer as is
STRUCTURED = False
# Allowed answers; only used when STRUCTURED is True.
LABELS = ["1", "0"]

## 7. Test a random passage

The cell chooses a random passage from the .csv and outputs the answer. You can run multiple times to keep testing answers on random passages.

In [None]:
import random
import random
import requests
import ast

def query_ollama(text):
    """Query local ollama model with text

    Builds the prompt from PROMPT_TEMPLATE, sends it to MODEL_NAME on the
    local Ollama server and returns the answer.

    Parameters:
    text (str): The passage inserted into the prompt's {text} placeholder

    Returns:
    str or None: The model's raw response, or only the chosen label when
    STRUCTURED is True. None if Ollama cannot be reached, the model is not
    installed, or the request fails (an explanation is printed).
    """
    import json  # imported here so this cell does not rely on other cells' imports

    base_url = "http://localhost:11434"
    prompt = PROMPT_TEMPLATE.format(text=text)

    data = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,
    }
    if STRUCTURED:
        # JSON schema restricting the answer to {"label": <one of LABELS>}.
        # The key is left out entirely for free-text answers.
        data["format"] = {
            "type": "object",
            "properties": {
                "label": {
                    "type": "string",
                    "enum": LABELS
                },
            },
            "required": ["label"]
        }

    try:
        # Check if model exists
        models = requests.get(f"{base_url}/api/tags").json()
        available_models = [entry['name'] for entry in models['models']]

        # Ollama lists every model with a tag (e.g. "mistral:latest"). A name
        # written without a tag means ":latest", so compare on that basis.
        requested = MODEL_NAME if ':' in MODEL_NAME else f"{MODEL_NAME}:latest"
        if requested not in available_models:
            print(f"❌ Model '{MODEL_NAME}' not found.")
            print(f"Available models: {', '.join(available_models)}")
            print(f"\nTo install {MODEL_NAME}, run this in terminal:")
            print(f"ollama pull {MODEL_NAME}")
            return None

        response = requests.post(f"{base_url}/api/generate", json=data)
        if not response.ok:
            # The server replied, so it IS running; show what it reported
            print(f"❌ Ollama returned an error (HTTP {response.status_code}):")
            print(response.text)
            return None

        result = response.json()
        if STRUCTURED:
            # The structured answer is JSON text, so parse it as JSON
            return json.loads(result['response'])['label']
        return result['response']

    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to Ollama")
        print("1. Check if Ollama is installed") 
        print("2. Start Ollama by running 'ollama serve' in terminal")
        return None
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return None

def analyze_random_text(df):
    """Pick one row at random, print its passage, and ask the model about it.

    Returns whatever query_ollama returns for the chosen passage.
    """
    last_position = len(df) - 1
    position = random.randint(0, last_position)
    passage = df.iloc[position][COLUMN_NAME]

    print("\n📖 SAMPLE PASSAGE:")
    print(passage)
    print("\n🤖 MODEL RESPONSE:")

    answer = query_ollama(passage)
    return answer

# Run
result = analyze_random_text(df)
if result:
    print(result)

# 8. Sample your data

In this cell you will downsample your .csv file to run a mini test in class. For your final report you will run the model(s) against all rows (or a minimum sample of 100 where there are more than 100). This function allows you to determine the number of rows you sample and stores the new table.

**Note:** every time you run this cell you will get a new random sample.

In [None]:
########## CONFIGURATION VARIABLES ###########
SAMPLE_SIZE = 20  # Number of random texts to sample

########## FUNCTION DEFINITION ###########
def sample_texts(df, n=SAMPLE_SIZE):
    """
    Sample up to n random rows from the dataset

    The sample is stored in the global variable ``sample_df`` (which the
    later cells read) and is also returned. If the dataset has fewer than
    n rows, all rows are used instead of raising an error.

    Parameters:
    df (pandas.DataFrame): Your dataset
    n (int): Number of samples to take

    Returns:
    pandas.DataFrame: The sampled rows
    """
    global sample_df

    # df.sample raises a ValueError when asked for more rows than exist
    n_rows = min(n, len(df))
    if n_rows < n:
        print(f"⚠️ Only {len(df)} rows available; using all of them instead of {n}.")

    sample_df = df.sample(n=n_rows)
    return sample_df
    
# Create a random sample of SAMPLE_SIZE rows (stored in sample_df)
sample_texts(df)

# 9. Run your prompt on your sample data

In this cell you will run your prompt on the sampled data from above. The outputs will be stored as a new column named after the model you are using. In the next cell you can view those results. The cell will output "Completed" when complete.

**Note:** this cell takes its parameters from Section 6 (Prompt Testing). If you want to change them, go back up and rerun that cell first.

In [None]:
import requests

def query_ollama(text):
    """Query local ollama model with text

    Builds the prompt from PROMPT_TEMPLATE, sends it to MODEL_NAME on the
    local Ollama server and returns the answer.

    Parameters:
    text (str): The passage inserted into the prompt's {text} placeholder

    Returns:
    str or None: The model's raw response, or only the chosen label when
    STRUCTURED is True. None if Ollama cannot be reached, the model is not
    installed, or the request fails (a short message is printed).
    """
    import json  # imported here so this cell does not rely on other cells' imports

    base_url = "http://localhost:11434"
    prompt = PROMPT_TEMPLATE.format(text=text)

    data = {
        "model": MODEL_NAME,
        "prompt": prompt,
        "stream": False,
    }
    if STRUCTURED:
        # JSON schema restricting the answer to {"label": <one of LABELS>}.
        # The key is left out entirely for free-text answers.
        data["format"] = {
            "type": "object",
            "properties": {
                "label": {
                    "type": "string",
                    "enum": LABELS
                },
            },
            "required": ["label"]
        }

    try:
        # Check if model exists
        models = requests.get(f"{base_url}/api/tags").json()
        available_models = [entry['name'] for entry in models['models']]

        # Ollama lists every model with a tag (e.g. "mistral:latest"). A name
        # written without a tag means ":latest", so compare on that basis.
        requested = MODEL_NAME if ':' in MODEL_NAME else f"{MODEL_NAME}:latest"
        if requested not in available_models:
            print(f"❌ Model '{MODEL_NAME}' not found.")
            return None

        response = requests.post(f"{base_url}/api/generate", json=data)
        if not response.ok:
            # The server replied, so it IS running; show what it reported
            print(f"❌ Ollama returned an error (HTTP {response.status_code}): {response.text}")
            return None

        result = response.json()
        if STRUCTURED:
            # The structured answer is JSON text, so parse it as JSON
            return json.loads(result['response'])['label']
        return result['response']
    except requests.exceptions.ConnectionError:
        print("❌ Cannot connect to Ollama")
        return None
    except Exception as e:
        print(f"❌ Error: {str(e)}")
        return None

def analyze_all_texts(df):
    """Run the prompt on every passage and store the answers in a new column.

    The new column is named after MODEL_NAME and is added to ``df`` itself,
    which is then returned.
    """
    passages = df[COLUMN_NAME]
    # One call to the model per row
    responses = passages.apply(query_ollama)
    df[MODEL_NAME] = responses
    return df

# Run analysis on all rows
sample_df = analyze_all_texts(sample_df)
print("Completed!")

# 10. Inspect your outputs

You can quickly scan your results by printing out the first N examples. Change the final integer to print more or less. Shows the passage + prompt output.

In [None]:
print(sample_df[[COLUMN_NAME, MODEL_NAME]].head(5))

Print a single passage by row number.

In [None]:
# Display a specific row (change row_number to view different rows)
# row_number is a position within sample_df (0 = first sampled row, looked up
# with .iloc), not the row's number in the original CSV file. It must be
# smaller than the number of sampled rows.
row_number = 2  # Change this number to view different rows
print(f"\nDetailed view of row {row_number}:")
print(f"\nTEXT:\n{sample_df[COLUMN_NAME].iloc[row_number]}")
print(f"\n{MODEL_NAME} response:\n{sample_df[MODEL_NAME].iloc[row_number]}")

# 11. Compare your outputs to another reference column

In the following cells you will compare the accuracy of your outputs to already annotated data. First you need to identify the "reference" column. These are the annotations. Second, you need to align your outputs with those of the reference column. Typically the annotations consist of a small number of codes, so the first step is finding out what these codes are so that you can align them with your outputs.

## 12. What are the annotation categories of my data

Output a table of the categories and their counts in your data. Change the reference column name accordingly.

In [None]:
########## CONFIGURATION VARIABLES ###########
REFERENCE_COLUMN = "Reader.Predicted.Label"  # Column name for reference categories

########## EXECUTE ANALYSIS ###########
# Count how many rows of the sample (not the full dataset) carry each
# reference category
reference_counts = sample_df[REFERENCE_COLUMN].value_counts()

print("Categories and their counts:")
for category, count in reference_counts.items():
    print(f"{category}: {count}")
# Number of rows in the sample, for comparison with the counts above
print(f"\nTotal samples: {len(sample_df)}")

## 13. Clean your outputs to align with reference column

Here you need to input the fixed expressions you want to capture based on your prompt. 

In [None]:
########## CONFIGURATION VARIABLES ###########
# Define your input-output mappings here
CLEANING_CONFIG = {
    'input_patterns': ['1', '0'],  # List of input patterns to match (case-insensitive)
    'output_values': ['1', '0'],      # Corresponding output values
    'unknown_value': 'unknown'        # Value to use when no pattern matches
}

def clean_responses(df, model_column=MODEL_NAME, config=CLEANING_CONFIG):
    """
    Clean and standardize model responses based on provided configuration

    Matching rules, applied to each response (case-insensitive):
    1. A missing response (None/NaN, e.g. from a failed query) is 'unknown'.
    2. A pattern that appears as a whole token wins; if several patterns
       appear, the one that occurs earliest in the response is used. This
       stops '1' from matching inside "100%" or 'no' inside "not".
    3. Otherwise the first pattern found anywhere inside the response is used.
    4. If nothing matches, a warning is printed and 'unknown' is used.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing the responses to clean
    model_column : str
        Name of the column containing responses to clean
    config : dict
        Dictionary containing:
        - input_patterns: list of strings to match (case-insensitive)
        - output_values: corresponding output values
        - unknown_value: value to use when no pattern matches

    Returns:
    --------
    pandas.DataFrame
        DataFrame with new cleaned column added
    """
    import re  # imported here so this cell does not rely on other cells' imports

    pattern_pairs = [
        (pattern.lower(), value)
        for pattern, value in zip(config['input_patterns'], config['output_values'])
    ]

    def standardize_response(response):
        # A failed query is stored as None/NaN. Without this check it would
        # become the text 'none'/'nan' and could match a pattern such as 'no'.
        is_missing = response is None or (isinstance(response, float) and response != response)
        if is_missing:
            print(f"Warning: Unexpected response format: '{response}'")
            return config['unknown_value']

        # Convert response to string and lowercase for matching
        response_str = str(response).lower().strip()

        # First choice: whole-token match, earliest occurrence wins
        earliest = None
        for pattern, value in pattern_pairs:
            token = re.search(rf"(?<!\w){re.escape(pattern)}(?!\w)", response_str)
            if token and (earliest is None or token.start() < earliest[0]):
                earliest = (token.start(), value)
        if earliest is not None:
            return earliest[1]

        # Fallback: pattern found anywhere inside the response
        for pattern, value in pattern_pairs:
            if pattern in response_str:
                return value

        # If no match found, log warning and return unknown value
        print(f"Warning: Unexpected response format: '{response}'")
        return config['unknown_value']

    # Create new cleaned column
    cleaned_column = f"{model_column}_cleaned"
    df[cleaned_column] = df[model_column].apply(standardize_response)

    # Show the counts of each category
    cleaned_counts = df[cleaned_column].value_counts()
    print("\nCleaned response categories:")
    for category, count in cleaned_counts.items():
        print(f"{category}: {count}")
    print(f"\nTotal samples: {len(df)}")

    return df

# Clean the responses using the configuration
sample_df = clean_responses(sample_df)

## 14. Make sure your reference and model labels are the same

In [None]:
# First, let's check if the data types and values are compatible
def check_column_compatibility(df, model_column=MODEL_NAME, reference_column=REFERENCE_COLUMN):
    """
    Report whether the cleaned model outputs can be compared directly with
    the reference labels.

    Prints the dtype and the sorted unique values of both columns. Returns
    True when both the dtypes and the sets of values are equal, otherwise
    False.
    """
    model_cleaned = "{}_cleaned".format(model_column)

    reference_series = df[reference_column]
    cleaned_series = df[model_cleaned]

    # Data types of the two columns
    ref_dtype, model_dtype = reference_series.dtype, cleaned_series.dtype

    # Distinct values of the two columns, sorted for a stable display
    ref_values = sorted(reference_series.unique())
    model_values = sorted(cleaned_series.unique())

    print("Data Type Check:")
    print(f"Reference column ({reference_column}): {ref_dtype}")
    print(f"Model column ({model_cleaned}): {model_dtype}")
    print("\nUnique Values Check:")
    print(f"Reference values: {ref_values}")
    print(f"Model values: {model_values}")

    same_types = ref_dtype == model_dtype
    same_values = set(ref_values) == set(model_values)

    if not (same_types and same_values):
        print("\n❌ Columns need conversion. Run the conversion cell below.")
        return False

    print("\n✅ Columns are compatible! You can proceed to creating the confusion matrix.")
    return True

# Run the compatibility check
columns_compatible = check_column_compatibility(sample_df)


## 15. Simple fix

If you get a ❌ in the check above, it is most likely because one column stores its labels as numbers and the other stores them as text. Here is a simple fix.

In [None]:
########## CONVERSION CODE ###########
import pandas as pd

def convert_columns_to_numeric(df, model_column=MODEL_NAME, reference_column=REFERENCE_COLUMN):
    """
    Convert model outputs to numeric format to match reference labels

    Only the "<model_column>_cleaned" column is converted; the reference
    column is reported but left unchanged. Values that are not numbers (for
    example the 'unknown' placeholder written by the cleaning step) become
    NaN and are listed in a warning, instead of stopping the cell with a
    ValueError.

    Returns the same DataFrame with the cleaned column replaced.
    """
    # Get the cleaned model column name
    model_cleaned = f"{model_column}_cleaned"

    # Convert model outputs to numeric; unparseable values become NaN
    original_values = df[model_cleaned]
    converted = pd.to_numeric(original_values, errors='coerce')

    # Anything that had a value before but is NaN now could not be parsed
    failed = original_values[converted.isna() & original_values.notna()]
    if len(failed) > 0:
        print(f"⚠️ {len(failed)} response(s) could not be converted to a number "
              f"and were set to NaN: {sorted(set(failed.astype(str)))}")
        print("Fix or remove these rows before creating the confusion matrix and metrics.\n")

    df[model_cleaned] = converted

    # Show the results
    print("Updated column dtypes:")
    print(f"Reference column: {df[reference_column].dtype}")
    print(f"Model column: {df[model_cleaned].dtype}")
    print("\nValue counts:")
    print("\nReference counts:")
    print(df[reference_column].value_counts())
    print("\nModel counts:")
    print(df[model_cleaned].value_counts())

    return df

# Convert columns
sample_df = convert_columns_to_numeric(sample_df)

# 16. Compare your results to the reference column

This cell outputs a "confusion matrix." These are great ways to observe how your model is doing.

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd

def display_confusion_matrix(df, model_column=MODEL_NAME, reference_column=REFERENCE_COLUMN):
    """
    Create and display a confusion matrix comparing model predictions to reference labels
    using dynamic labels from the reference column

    Rows of the matrix are the true (reference) labels and columns are the
    predicted labels. When there are exactly two classes, a true/false
    positive/negative reading is printed as well, treating the larger of the
    two sorted labels (e.g. 1 out of 0/1) as the positive class.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing the data
    model_column : str
        Name of the model column; predictions are read from "<model_column>_cleaned"
    reference_column : str
        Name of the column containing reference labels
    """
    # Get the reference and cleaned model outputs
    y_true = df[reference_column]
    y_pred = df[f'{model_column}_cleaned']

    # Get unique values from reference column (sorted to ensure consistent order)
    unique_labels = sorted(df[reference_column].unique())

    # Create confusion matrix with dynamic labels
    cm = confusion_matrix(y_true, y_pred, labels=unique_labels)

    # Create display labels based on unique values
    display_labels = [f"Class {label}" for label in unique_labels]

    # Convert to pandas DataFrame for better display
    cm_df = pd.DataFrame(
        cm,
        index=[f'True {label}' for label in display_labels],
        columns=[f'Predicted {label}' for label in display_labels]
    )

    print("Confusion Matrix:")
    print(f"Model: {model_column}")
    print(cm_df)

    # The positive/negative reading only makes sense for two classes; with
    # one class the matrix is 1x1 and indexing [0,1] would fail.
    if len(unique_labels) != 2:
        print(f"\n(The true/false positive breakdown is only shown for 2 classes; "
              f"found {len(unique_labels)}.)")
        return

    # Sorted order puts the negative class first (row/column 0) and the
    # positive class second (row/column 1).
    negative, positive = display_labels
    print("\nReading the matrix:")
    print(f"True Positives (Correct {positive}): {cm[1,1]}")
    print(f"False Negatives (Missed {positive}): {cm[1,0]}")
    print(f"False Positives (Wrong {positive}): {cm[0,1]}")
    print(f"True Negatives (Correct {negative}): {cm[0,0]}")

# Display confusion matrix
display_confusion_matrix(sample_df)

# Calculate Precision, Recall, and F1 Score

These are measures of agreement we will use this semester to see how well a model + prompt performs. Make sure to adjust the variables at the beginning to match your goals.

In [None]:
########## CONFIGURATION VARIABLES ###########
# Define your reference column and positive class
REFERENCE_COLUMN = "Reader.Predicted.Label"  # Column with reference labels
POSITIVE_CLASS = 1                          # Value that represents the positive class, i.e. the one you want to measure

########## CALCULATE METRICS ###########
from sklearn.metrics import precision_score, recall_score, f1_score

def calculate_metrics(df, model_column=MODEL_NAME, reference_column=REFERENCE_COLUMN, positive_class=POSITIVE_CLASS):
    """
    Print precision, recall, and F1 score of the cleaned model predictions
    against the reference labels.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing the data
    model_column : str
        Name of the model column; predictions are read from "<model_column>_cleaned"
    reference_column : str
        Name of the column containing reference labels
    positive_class : int or str
        Value that represents the positive class in your data
    """
    truth = df[reference_column]
    predictions = df["{}_cleaned".format(model_column)]

    # The three scorers share one call signature, so compute them in turn
    precision, recall, f1 = (
        scorer(truth, predictions, pos_label=positive_class)
        for scorer in (precision_score, recall_score, f1_score)
    )

    # Headline numbers
    print(f"Metrics for {model_column} (positive class = {positive_class}):")
    for metric_name, score in (("Precision", precision), ("Recall", recall), ("F1 Score", f1)):
        print(f"{metric_name}: {score:.3f}")

    # Plain-language reading of each number
    print("\nInterpretation:")
    print(f"- Precision {precision:.1%}: When the model predicts class {positive_class}, it is correct this fraction of the time")
    print(f"- Recall {recall:.1%}: Of all actual class {positive_class} instances, the model found this fraction")
    print(f"- F1 Score {f1:.1%}: The harmonic mean of precision and recall")

# Calculate metrics
calculate_metrics(sample_df)

## Inspect errors

Your errors can take the form of false positives (e.g. when the model thinks a passage is a story but isn't) or false negatives (e.g. when your model thinks the passage isn't a story but is).

In [None]:
def show_error_examples(df, model_column=MODEL_NAME, reference_column=REFERENCE_COLUMN, text_column='TEXT'):
    """
    Print one randomly chosen false positive and one false negative, each
    with its passage, the original and cleaned model response, and the
    true label.

    Only binary data is supported: the two sorted reference labels are
    treated as (negative, positive). With any other number of classes an
    error message is printed and nothing else happens.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing the data
    model_column : str
        Name of the model column; predictions are read from "<model_column>_cleaned"
    reference_column : str
        Name of the column containing reference labels
    text_column : str
        Name of the column containing text passages to display
    """
    pred_col = "{}_cleaned".format(model_column)

    # Sorted so the order of the two classes is always the same
    unique_labels = sorted(df[reference_column].unique())
    if len(unique_labels) != 2:
        print(f"Error: Expected binary classification with 2 classes, but found {len(unique_labels)} classes.")
        return

    negative_class, positive_class = unique_labels  # usually 0 and 1

    predicted = df[pred_col]
    actual = df[reference_column]

    # Predicted positive when actually negative
    false_positives = df[(predicted == positive_class) & (actual == negative_class)]
    # Predicted negative when actually positive
    false_negatives = df[(predicted == negative_class) & (actual == positive_class)]

    # (heading, label the model gave, true label, matching rows, message if none)
    sections = (
        ("=== FALSE POSITIVE EXAMPLE ===", positive_class, negative_class,
         false_positives, "No false positives found!"),
        ("\n=== FALSE NEGATIVE EXAMPLE ===", negative_class, positive_class,
         false_negatives, "No false negatives found!"),
    )

    for heading, wrong_label, true_label, error_rows, none_found in sections:
        print(heading)
        print(f"(Model incorrectly predicted {wrong_label} when true label was {true_label})")

        if len(error_rows) == 0:
            print(none_found)
            continue

        # Show a single random example of this kind of error
        example = error_rows.sample(1).iloc[0]
        print("\nPassage:")
        print(example[text_column])
        print("\nModel response (original):")
        print(example[model_column])
        print("Model response (cleaned):")
        print(example[pred_col])
        print("True label:")
        print(example[reference_column])

# Show error examples
show_error_examples(sample_df)