# Binary Classification: Equal Input/Output (Hindi IndicGEC)

This notebook trains a binary classifier for the task: label = 1 if output sentence == input sentence, else 0.

It includes:
- Robust loading of train.csv (auto-detect input/output columns).
- Label creation.
- Baseline rule (exact string equality).
- TF-IDF (char n-grams) + Logistic Regression model.
- Evaluation metrics.
- Model saving and an inference helper.

Note: Char n-grams are language-agnostic and work well with Hindi text.

In [1]:
# Install dependencies if missing (run once)
import sys, subprocess

def ensure(pkg_name, import_name=None):
    """Import a module, installing its package with pip if the import fails.

    Args:
        pkg_name: Distribution name handed to ``pip install``.
        import_name: Module name to import when it differs from the
            distribution name (e.g. ``'sklearn'`` for ``'scikit-learn'``).
            ``pkg_name`` is used when this is omitted or empty.

    Raises:
        subprocess.CalledProcessError: If the pip command exits non-zero.
    """
    module_name = import_name if import_name else pkg_name
    # Hyphens from a distribution name are mapped to underscores before importing.
    module_name = module_name.replace('-', '_')
    try:
        __import__(module_name)
    except Exception:
        # Any failure while importing (not only ImportError) triggers an install.
        print(f'Installing {pkg_name} ...')
        pip_command = [sys.executable, '-m', 'pip', 'install', pkg_name]
        subprocess.check_call(pip_command)

# Make sure the third-party packages used below are importable.
# scikit-learn is installed under one name but imported as `sklearn`,
# hence the explicit second argument.
ensure('pandas')
ensure('numpy')
ensure('scikit-learn', 'sklearn')


In [2]:
# Imports
import os
from pathlib import Path
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, f1_score, classification_report, confusion_matrix, roc_auc_score
import joblib

# One seed for the whole notebook: it seeds NumPy's global RNG here and is also
# passed as `random_state` to the train/validation split.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)


In [3]:
# Configuration
DATA_PATH = 'train.csv'  # assumes this notebook is in the same folder as train.csv
# Fail early with a readable message instead of a pandas error further down.
assert Path(DATA_PATH).exists(), f'Could not find {DATA_PATH}. Please place train.csv next to this notebook.'


In [4]:
# Load data and detect column names
# `df` is the notebook-level training frame; later cells add cleaned columns and a `label` column to it.
df = pd.read_csv(DATA_PATH)
print('Columns:', list(df.columns))

def find_col(candidates, frame=None):
    """Return the first column whose lowercased name contains a candidate substring.

    Candidates are tried in the order given, so an earlier candidate wins over
    a later one even if the later one matches a column that appears first.
    Column names are converted with ``str()``, stripped and lowercased before
    the substring test; candidates are compared as given.

    Args:
        candidates: Substrings to look for, in priority order.
        frame: Any object exposing a ``columns`` iterable. When omitted, the
            notebook-level ``df`` is used, which keeps existing
            ``find_col(candidates)`` calls working unchanged.

    Returns:
        The original (un-normalized) column label of the first match, or
        ``None`` when no column name contains any candidate.
    """
    if frame is None:
        # Backward-compatible default: fall back to the global training frame.
        frame = df
    lowered = {c: str(c).strip().lower() for c in frame.columns}
    for key in candidates:
        for col, lc in lowered.items():
            if key in lc:
                return col
    return None

# Resolve the two text columns by name; candidate substrings are tried in priority order.
input_col = find_col(['input', 'source', 'src'])
output_col = find_col(['output', 'target', 'tgt', 'reference', 'gold'])

if input_col is None or output_col is None:
    # Fallback to first two columns
    # NOTE(review): this branch also runs when only ONE of the two columns was
    # detected; the detected one is then discarded in favour of positional order.
    cols = list(df.columns)
    if len(cols) < 2:
        raise ValueError('train.csv must have at least two columns (input and output).')
    input_col, output_col = cols[0], cols[1]
    print(f'Warning: Could not auto-detect columns. Using first two columns: {input_col!r}, {output_col!r}')
else:
    print(f'Detected columns -> input: {input_col!r}, output: {output_col!r}')

# Clean and ensure string type: missing values become '', everything is cast to
# str, and surrounding whitespace is removed so it cannot affect the label below.
df[input_col] = df[input_col].fillna('').astype(str).str.strip()
df[output_col] = df[output_col].fillna('').astype(str).str.strip()

# Create label: 1 if equal, else 0 (exact string comparison after the cleaning above)
df['label'] = (df[input_col] == df[output_col]).astype(int)
# Last expression of the cell: show the first rows as a visual sanity check.
df.head()


Columns: ['Input sentence', 'Output sentence', 'Unnamed: 2']
Detected columns -> input: 'Input sentence', output: 'Output sentence'


Unnamed: 0,Input sentence,Output sentence,Unnamed: 2,label
0,‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?,‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ ‡§ï‡•ç‡§Ø‡§æ ‡§π‡•à?,,1
1,‡§ï‡§ø‡§∏‡•Ä ‡§≠‡•Ä ‡§ï‡§æ‡§∞‡•ç‡§Ø ‡§ï‡•ã ‡§∏‡•Ä‡§ñ ‡§≤‡•á‡§®‡•á ‡§ï‡•Ä ‡§ï‡•ç‡§∞‡§ø‡§Ø‡§æ ‡§ï‡•ã ‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ ...,‡§ï‡§ø‡§∏‡•Ä ‡§≠‡•Ä ‡§ï‡§æ‡§∞‡•ç‡§Ø ‡§ï‡•ã ‡§∏‡•Ä‡§ñ ‡§≤‡•á‡§®‡•á ‡§ï‡•Ä ‡§ï‡•ç‡§∞‡§ø‡§Ø‡§æ ‡§ï‡•ã ‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ ...,,1
2,‡§Ø‡•á ‡§ï‡•á‡§µ‡§≤ ‡§ï‡§ø‡§§‡§æ‡§¨‡•Ä ‡§ú‡•ç‡§û‡§æ‡§® ‡§Ö‡§∞‡•ç‡§ú‡§® ‡§§‡§ï ‡§π‡•Ä ‡§∏‡§ø‡§Æ‡§ø‡§§ ‡§®‡§π‡•Ä‡§Ç ‡§π‡•à‡•§,‡§Ø‡•á ‡§ï‡•á‡§µ‡§≤ ‡§ï‡§ø‡§§‡§æ‡§¨‡•Ä ‡§ú‡•ç‡§û‡§æ‡§® ‡§Ö‡§∞‡•ç‡§ú‡§® ‡§§‡§ï ‡§π‡•Ä ‡§∏‡•Ä‡§Æ‡§ø‡§§ ‡§®‡§π‡•Ä‡§Ç ‡§π‡•à‡•§,,0
3,‡§Ø‡§π ‡§ï‡§à ‡§µ‡§ø‡§≠‡§æ‡§ó‡•ã‡§Ç ‡§Æ‡•á‡§Ç ‡§¨‡§æ‡§Ç‡§ü‡§æ ‡§ú‡§æ ‡§∏‡§ï‡§§‡§æ ‡§π‡•à‡•§,‡§Ø‡§π ‡§ï‡§à ‡§µ‡§ø‡§≠‡§æ‡§ó‡•ã‡§Ç ‡§Æ‡•á‡§Ç ‡§¨‡§æ‡§Ç‡§ü‡§æ ‡§ú‡§æ ‡§∏‡§ï‡§§‡§æ ‡§π‡•à‡•§,,1
4,"‡§ú‡•à‡§∏‡•á - ‡§µ‡•ç‡§Ø‡§æ‡§µ‡§π‡§æ‡§∞‡§ø‡§ï ‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ, ‡§ï‡§ø‡§§‡§æ‡§¨‡•Ä ‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ ‡§Ö‡§•‡§µ‡§æ ‡§Ö...","‡§ú‡•à‡§∏‡•á - ‡§µ‡•ç‡§Ø‡§æ‡§µ‡§π‡§æ‡§∞‡§ø‡§ï ‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ, ‡§ï‡§ø‡§§‡§æ‡§¨‡•Ä ‡§∂‡§ø‡§ï‡•ç‡§∑‡§æ ‡§Ö‡§•‡§µ‡§æ ‡§Ü...",,0


In [5]:
# Train/validation split
# Each example is a single string "<input> [SEP] <output>", so one text
# vectorizer sees both sentences of the pair.
X_pair = (df[input_col] + ' [SEP] ' + df[output_col]).values
y = df['label'].values

# Hold out 20% for validation; the split is stratified by label only when more
# than one class is present in `y`.
X_train, X_val, y_train, y_val = train_test_split(
    X_pair, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y if len(np.unique(y)) > 1 else None
)
# Displayed tuple: (train size, validation size, fraction of rows with label 1).
len(X_train), len(X_val), np.mean(y)


(479, 120, np.float64(0.09682804674457429))

In [7]:
# Helper for metrics
def print_metrics(y_true, y_pred, y_proba=None, title=None):
    """Print accuracy, macro/binary F1 and, optionally, ROC-AUC.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        y_proba: Optional scores for the positive class; when provided,
            ROC-AUC is reported as well.
        title: Optional heading, printed between two '=' rules of its length.
    """
    if title:
        rule = '=' * len(title)
        for line in (rule, title, rule):
            print(line)

    # All label-based scores are computed before anything is printed.
    accuracy = accuracy_score(y_true, y_pred)
    macro_f1, binary_f1 = (
        f1_score(y_true, y_pred, average=mode, zero_division=0)
        for mode in ('macro', 'binary')
    )
    print(f'Accuracy: {accuracy:.4f}')
    print(f'F1-macro: {macro_f1:.4f} | F1-binary(positive=1): {binary_f1:.4f}')

    if y_proba is None:
        return
    try:
        roc_auc = roc_auc_score(y_true, y_proba)
        print(f'ROC-AUC: {roc_auc:.4f}')
    except Exception as err:
        # Best effort: a failure here is reported, not raised.
        print('ROC-AUC unavailable:', err)
# print('Classification Report:\n', classification_report(y_true, y_pred, zero_division=0))
# print('Confusion Matrix:\n', confusion_matrix(y_true, y_pred))


In [8]:
# Baseline rule: exact match on raw strings
def rule_predict(batch_pairs):
    """Label each "<input> [SEP] <output>" string by exact equality of its halves.

    Args:
        batch_pairs: Iterable of strings joined with ' [SEP] '.

    Returns:
        A NumPy array with 1 where the two whitespace-stripped halves are
        identical and 0 otherwise. A string without the separator is labelled 0.
        Only the first separator occurrence splits the string.
    """
    separator = ' [SEP] '

    def label_for(pair):
        left, found, right = pair.partition(separator)
        if not found:
            # Separator missing: treat the pair as not equal.
            return 0
        return int(left.strip() == right.strip())

    return np.array([label_for(pair) for pair in batch_pairs])

# Score the rule on the validation split. The labels were themselves derived
# from stripped-string equality of the two columns, so this rule applies the
# same test that defined them.
rule_val = rule_predict(X_val)
print_metrics(y_val, rule_val, title='Baseline: Exact Equality Rule')


Baseline: Exact Equality Rule
Accuracy: 1.0000
F1-macro: 1.0000 | F1-binary(positive=1): 1.0000


In [9]:
# TF-IDF (char n-grams) + Logistic Regression
# Features: character 2- to 5-grams of the joined pair string, with min_df=2.
# Classifier: logistic regression with class_weight='balanced' (label 1 is the
# minority class in the training data).
tfidf_lr = Pipeline([
    ('tfidf', TfidfVectorizer(analyzer='char', ngram_range=(2, 5), min_df=2)),
    ('clf', LogisticRegression(max_iter=1000, class_weight='balanced', solver='liblinear'))
])

tfidf_lr.fit(X_train, y_train)
pred_val = tfidf_lr.predict(X_val)
# Positive-class probabilities feed ROC-AUC; left as None if the pipeline
# does not expose predict_proba.
proba_val = None
if hasattr(tfidf_lr, 'predict_proba'):
    proba_val = tfidf_lr.predict_proba(X_val)[:, 1]

print_metrics(y_val, pred_val, proba_val, title='TF-IDF + Logistic Regression')


TF-IDF + Logistic Regression
Accuracy: 0.9000
F1-macro: 0.5982 | F1-binary(positive=1): 0.2500
ROC-AUC: 0.7330


In [10]:
# Save model
# The whole fitted pipeline (vectorizer + classifier) is written as one file
# under ./models, which is created if it does not exist yet.
Path('models').mkdir(exist_ok=True)
MODEL_PATH = Path('models') / 'binary_eq_model.joblib'
joblib.dump(tfidf_lr, MODEL_PATH)
print(f'Model saved to: {MODEL_PATH.resolve()}')


Model saved to: D:\CODING\IndicGEC2025\Hindi\models\binary_eq_model.joblib


In [11]:
# Inference helper
def predict_equal(input_sentence: str, output_sentence: str) -> int:
    """Predict whether two sentences are identical using the fitted pipeline.

    Both sentences are stripped and joined with ' [SEP] ', the same pair
    format the pipeline was trained on, then passed to ``tfidf_lr.predict``.

    Args:
        input_sentence: The original sentence.
        output_sentence: The candidate/corrected sentence.

    Returns:
        The predicted label as a plain ``int`` (1 = identical, 0 = different).
    """
    left = input_sentence.strip()
    right = output_sentence.strip()
    pair_text = left + ' [SEP] ' + right
    predicted = tfidf_lr.predict([pair_text])
    return int(predicted[0])

# Example usage:
# The first pair repeats the same sentence; the second pair differs.
# NOTE(review): the recorded output shows label 0 for BOTH pairs, i.e. the
# learned model misses an exact duplicate that the equality rule would catch.
examples = [
    ('‡§Ø‡§π ‡§µ‡§æ‡§ï‡•ç‡§Ø ‡§π‡•à‡•§', '‡§Ø‡§π ‡§µ‡§æ‡§ï‡•ç‡§Ø ‡§π‡•à‡•§'),
    ('‡§Æ‡•á‡§∞‡§æ ‡§®‡§æ‡§Æ ‡§∞‡§æ‡§π‡•Å‡§≤ ‡§π‡•à‡•§', '‡§Æ‡•á‡§∞‡§æ ‡§®‡§æ‡§Æ ‡§∞‡•ã‡§π‡§ø‡§§ ‡§π‡•à‡•§'),
]
for a, b in examples:
    print(a, '|', b, '->', predict_equal(a, b))


‡§Ø‡§π ‡§µ‡§æ‡§ï‡•ç‡§Ø ‡§π‡•à‡•§ | ‡§Ø‡§π ‡§µ‡§æ‡§ï‡•ç‡§Ø ‡§π‡•à‡•§ -> 0
‡§Æ‡•á‡§∞‡§æ ‡§®‡§æ‡§Æ ‡§∞‡§æ‡§π‡•Å‡§≤ ‡§π‡•à‡•§ | ‡§Æ‡•á‡§∞‡§æ ‡§®‡§æ‡§Æ ‡§∞‡•ã‡§π‡§ø‡§§ ‡§π‡•à‡•§ -> 0


# Model Evaluation on Development Set

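Let's evaluate the trained classifier on the development set (`dev.csv`). Alongside the classification metrics, we compute a sentence-level GLEU score between each input sentence and its reference output; this measures how similar the two sentences of a pair are (it does not score any text generated by the model).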

In [None]:
# Install GLEU score dependencies
# NOTE: `subprocess` and `sys` are not imported in this cell; it relies on the
# `import sys, subprocess` from the dependency-install cell at the top.
try:
    import nltk
    from nltk.translate.gleu_score import sentence_gleu
    print("NLTK GLEU already available")
except ImportError:
    print("Installing NLTK for GLEU score calculation...")
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'nltk'])
    import nltk
    from nltk.translate.gleu_score import sentence_gleu

# Download required NLTK data
# NOTE(review): no code visible in this notebook uses the punkt tokenizer
# (tokenization below is a plain regex split), so this download looks
# unnecessary -- confirm before removing.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    print("Downloading NLTK punkt tokenizer...")
    nltk.download('punkt', quiet=True)

In [None]:
# Load and process development set
DEV_PATH = 'dev.csv'

# Check if dev.csv exists
# On failure, print the working directory and the CSV files found there to
# help locate the file, then stop the cell with FileNotFoundError.
if not Path(DEV_PATH).exists():
    print(f"‚ùå {DEV_PATH} not found in current directory.")
    print(f"Current directory: {Path.cwd()}")
    print(f"Available files: {list(Path('.').glob('*.csv'))}")
    raise FileNotFoundError(f"Please place {DEV_PATH} in the same folder as this notebook.")

# Load development data
print(f"üìä Loading development set: {DEV_PATH}")
dev_df = pd.read_csv(DEV_PATH)
print(f"Development set shape: {dev_df.shape}")
print(f"Columns: {list(dev_df.columns)}")

# Auto-detect columns for dev set (same logic as training)
def autodetect_dev_cols(df):
    """Pick the input and output columns of a frame by name, else by position.

    A column matches when its stripped, lowercased name contains one of the
    candidate substrings; candidates are tried in priority order. Each column
    falls back independently: the first column for the input, the second for
    the output.

    Args:
        df: Frame-like object exposing an indexable ``columns``.

    Returns:
        Tuple ``(input_column, output_column)``.
    """
    normalized = {c: str(c).strip().lower() for c in df.columns}

    def first_match(keys):
        # Outer loop over candidates keeps their priority order.
        hits = (col for key in keys for col, name in normalized.items() if key in name)
        return next(hits, None)

    ic = first_match(['input', 'source', 'src'])
    if not ic:
        ic = df.columns[0]

    oc = first_match(['output', 'target', 'tgt', 'reference', 'gold'])
    if not oc:
        oc = df.columns[1]

    return ic, oc

dev_input_col, dev_output_col = autodetect_dev_cols(dev_df)
print(f"üéØ Detected columns -> input: '{dev_input_col}', output: '{dev_output_col}'")

# Clean development data (same fillna / str / strip treatment as the training columns)
dev_df[dev_input_col] = dev_df[dev_input_col].fillna('').astype(str).str.strip()
dev_df[dev_output_col] = dev_df[dev_output_col].fillna('').astype(str).str.strip()

# Create labels for dev set: 1 when the cleaned input equals the cleaned output
dev_df['label_true'] = (dev_df[dev_input_col] == dev_df[dev_output_col]).astype(int)

# Class balance of the dev set: counts and shares of each label
print(f"üìà Dev set statistics:")
print(f"  - Total samples: {len(dev_df)}")
print(f"  - Identical pairs (label=1): {dev_df['label_true'].sum()} ({dev_df['label_true'].mean():.2%})")
print(f"  - Different pairs (label=0): {(1-dev_df['label_true']).sum()} ({(1-dev_df['label_true']).mean():.2%})")

dev_df.head()

In [None]:
# Generate predictions on development set
print("ü§ñ Generating predictions on development set...")

# Prepare input pairs for model prediction (same ' [SEP] ' format used for training)
dev_X_pair = (dev_df[dev_input_col] + ' [SEP] ' + dev_df[dev_output_col]).values
dev_y_true = dev_df['label_true'].values

# Get model predictions: hard labels plus the positive-class probability
dev_y_pred = tfidf_lr.predict(dev_X_pair)
dev_y_proba = tfidf_lr.predict_proba(dev_X_pair)[:, 1]

# Add predictions to dataframe so later cells can slice by them
dev_df['label_pred'] = dev_y_pred
dev_df['confidence'] = dev_y_proba

# Binary classification metrics
print("üìä Binary Classification Results on Development Set:")
print_metrics(dev_y_true, dev_y_pred, dev_y_proba, title='Development Set Performance')

# Detailed breakdown: the four confusion-matrix cells, with label 1 ("identical") as the positive class
print(f"\nüîç Detailed Analysis:")
print(f"  - Correct predictions: {(dev_y_true == dev_y_pred).sum()}/{len(dev_y_true)}")
print(f"  - True Positives (correctly identified identical): {((dev_y_true == 1) & (dev_y_pred == 1)).sum()}")
print(f"  - True Negatives (correctly identified different): {((dev_y_true == 0) & (dev_y_pred == 0)).sum()}")
print(f"  - False Positives (incorrectly said identical): {((dev_y_true == 0) & (dev_y_pred == 1)).sum()}")
print(f"  - False Negatives (incorrectly said different): {((dev_y_true == 1) & (dev_y_pred == 0)).sum()}")

In [None]:
# GLEU Score Calculation
print("üìè Calculating GLEU Scores...")

def tokenize_text(text):
    """Split text into whitespace-delimited tokens.

    The value is first coerced with ``str()``. Punctuation is NOT separated:
    it stays attached to the neighbouring characters of its token.

    Args:
        text: Value to tokenize (any object; converted with ``str()``).

    Returns:
        List of maximal runs of non-whitespace characters, in order.
    """
    import re
    cleaned = str(text).strip()
    return [match.group(0) for match in re.finditer(r'\S+', cleaned)]

def calculate_gleu_scores(references, hypotheses):
    """Calculate sentence-level GLEU for paired references and hypotheses.

    Each reference/hypothesis is tokenized with ``tokenize_text`` and scored
    with ``sentence_gleu`` using a single reference per hypothesis.

    Args:
        references: Iterable of reference sentences.
        hypotheses: Iterable of hypothesis sentences, paired positionally with
            ``references`` (pairs are formed with ``zip``).

    Returns:
        List of float scores, one per pair. A pair whose scoring raises is
        recorded as 0.0; when that happens, a single ``RuntimeWarning`` reports
        how many pairs were affected so the fallback is not silent.
    """
    import warnings

    gleu_scores = []
    failed_pairs = 0

    for ref, hyp in zip(references, hypotheses):
        # Tokenize both reference and hypothesis
        ref_tokens = tokenize_text(ref)
        hyp_tokens = tokenize_text(hyp)

        # sentence_gleu expects the references as a list of token lists and the
        # hypothesis as a single token list.
        try:
            gleu_scores.append(sentence_gleu([ref_tokens], hyp_tokens))
        except Exception:
            # Best effort: keep going with 0.0, but count it so it gets reported.
            failed_pairs += 1
            gleu_scores.append(0.0)

    if failed_pairs:
        warnings.warn(
            f'GLEU scoring failed for {failed_pairs} pair(s); recorded as 0.0.',
            RuntimeWarning,
            stacklevel=2,
        )

    return gleu_scores

# Calculate GLEU scores for all sentence pairs
print("üî§ Tokenizing and calculating GLEU scores...")

# For GLEU, we compare input vs output sentences (regardless of our model's prediction)
# i.e. the score describes the data pair itself, not anything the classifier produced.
references = dev_df[dev_input_col].tolist()  # Original sentences
hypotheses = dev_df[dev_output_col].tolist()  # Corrected/target sentences

# Calculate GLEU scores and keep them per row for the later analysis cells
gleu_scores = calculate_gleu_scores(references, hypotheses)
dev_df['gleu_score'] = gleu_scores

# Overall GLEU statistics
mean_gleu = np.mean(gleu_scores)
median_gleu = np.median(gleu_scores)
std_gleu = np.std(gleu_scores)

print(f"\nüìä GLEU Score Results on Development Set:")
print(f"  üìà Mean GLEU Score: {mean_gleu:.4f}")
print(f"  üìä Median GLEU Score: {median_gleu:.4f}")
print(f"  üìè Standard Deviation: {std_gleu:.4f}")
print(f"  üéØ Min GLEU Score: {min(gleu_scores):.4f}")
print(f"  üöÄ Max GLEU Score: {max(gleu_scores):.4f}")

# GLEU score distribution: four non-overlapping buckets covering the whole range
# NOTE(review): min()/max() above and the divisions below assume a non-empty dev set.
perfect_matches = sum(1 for score in gleu_scores if score >= 0.99)
high_scores = sum(1 for score in gleu_scores if 0.8 <= score < 0.99)
medium_scores = sum(1 for score in gleu_scores if 0.5 <= score < 0.8)
low_scores = sum(1 for score in gleu_scores if score < 0.5)

print(f"\nüìã GLEU Score Distribution:")
print(f"  üéØ Perfect/Near-perfect (‚â•0.99): {perfect_matches} ({perfect_matches/len(gleu_scores):.1%})")
print(f"  ‚úÖ High similarity (0.8-0.99): {high_scores} ({high_scores/len(gleu_scores):.1%})")
print(f"  ‚ö†Ô∏è Medium similarity (0.5-0.8): {medium_scores} ({medium_scores/len(gleu_scores):.1%})")
print(f"  ‚ùå Low similarity (<0.5): {low_scores} ({low_scores/len(gleu_scores):.1%})")

In [None]:
# Correlation Analysis: Model Predictions vs GLEU Scores
print("üîç Analyzing correlation between model predictions and GLEU scores...")

# Group by model predictions (predicted label, not the true label)
# NOTE(review): if the model predicts only one class, one of these groups is
# empty and its statistics are presumably NaN -- confirm on real output.
identical_pairs = dev_df[dev_df['label_pred'] == 1]
different_pairs = dev_df[dev_df['label_pred'] == 0]

print(f"\nüìä GLEU Scores by Model Prediction:")
print(f"  üü¢ Pairs predicted as IDENTICAL (label=1): {len(identical_pairs)} samples")
print(f"    - Mean GLEU: {identical_pairs['gleu_score'].mean():.4f}")
print(f"    - Median GLEU: {identical_pairs['gleu_score'].median():.4f}")
print(f"    - Std GLEU: {identical_pairs['gleu_score'].std():.4f}")

print(f"  üî¥ Pairs predicted as DIFFERENT (label=0): {len(different_pairs)} samples")
print(f"    - Mean GLEU: {different_pairs['gleu_score'].mean():.4f}")
print(f"    - Median GLEU: {different_pairs['gleu_score'].median():.4f}")
print(f"    - Std GLEU: {different_pairs['gleu_score'].std():.4f}")

# Correlation coefficient between the 0/1 predicted label and the GLEU score
# ([0, 1] picks the off-diagonal entry of the 2x2 correlation matrix)
correlation = np.corrcoef(dev_df['label_pred'], dev_df['gleu_score'])[0, 1]
print(f"\nüîó Correlation between model predictions and GLEU scores: {correlation:.4f}")

# Performance on perfect vs imperfect matches (rows split at GLEU 0.99)
perfect_gleu_mask = dev_df['gleu_score'] >= 0.99
imperfect_gleu_mask = dev_df['gleu_score'] < 0.99

# Accuracy = share of rows in each slice where the prediction equals the true label
perfect_accuracy = (dev_df[perfect_gleu_mask]['label_true'] == dev_df[perfect_gleu_mask]['label_pred']).mean()
imperfect_accuracy = (dev_df[imperfect_gleu_mask]['label_true'] == dev_df[imperfect_gleu_mask]['label_pred']).mean()

print(f"\nüéØ Model Accuracy Analysis:")
print(f"  ‚ú® On perfect GLEU matches (‚â•0.99): {perfect_accuracy:.4f} ({perfect_gleu_mask.sum()} samples)")
print(f"  ‚ö° On imperfect GLEU matches (<0.99): {imperfect_accuracy:.4f} ({imperfect_gleu_mask.sum()} samples)")

print(f"\nüìà Summary Report:")
print(f"  üéØ Overall Development Set Accuracy: {(dev_y_true == dev_y_pred).mean():.4f}")
print(f"  üìè Mean GLEU Score: {mean_gleu:.4f}")
print(f"  üìä Total Samples Evaluated: {len(dev_df)}")
print(f"  üîó Prediction-GLEU Correlation: {correlation:.4f}")

In [None]:
# Sample Analysis: Show some examples
print("üîç Sample Analysis - Examples from Development Set:")
print("="*80)

# Show some interesting examples
# Start with up to the first 5 rows. The count is clamped to the frame length so
# a dev set with fewer than 5 rows does not make dev_df.iloc[i] raise IndexError.
sample_indices = list(range(min(5, len(dev_df))))
if len(dev_df) > 10:
    # Add some random samples if dataset is large enough
    # (drawn without replacement from the rows after the first five)
    sample_indices.extend(np.random.choice(range(5, len(dev_df)), size=min(5, len(dev_df)-5), replace=False))

for i in sample_indices:
    row = dev_df.iloc[i]
    print(f"\nüìù Example {i+1}:")
    print(f"  Input:  '{row[dev_input_col]}'")
    print(f"  Output: '{row[dev_output_col]}'")
    print(f"  True Label: {row['label_true']} | Predicted: {row['label_pred']} | Confidence: {row['confidence']:.3f}")
    print(f"  GLEU Score: {row['gleu_score']:.4f}")

    # Interpretation: did the classifier agree with the true label?
    if row['label_true'] == row['label_pred']:
        status = "‚úÖ CORRECT"
    else:
        status = "‚ùå INCORRECT"

    # Same GLEU buckets as the distribution summary (0.99 / 0.8 / 0.5 thresholds)
    if row['gleu_score'] >= 0.99:
        gleu_status = "üéØ Perfect match"
    elif row['gleu_score'] >= 0.8:
        gleu_status = "‚úÖ High similarity"
    elif row['gleu_score'] >= 0.5:
        gleu_status = "‚ö†Ô∏è Medium similarity"
    else:
        gleu_status = "‚ùå Low similarity"

    print(f"  Status: {status} | GLEU: {gleu_status}")

print(f"\n" + "="*80)

# Save results to CSV (all dev columns plus label_true, label_pred, confidence, gleu_score)
results_path = 'dev_results_with_gleu.csv'
dev_df.to_csv(results_path, index=False)
print(f"üíæ Results saved to: {results_path}")
print(f"   - Columns: {list(dev_df.columns)}")
print(f"   - Rows: {len(dev_df)}")

print(f"\nüéâ Evaluation Complete!")
print(f"  üìä Mean GLEU Score: {mean_gleu:.4f}")
print(f"  üéØ Classification Accuracy: {(dev_y_true == dev_y_pred).mean():.4f}")
print(f"  üìà Total Samples: {len(dev_df)}")

## Optional: Batch inference on a file
If you later have a file like `test.csv` with the same two columns, you can run:

1. Load it with `pd.read_csv('test.csv')`.
2. Auto-detect the columns the same way as above.
3. Build pairs with `' [SEP] '`.
4. Use `tfidf_lr.predict(pairs)` to get labels (1 if equal, else 0).
5. Save the predictions to a CSV.

In [None]:
# (Optional) Batch inference template
# test_df = pd.read_csv('test.csv')
# def autodetect_cols(df):
#     def find_col(df, candidates):
#         lowered = {c: str(c).strip().lower() for c in df.columns}
#         for key in candidates:
#             for col, lc in lowered.items():
#                 if key in lc:
#                     return col
#         return None
#     ic = find_col(df, ['input', 'source', 'src']) or df.columns[0]
#     oc = find_col(df, ['output', 'target', 'tgt', 'reference', 'gold']) or df.columns[1]
#     return ic, oc
#
# ic, oc = autodetect_cols(test_df)
# test_df[ic] = test_df[ic].fillna('').astype(str).str.strip()
# test_df[oc] = test_df[oc].fillna('').astype(str).str.strip()
# pairs = (test_df[ic] + ' [SEP] ' + test_df[oc]).values
# preds = tfidf_lr.predict(pairs).astype(int)
# out = test_df.copy()
# out['label_pred'] = preds
# out.to_csv('predictions.csv', index=False)
# print('Saved predictions.csv')
