In [1]:
#Importing necessary libraries in ond place
import pandas as pd
import nltk
import re
import numpy as np
import json
import logging
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (
    f1_score, mean_squared_error, classification_report,
    confusion_matrix, ConfusionMatrixDisplay,
    accuracy_score, precision_score, recall_score, r2_score
)
from rouge_score import rouge_scorer
from joblib import dump
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

#Configuring logging for debugging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('summarization_combined.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

#Downloading NLTK data
try:
    nltk.download('punkt', quiet=True)
    logger.info("NLTK punkt tokenizer downloaded successfully")
except Exception as e:
    logger.error(f"Failed to download NLTK punkt tokenizer: {str(e)}")
    raise

2025-08-13 16:22:23,832 - INFO - NLTK punkt tokenizer downloaded successfully


In [2]:
#Loading and preprocessing dataset
def clean_text(text):
    """Clean text by removing special characters and extra whitespace."""
    try:
        text = re.sub(r'[^\w\s.]', '', text)
        cleaned = text.strip().lower()
        return cleaned
    except Exception as e:
        logger.error(f"Error in clean_text: {str(e)}")
        raise

try:
    logger.info("Loading dataset from clipped_dataset.csv")
    df = pd.read_csv('dataset.csv')
    logger.info(f"Dataset loaded with {len(df)} articles")
    
    #Applying text cleaning
    df['article'] = df['article'].apply(clean_text)
    df['highlights'] = df['highlights'].apply(clean_text)
    df['sentences'] = df['article'].apply(nltk.sent_tokenize)
except Exception as e:
    logger.error(f"Error loading or preprocessing dataset: {str(e)}")
    raise

2025-08-13 16:22:23,854 - INFO - Loading dataset from clipped_dataset.csv
2025-08-13 16:22:25,141 - INFO - Dataset loaded with 10000 articles


In [3]:
#Generating labels for sentences
def generate_labels(df):
    try:
        scorer = rouge_scorer.RougeScorer(['rouge1'], use_stemmer=True)
        data = []
        logger.info("Generating labels for sentences")
        for idx, row in df.iterrows():
            sentences = row['sentences']
            summary = row['highlights']
            for i, sent in enumerate(sentences):
                rouge = scorer.score(sent, summary)['rouge1'].fmeasure
                #Giving label of 0 or 1 for the classification model
                label_class = 1 if rouge > 0.3 else 0 
                #Applying rouge label for regression models
                label_reg = rouge 
                data.append({
                    'doc_id': idx,
                    'sentence': sent,
                    'label_class': label_class,
                    'label_reg': label_reg,
                    'position': i / len(sentences),
                    'length': len(sent.split())
                })
        return pd.DataFrame(data)
    #Exception Hanlding
    except Exception as e:
        logger.error(f"Error in generate_labels: {str(e)}")
        raise

flat_df = generate_labels(df)

2025-08-13 16:22:36,885 - INFO - Using default tokenizer.
2025-08-13 16:22:36,887 - INFO - Generating labels for sentences


In [4]:
#Extracting TF-IDF and metadata features
def extract_features(flat_df, for_lstm=False):
    try:
        logger.info("Extracting TF-IDF and metadata features")
        vectorizer = TfidfVectorizer(max_features=500, stop_words='english')
        tfidf_matrix = vectorizer.fit_transform(flat_df['sentence'])
        
        features = np.hstack([
            tfidf_matrix.toarray(),
            flat_df[['position', 'length']].values
        ])
        
        #Saving the vectorizer
        dump(vectorizer, 'tfidf_vectorizer.pkl')
        logger.info("TF-IDF vectorizer saved to tfidf_vectorizer.pkl")
        
        if for_lstm:
            #Reshaping for LSTM
            features = np.expand_dims(features, axis=1)
        
        #Returning features and vectorizer
        return features, vectorizer
    except Exception as e:
        logger.error(f"Error in extract_features: {str(e)}")
        raise

#Extracting features for traditional ML and LSTM
features_ml, vectorizer = extract_features(flat_df, for_lstm=False)
features_lstm, _ = extract_features(flat_df, for_lstm=True)

2025-08-13 16:30:24,465 - INFO - Extracting TF-IDF and metadata features


2025-08-13 16:30:34,537 - INFO - TF-IDF vectorizer saved to tfidf_vectorizer.pkl
2025-08-13 16:30:34,540 - INFO - Extracting TF-IDF and metadata features
2025-08-13 16:30:45,450 - INFO - TF-IDF vectorizer saved to tfidf_vectorizer.pkl


In [None]:
#Training and evaluating traditional ML models
def train_and_evaluate_ml(features, flat_df):
    results = {}
    try:
        logger.info("Starting traditional ML model training and evaluation with hyperparameter tuning")
        
        #Spliting for Classification models
        X_train, X_test, y_train_class, y_test_class = train_test_split(
            features, flat_df['label_class'], test_size=0.2, stratify=flat_df['label_class'], random_state=42
        )
        
        #Logistic Regression with GridSearchCV
        logger.info("Tuning Logistic Regression")
        clf_lr = LogisticRegression(class_weight='balanced', max_iter=1000)
        param_grid_lr = {
            'C': [0.01, 0.1, 1, 10, 100],
            'solver': ['lbfgs', 'liblinear']
        }
        grid_lr = GridSearchCV(
            clf_lr, param_grid_lr, cv=5, scoring='f1', n_jobs=-1, verbose=1
        )
        grid_lr.fit(X_train, y_train_class)
        clf_lr = grid_lr.best_estimator_
        preds_lr = clf_lr.predict(X_test)
        results['Logistic Regression'] = {
            'f1_score': f1_score(y_test_class, preds_lr),
            'precision': precision_score(y_test_class, preds_lr),
            'recall': recall_score(y_test_class, preds_lr),
            'accuracy': accuracy_score(y_test_class, preds_lr),
            'classification_report': classification_report(y_test_class, preds_lr, output_dict=True),
            'confusion_matrix': confusion_matrix(y_test_class, preds_lr).tolist(),
            'best_params': grid_lr.best_params_
        }
        logger.info(f"Best Logistic Regression params: {grid_lr.best_params_}")
        ConfusionMatrixDisplay.from_predictions(y_test_class, preds_lr)
        plt.title("Logistic Regression Confusion Matrix")
        plt.savefig('logistic_regression_cm.png')
        plt.close()
        dump(clf_lr, 'logistic_regression_model.pkl')
        logger.info("Logistic Regression model saved")
        
        #Decision Tree Classifier with GridSearchCV
        logger.info("Tuning Decision Tree Classifier")
        clf_dt = DecisionTreeClassifier(class_weight='balanced', random_state=42)
        param_grid_dt = {
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'criterion': ['gini', 'entropy']
        }
        grid_dt = GridSearchCV(
            clf_dt, param_grid_dt, cv=5, scoring='f1', n_jobs=-1, verbose=1
        )
        grid_dt.fit(X_train, y_train_class)
        clf_dt = grid_dt.best_estimator_
        preds_dt = clf_dt.predict(X_test)
        results['Decision Tree Classifier'] = {
            'f1_score': f1_score(y_test_class, preds_dt),
            'precision': precision_score(y_test_class, preds_dt),
            'recall': recall_score(y_test_class, preds_dt),
            'accuracy': accuracy_score(y_test_class, preds_dt),
            'classification_report': classification_report(y_test_class, preds_dt, output_dict=True),
            'confusion_matrix': confusion_matrix(y_test_class, preds_dt).tolist(),
            'best_params': grid_dt.best_params_
        }
        logger.info(f"Best Decision Tree Classifier params: {grid_dt.best_params_}")
        ConfusionMatrixDisplay.from_predictions(y_test_class, preds_dt)
        plt.title("Decision Tree Classifier Confusion Matrix")
        plt.savefig('decision_tree_classifier_cm.png')
        plt.close()
        dump(clf_dt, 'decision_tree_classifier_model.pkl')
        logger.info("Decision Tree Classifier model saved")
        
        #Spliting data for Regression models
        X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
            features, flat_df['label_reg'], test_size=0.2, random_state=42
        )
        
        #Linear Regression with GridSearchCV
        logger.info("Tuning Linear Regression")
        reg_lr = LinearRegression()
        param_grid_lr_reg = {
            'fit_intercept': [True, False]
        }
        grid_lr_reg = GridSearchCV(
            reg_lr, param_grid_lr_reg, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=1
        )
        grid_lr_reg.fit(X_train_reg, y_train_reg)
        reg_lr = grid_lr_reg.best_estimator_
        preds_lr_reg = reg_lr.predict(X_test_reg)
        results['Linear Regression'] = {
            'mse': mean_squared_error(y_test_reg, preds_lr_reg),
            'r2': r2_score(y_test_reg, preds_lr_reg),
            'best_params': grid_lr_reg.best_params_
        }
        logger.info(f"Best Linear Regression params: {grid_lr_reg.best_params_}")
        dump(reg_lr, 'linear_regression_model.pkl')
        logger.info("Linear Regression model saved")
        
        #Decision Tree Regressor with GridSearchCV
        logger.info("Tuning Decision Tree Regressor")
        reg_dt = DecisionTreeRegressor(random_state=42)
        param_grid_dt_reg = {
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10],
            'min_samples_leaf': [1, 2, 4],
            'criterion': ['squared_error', 'absolute_error']
        }
        grid_dt_reg = GridSearchCV(
            reg_dt, param_grid_dt_reg, cv=5, scoring='neg_mean_squared_error', n_jobs=-1, verbose=1
        )
        grid_dt_reg.fit(X_train_reg, y_train_reg)
        reg_dt = grid_dt_reg.best_estimator_
        preds_dt_reg = reg_dt.predict(X_test_reg)
        results['Decision Tree Regressor'] = {
            'mse': mean_squared_error(y_test_reg, preds_dt_reg),
            'r2': r2_score(y_test_reg, preds_dt_reg),
            'best_params': grid_dt_reg.best_params_
        }
        logger.info(f"Best Decision Tree Regressor params: {grid_dt_reg.best_params_}")
        dump(reg_dt, 'decision_tree_regressor_model.pkl')
        logger.info("Decision Tree Regressor model saved")
        
        #Returning models and results
        return clf_lr, clf_dt, reg_lr, reg_dt, results
    
    except Exception as e:
        logger.error(f"Error in train_and_evaluate_ml: {str(e)}")
        raise

#Training all the models with hyperparameter tuning
clf_lr, clf_dt, reg_lr, reg_dt, results_ml = train_and_evaluate_ml(features_ml, flat_df)

2025-08-13 16:30:45,516 - INFO - Starting traditional ML model training and evaluation with hyperparameter tuning


2025-08-13 16:30:47,461 - INFO - Tuning Logistic Regression


Fitting 5 folds for each of 10 candidates, totalling 50 fits


2025-08-13 16:48:08,950 - INFO - Best Logistic Regression params: {'C': 0.01, 'solver': 'lbfgs'}
2025-08-13 16:48:12,423 - INFO - Logistic Regression model saved
2025-08-13 16:48:12,425 - INFO - Tuning Decision Tree Classifier


Fitting 5 folds for each of 72 candidates, totalling 360 fits


In [None]:
#Training and evaluate tuned LSTM Regressor
def train_and_evaluate_lstm(features, flat_df):
    results = {}
    try:
        logger.info("Starting tuned LSTM Regressor model training and evaluation")
        
        #Spliting data for Regression
        X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
            features, flat_df['label_reg'], test_size=0.2, random_state=42
        )
        
        #Hyperparameter tuned LSTM for Regression
        #Sequential LSTM Model (RNN enhancements: Bidirectional, stacked layers, dropout)
        logger.info("Training tuned LSTM for Regression")
        model_lstm_reg = Sequential([
            #Bidirectional LSTM layers with dropout for regularization
            Bidirectional(LSTM(100, input_shape=(1, 502), return_sequences=True)),
            Dropout(0.25),
            Bidirectional(LSTM(100, return_sequences=False)),
            Dropout(0.25),
            #Using relu activation function
            Dense(50, activation='relu'),
            Dense(1)
        ])
        model_lstm_reg.compile(optimizer=Adam(learning_rate=0.0005), loss='mse', metrics=['mae'])
        
        #Early stopping to prevent overfitting in RNN training
        early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
        
        #Fitting the lstm model
        history_reg = model_lstm_reg.fit(
            X_train_reg, y_train_reg, 
            epochs=20, 
            batch_size=32, 
            validation_split=0.2, 
            verbose=1,
            callbacks=[early_stopping]
        )
        
        #Evaluating the model
        preds_lstm_reg = model_lstm_reg.predict(X_test_reg).flatten()
        results['LSTM Regressor'] = {
            'mse': mean_squared_error(y_test_reg, preds_lstm_reg),
            'r2': r2_score(y_test_reg, preds_lstm_reg)
        }
        
        #Plotting training history
        plt.figure()
        plt.plot(history_reg.history['loss'], label='Training Loss')
        plt.plot(history_reg.history['val_loss'], label='Validation Loss')
        plt.title('Tuned LSTM Regressor Training History')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()
        plt.savefig('lstm_regressor_history.png')
        plt.close()
        
        #Saving trained model
        model_lstm_reg.save('lstm_regressor_model.h5')
        logger.info("Tuned LSTM Regressor model saved")
        
        #Returning model and result
        return model_lstm_reg, results
    except Exception as e:
        logger.error(f"Error in train_and_evaluate_lstm: {str(e)}")
        raise

model_lstm_reg, results_lstm = train_and_evaluate_lstm(features_lstm, flat_df)

2025-08-13 15:10:42,344 - INFO - Starting tuned LSTM Regressor model training and evaluation
2025-08-13 15:10:44,197 - INFO - Training tuned LSTM for Regression
  super().__init__(**kwargs)


Epoch 1/20
[1m7522/7522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 9ms/step - loss: 0.0069 - mae: 0.0595 - val_loss: 0.0064 - val_mae: 0.0562
Epoch 2/20
[1m7522/7522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 9ms/step - loss: 0.0064 - mae: 0.0570 - val_loss: 0.0063 - val_mae: 0.0564
Epoch 3/20
[1m7522/7522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 9ms/step - loss: 0.0063 - mae: 0.0569 - val_loss: 0.0063 - val_mae: 0.0567
Epoch 4/20
[1m7522/7522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 9ms/step - loss: 0.0062 - mae: 0.0565 - val_loss: 0.0063 - val_mae: 0.0571
Epoch 5/20
[1m7522/7522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 9ms/step - loss: 0.0062 - mae: 0.0564 - val_loss: 0.0064 - val_mae: 0.0569
Epoch 6/20
[1m7522/7522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 9ms/step - loss: 0.0062 - mae: 0.0562 - val_loss: 0.0063 - val_mae: 0.0561
Epoch 7/20
[1m7522/7522[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [

2025-08-13 15:28:27,840 - INFO - Tuned LSTM Regressor model saved


In [None]:
#Function to generate a summary from an article
def generate_summary(article, model, vectorizer, k=3, is_classifier=True, is_lstm=False):
    try:
        #Splitting the article into sentences after cleaning the text
        sentences = nltk.sent_tokenize(clean_text(article))
        if not sentences:
            #Return if the article has no sentences
            return "No sentences found."  

        #Convertting sentences into TF-IDF vectors using the provided vectorizer
        tfidf = vectorizer.transform(sentences)

        #Feature engineering: calculate sentence lengths (number of words)
        lengths = [len(s.split()) for s in sentences]

        #Calculating relative positions of sentences in the article (0 to 1)
        positions = [i / len(sentences) for i in range(len(sentences))]

        #Combining TF-IDF features with sentence lengths and positions
        features = np.hstack([tfidf.toarray(), np.array([positions, lengths]).T])

        #If using LSTM, expand feature dimensions to match expected input shape
        if is_lstm:
            features = np.expand_dims(features, axis=1)  

        if is_classifier:
            #If the model is a classifier, predict 0/1 labels for sentences
            preds = model.predict(features)

            #Selecting sentences predicted as important (label == 1)
            selected = [sentences[i] for i in range(len(sentences)) if preds[i] == 1]
        else:
            #If the model is a regressor, predict scores for each sentence
            scores = model.predict(features).flatten()

            #Select top-k sentences based on predicted scores
            selected_indices = np.argsort(scores)[-k:]
            selected = [sentences[i] for i in selected_indices]

        #Returning the selected sentences joined as a single summary string
        return ' '.join(selected) if selected else "No sentences selected."
    except Exception as e:
        logger.error(f"Error in generate_summary: {str(e)}")
        raise


In [None]:
#Evaluating summaries using ROUGE metrics
def evaluate_summaries(df, model, vectorizer, model_name, is_classifier=True, is_lstm=False):
    try:
        #Calculating ROUGE scores for generated summaries
        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
        rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}
        
        for _, row in df.iterrows():
            generated = generate_summary(row['article'], model, vectorizer, is_classifier=is_classifier, is_lstm=is_lstm)
            scores = scorer.score(row['highlights'], generated)
            for metric in rouge_scores:
                rouge_scores[metric].append(scores[metric].fmeasure)
        
        #Returning average ROUGE scores for the model
        return {
            'model': model_name,
            'rouge1': np.mean(rouge_scores['rouge1']),
            'rouge2': np.mean(rouge_scores['rouge2']),
            'rougeL': np.mean(rouge_scores['rougeL'])
        }
    except Exception as e:
        logger.error(f"Error in evaluate_summaries: {str(e)}")
        raise

#Evaluating all models
rouge_results = [
    evaluate_summaries(df, clf_lr, vectorizer, 'Logistic Regression', is_classifier=True),
    evaluate_summaries(df, clf_dt, vectorizer, 'Decision Tree Classifier', is_classifier=True),
    evaluate_summaries(df, reg_lr, vectorizer, 'Linear Regression', is_classifier=False),
    evaluate_summaries(df, reg_dt, vectorizer, 'Decision Tree Regressor', is_classifier=False),
    evaluate_summaries(df, model_lstm_reg, vectorizer, 'LSTM Regressor', is_classifier=False, is_lstm=True)
]

2025-08-13 15:29:17,807 - INFO - Using default tokenizer.
2025-08-13 15:31:59,917 - INFO - Using default tokenizer.
2025-08-13 15:33:12,133 - INFO - Using default tokenizer.
2025-08-13 15:34:38,762 - INFO - Using default tokenizer.
2025-08-13 15:35:51,480 - INFO - Using default tokenizer.


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25

In [None]:
#Saving evaluation results
def save_results(results_ml, results_lstm, rouge_results):
    try:
        #Saving the classification, regression metrics and rouge scores to a JSON file
        output = {
            'classification_metrics': {
                model: metrics for model, metrics in results_ml.items() if 'classification_report' in metrics
            },
            'regression_metrics': {
                **{model: metrics for model, metrics in results_ml.items() if 'mse' in metrics},
                **results_lstm
            },
            'rouge_scores': rouge_results
        }
        
        #Writing results to a JSON file
        with open('summarization_combined_results.json', 'w') as f:
            json.dump(output, f, indent=4)
        logger.info("Results saved to summarization_combined_results.json")
        
    #Exception handling while saving results
    except Exception as e:
        logger.error(f"Error in save_results: {str(e)}")
        raise

save_results(results_ml, results_lstm, rouge_results)

2025-08-13 16:04:09,482 - INFO - Results saved to summarization_combined_results.json


In [None]:
#Generating summaries for the first article to check the model
new_article = df['article'].iloc[0]
print("\nExample Summaries:")
print("Logistic Regression:", generate_summary(new_article, clf_lr, vectorizer, is_classifier=True))
print("Decision Tree Classifier:", generate_summary(new_article, clf_dt, vectorizer, is_classifier=True))
print("Linear Regression:", generate_summary(new_article, reg_lr, vectorizer, is_classifier=False))
print("Decision Tree Regressor:", generate_summary(new_article, reg_dt, vectorizer, is_classifier=False))
print("LSTM Regressor:", generate_summary(new_article, model_lstm_reg, vectorizer, is_classifier=False, is_lstm=True))


Example Summaries:
Logistic Regression: working with biologists she has traveled to deserts and islands from the australian outback to antarctica to photograph organisms that are 2000 years old or older. her new book of photographs and essays the oldest living things in the world came out on earth day april 22. follow her on twitter oltw . a visit to japan in 2004 resulted in a surprising and eyeopening adventure to a supposedly 7000yearold tree which ended up being the ultimate catalyst that brought all these different threads together. examples of this include the map lichens in greenland that grow only 1 centimeter every 100 years and the spruce tree on the cover of the book which despite its spindly appearance has been growing clonally for 9950 years. i urge everyone to travel responsibly and remember the girl scout motto to always leave a place in better shape than when you found it what are a few things that one can start doing today to become more environmentally conscious my s