In [None]:
import gdown
import zipfile
import os
from tqdm import tqdm

# Step 1: Download the ZIP file from Google Drive (shared link)
file_id = '1juliMp2yJwMe08Yh0s2uYpu_3U4miUAR'  # Extracted from shared URL
zip_path = 'downloaded_file.zip'

# The archive is ~1 GB: reuse a valid copy already on disk so re-running this
# cell does not fetch it again. Delete the file to force a fresh download.
if not zipfile.is_zipfile(zip_path):
    gdown.download(f'https://drive.google.com/uc?id={file_id}', zip_path, quiet=False)

# Fail fast with an actionable message instead of an opaque zipfile error when
# the download did not produce a ZIP archive (missing file, truncated transfer,
# or a non-archive response saved in its place).
if not zipfile.is_zipfile(zip_path):
    raise RuntimeError(
        f"Download of Google Drive file '{file_id}' did not produce a valid ZIP archive at '{zip_path}'."
    )

# Step 2: Create the output directory
output_dir = 'model_assets'
os.makedirs(output_dir, exist_ok=True)

# Step 3: Unzip the file with progress (one tqdm tick per archive member)
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    file_list = zip_ref.namelist()
    for file in tqdm(file_list, desc="Extracting files"):
        zip_ref.extract(member=file, path=output_dir)

print(f"\n✅ Done! Files extracted to '{output_dir}'")


Downloading...
From (original): https://drive.google.com/uc?id=1juliMp2yJwMe08Yh0s2uYpu_3U4miUAR
From (redirected): https://drive.google.com/uc?id=1juliMp2yJwMe08Yh0s2uYpu_3U4miUAR&confirm=t&uuid=a4f005ee-2292-4004-a212-b8788a8b95b6
To: /content/downloaded_file.zip
100%|██████████| 1.05G/1.05G [00:08<00:00, 124MB/s]
Extracting files: 100%|██████████| 6/6 [00:12<00:00,  2.09s/it]


✅ Done! Files extracted to 'model_assets'





In [None]:
import torch
import joblib
from torch.nn.functional import softmax
from transformers import BertTokenizer, BertForSequenceClassification

# Pre-load assets once (outside the prediction function)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
le = joblib.load("model_assets/label_encoder.joblib")

# Build the architecture from the base checkpoint; the classification head is
# sized from the label encoder and then overwritten by the fine-tuned weights.
loaded_model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(le.classes_)
).to(device)

# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint written on a GPU machine still loads on a CPU-only runtime.
loaded_model.load_state_dict(
    torch.load("model_assets/best_model.pt", map_location=device)["model_state_dict"]
)

# Apply optimizations
loaded_model.eval()  # inference behaviour for dropout etc.
torch.backends.cudnn.benchmark = True  # Enable cuDNN auto-tuner
if torch.cuda.is_available():
    loaded_model = torch.compile(loaded_model)  # PyTorch 2.0 compiler

def predict(text, model=loaded_model, tokenizer=tokenizer, label_encoder=le):
    """Classify a single text and report the winning label with its probability.

    Args:
        text: The text to classify. It is truncated/padded to 128 tokens.
        model: Sequence-classification model to run. Defaults to the model
            loaded in this cell (the default is bound when this function is
            defined, so re-assigning ``loaded_model`` later does not change it).
        tokenizer: Tokenizer matching ``model``.
        label_encoder: Fitted encoder used to map the predicted class index
            back to its label.

    Returns:
        dict: ``{'prediction': <label>, 'confidence': <float rounded to 4 dp>}``
        where confidence is the softmax probability of the predicted class.
    """
    inputs = tokenizer(
        text,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors="pt"
    ).to(device)

    # inference_mode already disables gradient tracking, so no extra no_grad()
    # is needed. The former torch.backends.cuda.sdp_kernel(enable_flash=True)
    # wrapper was dropped: it is deprecated (it emitted a warning on every
    # call) and with those arguments it only re-enabled backends that are
    # enabled by default.
    with torch.inference_mode():
        outputs = model(**inputs)

        # Get probabilities and prediction
        probs = softmax(outputs.logits, dim=1)
        confidence, pred_idx = torch.max(probs, dim=1)

    return {
        'prediction': label_encoder.inverse_transform(pred_idx.cpu().numpy())[0],
        'confidence': round(confidence.item(), 4)  # Rounded to 4 decimal places
    }

def predict_batch(texts, batch_size=32):
    """Classify several texts in mini-batches.

    Uses the module-level ``tokenizer``, ``loaded_model``, ``device`` and
    ``le`` (label encoder) defined in this cell.

    Args:
        texts: An iterable of strings, or a single string (treated as a
            one-element batch). Each text is truncated/padded to 128 tokens.
        batch_size: Number of texts sent through the model per forward pass.

    Returns:
        list[dict]: One ``{'prediction': <label>, 'confidence': <float>}``
        entry per input text, in input order. Confidence is the softmax
        probability of the predicted class, rounded to 4 decimal places.
        An empty input yields an empty list.
    """
    # Materialise the input so generators / pandas Series work, and so a bare
    # string is not mistaken for a sequence of texts downstream.
    texts = [texts] if isinstance(texts, str) else list(texts)
    if not texts:
        return []

    encodings = tokenizer(
        texts,
        truncation=True,
        padding='max_length',
        max_length=128,
        return_tensors="pt",
        add_special_tokens=True
    )

    dataset = torch.utils.data.TensorDataset(
        encodings['input_ids'],
        encodings['attention_mask']
    )

    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size)

    results = []
    # inference_mode already disables gradient tracking; the deprecated
    # torch.backends.cuda.sdp_kernel wrapper was removed because it only
    # re-enabled backends that are enabled by default.
    with torch.inference_mode():
        for input_ids, attention_mask in loader:
            outputs = loaded_model(
                input_ids=input_ids.to(device),
                attention_mask=attention_mask.to(device)
            )

            # Process batch with confidence
            probs = softmax(outputs.logits, dim=1)
            confidences, pred_indices = torch.max(probs, dim=1)
            # Fixed: this previously referenced `label_encoder`, a name that
            # does not exist in this function's scope (NameError); the
            # module-level encoder is `le`.
            predictions = le.inverse_transform(pred_indices.cpu().numpy())

            results.extend(
                {
                    'prediction': pred,
                    'confidence': round(conf, 4)
                }
                for pred, conf in zip(predictions, confidences.cpu().tolist())
            )

    return results

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Smoke test: classify one sample report and show the label with its confidence.
sample_report = "Limited access to online resources has do it difficult to find and access the materials needful to complete assignments and study for exams."
result = predict(sample_report)
print(f"Prediction: {result['prediction']} (Confidence: {result['confidence']:.2%})")

  self.gen = func(*args, **kwds)


Prediction: Online learning (Confidence: 88.21%)


In [None]:
# Cell 1: Setup and imports
import torch
import joblib
import pandas as pd
import numpy as np
from torch.nn.functional import softmax
from transformers import BertTokenizer, BertForSequenceClassification
from sentence_transformers import CrossEncoder
import ipywidgets as widgets
from IPython.display import display, clear_output

# Load your existing model and assets
# NOTE: this rebinds `device`, `tokenizer`, `le` and `loaded_model` from the
# earlier cell. Functions defined afterwards (e.g. `predict_genre`) use this
# model instance, while `predict` keeps the defaults it captured earlier.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
le = joblib.load("model_assets/label_encoder.joblib")

loaded_model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(le.classes_)
).to(device)

# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint written on a GPU machine still loads on a CPU-only runtime.
loaded_model.load_state_dict(
    torch.load("model_assets/best_model.pt", map_location=device)["model_state_dict"]
)
loaded_model.eval()

# Load your dataset (replace with your actual path)
# Later cells read the 'Genre', 'Reports' and 'Resolution' columns of this frame.
data_test = pd.read_excel("/content/Final_Sentiment analysis.xlsx")  # Update this path

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Cell 2: Prediction function (using your existing code)
def predict_genre(text):
    """Predict the genre of ``text`` with the module-level model.

    Tokenizes the text (truncated/padded to 128 tokens), runs ``loaded_model``
    on ``device`` and returns a dict with the decoded label under
    ``'prediction'`` and its softmax probability, rounded to 4 decimal places,
    under ``'confidence'``.
    """
    with torch.no_grad():
        with torch.inference_mode():
            encoded = tokenizer(
                text,
                truncation=True,
                padding='max_length',
                max_length=128,
                return_tensors="pt",
            )
            encoded = encoded.to(device)

            logits = loaded_model(**encoded).logits
            class_probs = softmax(logits, dim=1)
            top_prob, top_idx = torch.max(class_probs, dim=1)

            # Map the winning class index back to its label.
            label = le.inverse_transform(top_idx.cpu().numpy())[0]
            score = round(top_prob.item(), 4)
            return {'prediction': label, 'confidence': score}

In [None]:
# Cell 3: Static solutions mapping (customize with your actual solutions)
STATIC_SOLUTIONS = {
    "Academic": "Please contact your academic advisor to discuss course options and requirements.",
    "Financial": "Visit the financial aid office for assistance with tuition and fees.",
    "Personal": "The counseling center offers confidential support for personal matters.",
    "Technical": "Submit a ticket to the IT helpdesk for technical issues.",
    "Administrative": "Contact the registrar's office for administrative inquiries.",
    # Add all your genres and corresponding solutions here
}

def get_static_solution(genre):
    """Return the canned solution text for a predicted genre.

    An exact key match is tried first. If that fails, the lookup is retried
    ignoring letter case and surrounding whitespace, so labels such as
    ``"academic"`` or ``" Financial "`` still resolve instead of silently
    falling through to the generic answer.

    Args:
        genre: Predicted genre label (normally a string).

    Returns:
        str: The matching entry of ``STATIC_SOLUTIONS``, or a generic
        "contact student services" message when no entry matches.
    """
    if genre in STATIC_SOLUTIONS:
        return STATIC_SOLUTIONS[genre]

    # Tolerant second pass; the mapping is read at call time so entries added
    # to STATIC_SOLUTIONS later are honoured.
    wanted = str(genre).strip().casefold()
    for known_genre, solution in STATIC_SOLUTIONS.items():
        if str(known_genre).strip().casefold() == wanted:
            return solution

    return "Please contact student services for assistance."

In [None]:
# Cell 4: User interaction and satisfaction feedback
def handle_report_submission():
    """Render the report form and wire up the prediction / feedback callbacks.

    Displays a text area with a "Submit Report" button. Submitting a non-empty
    report shows the predicted genre, its static solution and a satisfaction
    prompt. Submitting "Dissatisfied" feedback looks up alternative
    resolutions via ``find_similar_solutions``; any other choice prints a
    thank-you message.

    Relies on ``predict_genre``, ``get_static_solution`` and
    ``find_similar_solutions`` being defined by the time the corresponding
    button is clicked.
    """
    # Create widgets
    report_text = widgets.Textarea(description="Your Report:", layout={'width': '600px'})
    submit_button = widgets.Button(description="Submit Report")
    output = widgets.Output()

    # Satisfaction widgets (will be shown after prediction)
    satisfaction = widgets.RadioButtons(
        options=['Neutral', 'Satisfied', 'Dissatisfied'],
        description='Satisfaction:',
        disabled=False
    )
    submit_feedback = widgets.Button(description="Submit Feedback")
    feedback_output = widgets.Output()

    # Report text and genre of the most recent successful submission. The
    # feedback handler uses these so that feedback always refers to the report
    # that was actually classified, even if the text area was edited since.
    last_submission = {'report': None, 'genre': None}

    def on_submit(_):
        # Feedback shown for a previous report no longer applies.
        feedback_output.clear_output()

        with output:
            clear_output()
            report = report_text.value
            if not report.strip():
                print("Please enter a valid report.")
                return

            # Predict genre
            prediction = predict_genre(report)
            solution = get_static_solution(prediction['prediction'])
            last_submission['report'] = report
            last_submission['genre'] = prediction['prediction']

            print(f"\nPredicted Genre: {prediction['prediction']} (Confidence: {prediction['confidence']:.1%})")
            print(f"\nRecommended Solution:\n{solution}\n")

            # Show satisfaction question
            display(satisfaction)
            display(submit_feedback)

    def on_feedback_submit(_):
        with feedback_output:
            clear_output()
            user_satisfaction = satisfaction.value

            if user_satisfaction != 'Dissatisfied':
                print("\nThank you for your feedback!")
                return

            print("\nWe're sorry to hear that. Finding alternative solutions...")
            report = last_submission['report']
            genre = last_submission['genre']
            if genre is None:
                # No stored submission: fall back to classifying the current text.
                report = report_text.value
                genre = predict_genre(report)['prediction']

            # Filter by genre and find similar reports
            similar_solutions = find_similar_solutions(report, genre)

            if not similar_solutions.empty:
                print("\nAlternative solutions from similar cases:")
                for i, (_, row) in enumerate(similar_solutions.iterrows(), 1):
                    print(f"\nOption {i}:")
                    print(f"Original Report: {row['Reports'][:150]}...")
                    print(f"Solution: {row['Resolution']}")
            else:
                print("No similar cases found. Please contact student services directly.")

    submit_button.on_click(on_submit)
    submit_feedback.on_click(on_feedback_submit)

    display(report_text, submit_button, output, feedback_output)

# Start the interaction: renders the report form and registers the button callbacks.
# NOTE: the "Dissatisfied" feedback path calls find_similar_solutions, which is
# defined in a later cell -- run that cell before submitting such feedback.
handle_report_submission()

Textarea(value='', description='Your Report:', layout=Layout(width='600px'))

Button(description='Submit Report', style=ButtonStyle())

Output()

Output()

In [None]:
# Cell 5: Filter solutions by genre
def filter_by_genre(genre, data=None):
    """Return the rows of a report table whose 'Genre' equals ``genre``.

    Args:
        genre: Genre label to select.
        data: Optional DataFrame with a 'Genre' column. When omitted, the
            notebook-level ``data_test`` frame is used, which is what existing
            callers rely on.

    Returns:
        pandas.DataFrame: An independent copy of the matching rows, or an
        empty, column-less DataFrame when the genre does not occur.
    """
    frame = data_test if data is None else data

    if genre not in frame['Genre'].unique():
        return pd.DataFrame()  # Return empty if genre not found

    # Copy so callers can add columns without touching the source frame.
    return frame[frame['Genre'] == genre].copy()

In [None]:
# Cell 6: Find similar solutions using Cross-Encoder
def find_similar_solutions(report, genre, top_n=1):
    """Rank stored reports of one genre by similarity to ``report``.

    The cross-encoder is created on first use and cached as an attribute of
    this function. Rows are obtained from ``filter_by_genre``; each stored
    report is paired with ``report`` and scored by the cross-encoder.

    Returns the ``top_n`` highest-scoring rows with the columns 'Reports',
    'Resolution' and 'similarity', or an empty DataFrame when the genre has
    no rows.
    """
    # Lazily build the scorer and keep it for subsequent calls.
    encoder = getattr(find_similar_solutions, 'cross_encoder', None)
    if encoder is None:
        encoder = CrossEncoder('cross-encoder/stsb-roberta-base')
        find_similar_solutions.cross_encoder = encoder

    candidates = filter_by_genre(genre)
    if candidates.empty:
        return pd.DataFrame()

    # One (query, candidate) pair per stored report.
    pairs = [(report, stored_report) for stored_report in candidates['Reports'].tolist()]
    scored = candidates.assign(similarity=encoder.predict(pairs))

    ranked = scored.sort_values('similarity', ascending=False)
    return ranked.head(top_n)[['Reports', 'Resolution', 'similarity']]

In [None]:
# Cell 7: Example usage of the similarity finder (for testing)
# Demonstration only -- the interactive flow calls find_similar_solutions from the feedback handler.
test_report = "Limited access to online resources has do it difficult to find and access the materials needful to complete assignments and study for exams."
test_genre = predict_genre(test_report)['prediction']
similar_solutions = find_similar_solutions(test_report, test_genre)

if similar_solutions.empty:
    print("No similar solutions found.")
else:
    print(f"Top similar solutions for '{test_genre}':")
    for i, (_, row) in enumerate(similar_solutions.iterrows(), start=1):
        print(f"\nOption {i} (Similarity: {row['similarity']:.2f}):")
        print(f"Original Report: {row['Reports'][:150]}...")
        print(f"Solution: {row['Resolution']}")

Top similar solutions for 'Online learning':

Option 1 (Similarity: 1.00):
Original Report: Limited access to online resources has made it difficult to find and access the materials needed to complete assignments and study for exams....
Solution: Expand access to online resources and provide training on how to effectively use them for coursework.


In [None]:
from IPython.display import display, HTML

def _process_sentiment(value):
    """Map a raw sentiment cell to a ``(label, color, icon)`` display triple."""
    import math
    import numbers

    if isinstance(value, str):
        normalized = value.lower().strip()
        if normalized == "positive":
            return ("Positive", "#2ecc71", "👍")  # Green
        if normalized == "negative":
            return ("Negative", "#e74c3c", "👎")  # Red
        return ("Neutral", "#f39c12", "➖")  # Orange

    # numbers.Real also covers numpy integer scalars, which are not `int`
    # subclasses. NaN (pandas' marker for a missing cell) is excluded so a
    # missing value is shown as unknown rather than as a neutral score.
    if isinstance(value, numbers.Real) and not math.isnan(value):
        # Format numeric sentiment to 3 decimal places
        formatted = f"{float(value):.3f}"
        if value > 0:
            return (formatted, "#2ecc71", "↑")  # Green for positive
        if value < 0:
            return (formatted, "#e74c3c", "↓")  # Red for negative
        return (formatted, "#f39c12", "→")  # Orange for neutral

    return (str(value), "#95a5a6", "?")  # Gray for unknown


def _parse_satisfaction(raw):
    """Return the satisfaction score clamped to [0, 1], or None if missing/non-numeric."""
    import math

    try:
        score = float(raw)
    except (TypeError, ValueError):
        return None
    # Without this check NaN would be clamped to 1 (min(1, nan) == 1) and a
    # missing score would be drawn as perfect satisfaction.
    if math.isnan(score):
        return None
    return max(0, min(1, score))


def _render_case_html(idx, row):
    """Build the HTML card for one sampled row (all data values are HTML-escaped)."""
    import html

    # Consistent color scheme
    bg_color = "#f8f9fa"  # Light gray background
    border_color = "#3498db"  # Consistent blue border
    card_shadow = "0 4px 12px rgba(52, 152, 219, 0.2)"  # Blue-tinted shadow

    res_sentiment, res_color, res_icon = _process_sentiment(row.get('Resolution_sentiment'))
    fb_sentiment, fb_color, fb_icon = _process_sentiment(row.get('Feedback_sentiment'))

    satisfaction_score = _parse_satisfaction(row.get('satisfaction_score', 0))
    if satisfaction_score is None:
        satisfaction_color = "#95a5a6"  # Gray: no usable score
        satisfaction_width = "0%"
        satisfaction_text = "N/A"
    else:
        satisfaction_color = f"hsl({int(satisfaction_score * 120)}, 80%, 45%)"  # Red (0) to Green (120)
        satisfaction_width = f"{satisfaction_score * 100}%"
        satisfaction_text = f"{satisfaction_score:.3f}/1.000"  # Consistent 3 decimal format

    # Values below come straight from the dataset; escape them so characters
    # such as '<' or '&' are shown literally instead of being parsed as markup.
    case_label = html.escape(str(idx))
    resolution_text = html.escape(str(row.get('Resolution', 'N/A')))
    feedback_text = html.escape(str(row.get('Feedback', 'N/A')))
    satisfaction_level = html.escape(str(row.get("satisfaction_level")))
    res_sentiment = html.escape(res_sentiment)
    fb_sentiment = html.escape(fb_sentiment)

    return f"""
<div style="font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            max-width: 800px;
            margin: 20px auto;
            padding: 25px;
            border-radius: 12px;
            box-shadow: {card_shadow};
            background: {bg_color};
            border-left: 6px solid {border_color};
            position: relative;
            overflow: hidden;">

    <div style="position: absolute;
                top: 0;
                right: 0;
                background: {border_color};
                color: white;
                padding: 4px 12px;
                font-size: 13px;
                font-weight: bold;
                border-bottom-left-radius: 12px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.1);">Case {case_label}</div>

    <h2 style="color: #2c3e50;
               margin-top: 5px;
               margin-bottom: 20px;
               text-align: center;
               font-size: 20px;
               font-weight: 600;">Case Analysis Report</h2>

    <div style="display: flex; margin-bottom: 20px; gap: 15px;">
        <div style="flex: 1;">
            <h3 style="color: #3498db;
                       font-size: 15px;
                       margin-bottom: 8px;
                       display: flex;
                       align-items: center;
                       gap: 6px;">
                <span style="font-size: 18px;">🔧</span> SOLUTION
            </h3>
            <div style="background-color: white;
                       padding: 12px;
                       border-radius: 8px;
                       line-height: 1.6;
                       color: #34495e;
                       font-size: 14px;
                       box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
                {resolution_text}
            </div>
        </div>

        <div style="width: 130px;">
            <div style="background-color: white;
                       border-radius: 8px;
                       padding: 12px;
                       text-align: center;
                       box-shadow: 0 2px 4px rgba(0,0,0,0.05);
                       height: 100%;
                       display: flex;
                       flex-direction: column;
                       justify-content: center;">
                <div style="font-size: 24px; margin-bottom: 5px; color: {res_color};">
                    {res_icon}
                </div>
                <div style="font-weight: bold; color: {res_color}; font-size: 15px;">
                    {res_sentiment}
                </div>
                <div style="font-size: 11px; color: #7f8c8d; margin-top: 5px;">
                    Solution Sentiment
                </div>
            </div>
        </div>
    </div>

    <div style="display: flex; margin-bottom: 20px; gap: 15px;">
        <div style="flex: 1;">
            <h3 style="color: #3498db;
                       font-size: 15px;
                       margin-bottom: 8px;
                       display: flex;
                       align-items: center;
                       gap: 6px;">
                <span style="font-size: 18px;">💬</span> FEEDBACK
            </h3>
            <div style="background-color: white;
                       padding: 12px;
                       border-radius: 8px;
                       line-height: 1.6;
                       color: #34495e;
                       font-size: 14px;
                       box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
                {feedback_text}
            </div>
        </div>

        <div style="width: 130px;">
            <div style="background-color: white;
                       border-radius: 8px;
                       padding: 12px;
                       text-align: center;
                       box-shadow: 0 2px 4px rgba(0,0,0,0.05);
                       height: 100%;
                       display: flex;
                       flex-direction: column;
                       justify-content: center;">
                <div style="font-size: 24px; margin-bottom: 5px; color: {fb_color};">
                    {fb_icon}
                </div>
                <div style="font-weight: bold; color: {fb_color}; font-size: 15px;">
                    {fb_sentiment}
                </div>
                <div style="font-size: 11px; color: #7f8c8d; margin-top: 5px;">
                    Feedback Sentiment
                </div>
            </div>
        </div>
    </div>

    <div style="background-color: white;
                border-radius: 8px;
                padding: 15px;
                margin-top: 15px;
                box-shadow: 0 2px 4px rgba(0,0,0,0.05);">
        <h3 style="color: #3498db;
                   font-size: 15px;
                   margin-bottom: 12px;
                   display: flex;
                   align-items: center;
                   gap: 6px;">
            <span style="font-size: 18px;">⭐</span> SATISFACTION METRIC : {satisfaction_level}
        </h3>

        <div style="display: flex; align-items: center; gap: 15px;">
            <div style="flex: 1;">
                <div style="background: #ecf0f1;
                            border-radius: 50px;
                            height: 20px;
                            position: relative;">
                    <div style="position: absolute;
                                top: 0;
                                left: 0;
                                height: 100%;
                                width: {satisfaction_width};
                                background: {satisfaction_color};
                                border-radius: 50px;"></div>
                </div>
            </div>
            <div style="font-weight: bold;
                       color: {satisfaction_color};
                       font-size: 14px;
                       min-width: 90px;
                       text-align: right;">
                {satisfaction_text}
            </div>
        </div>
    </div>
</div>
"""


def display_sample_report(df, sample_size=1):
    """Render randomly sampled rows of ``df`` as styled HTML "case" cards.

    Each card shows the row's 'Resolution' and 'Feedback' text, their
    sentiment ('Resolution_sentiment' / 'Feedback_sentiment'), the
    'satisfaction_level' and a bar for 'satisfaction_score' clamped to [0, 1].
    Missing columns fall back to placeholder values.

    Args:
        df: DataFrame holding the cases.
        sample_size: Number of rows to sample without replacement. Values
            larger than ``len(df)`` are reduced to ``len(df)``; ``None`` means
            one row.

    Returns:
        None. Cards are emitted through ``IPython.display.display``; nothing
        is displayed when there are no rows to sample.
    """
    requested = 1 if sample_size is None else sample_size
    # Clamp so asking for more rows than exist shows what is available
    # instead of raising inside DataFrame.sample.
    n_rows = min(requested, len(df))
    if n_rows == 0:
        return

    sample_df = df.sample(n_rows)
    for idx, row in sample_df.iterrows():
        display(HTML(_render_case_html(idx, row)))


In [None]:
display_sample_report(data_test, sample_size=1)  # Display 1 random case (raise sample_size to show more)