In [1]:
!pip install pandas numpy ibm-watson huggingface_hub transformers flask pyngrok flask-cors torch requests

Collecting ibm-watson
  Downloading ibm_watson-9.0.0.tar.gz (342 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m342.8/342.8 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pyngrok
  Downloading pyngrok-7.2.5-py3-none-any.whl.metadata (8.9 kB)
Collecting flask-cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting ibm_cloud_sdk_core==3.*,>=3.3.6 (from ibm-watson)
  Downloading ibm_cloud_sdk_core-3.23.0-py3-none-any.whl.metadata (8.7 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==

In [2]:
import torch

# Report whether PyTorch can see a CUDA device and, when it can, print the
# device name and the CUDA version string exposed by torch.
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(
        f"CUDA device: {torch.cuda.get_device_name(0)}",
        f"CUDA version: {torch.version.cuda}",
        sep="\n",
    )

CUDA available: True
CUDA device: Tesla T4
CUDA version: 12.4


In [None]:
import pandas as pd
import numpy as np
import os
import json
import datetime
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import requests
import warnings
warnings.filterwarnings('ignore')

# Import Hugging Face Hub library to access models
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSequenceClassification, pipeline
import torch
from torch.nn.functional import softmax

# Module-level model handles shared by the functions in this cell. All of them
# start unset; the loading code assigns them when the models are first needed.
sentiment_tokenizer = sentiment_model = nlp_pipeline = None

# Helper shared by the local-model path and the API fallback path
def _signed_sentiment(raw_label, confidence):
    """
    Convert a classifier label and its confidence into the {'label', 'score'} format.

    Any label that contains "positive" or "negative" is collapsed to plain
    'positive' / 'negative', so graded labels such as "Very Negative" are not
    silently treated as neutral. (NOTE(review): the default model's card lists
    five graded labels - confirm against the model config.)

    Args:
        raw_label: label string produced by the classifier.
        confidence: probability/score attached to that label.

    Returns:
        dict: 'label' plus a signed 'score' (+confidence for positive,
        -confidence for negative, 0 for anything else).
    """
    label = str(raw_label).lower()
    if 'positive' in label:
        return {'label': 'positive', 'score': float(confidence)}
    if 'negative' in label:
        return {'label': 'negative', 'score': -float(confidence)}
    # Neutral or unrecognised labels keep their (lower-cased) name and score 0
    return {'label': label, 'score': 0}


# Function to detect sentiment using local Hugging Face model
def analyze_sentiment(hf_client, text, sentiment_model_id="tabularisai/multilingual-sentiment-analysis"):
    """
    Analyzes sentiment from text using a local AutoModelForSequenceClassification model.

    The tokenizer/model are loaded lazily into the module-level
    ``sentiment_tokenizer`` / ``sentiment_model`` on first use, so
    ``sentiment_model_id`` only matters while those globals are still unset.
    If the local model fails for any reason, the Hugging Face API client is
    tried; if that fails too, a neutral result is returned.

    Args:
        hf_client: client exposing ``text_classification(text, model=...)``;
            only used by the fallback path.
        text: text to classify. Non-strings and texts shorter than 5
            characters (after stripping) are reported as neutral.
        sentiment_model_id: model identifier for loading and for the API fallback.

    Returns:
        dict: {'label': str, 'score': number}; score is positive for positive
        sentiment, negative for negative sentiment and 0 otherwise.
    """
    global sentiment_tokenizer, sentiment_model

    if not text or not isinstance(text, str) or len(text.strip()) < 5:
        return {'score': 0, 'label': 'neutral'}

    try:
        # Initialize sentiment models if not already done
        if sentiment_tokenizer is None or sentiment_model is None:
            print(f"Loading sentiment analysis model: {sentiment_model_id}")
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_id)
            sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_id).to(device)
            print(f"Sentiment model loaded on {device}")

        # Get the device where the model is loaded
        device = next(sentiment_model.parameters()).device

        # Tokenize and predict; no_grad avoids building an autograd graph for inference
        encoded_input = sentiment_tokenizer(text, return_tensors='pt', truncation=True, max_length=512).to(device)
        with torch.no_grad():
            output = sentiment_model(**encoded_input)
        scores = softmax(output.logits, dim=1).detach().cpu().numpy()[0]

        predicted_class = torch.argmax(output.logits, dim=1).item()
        raw_label = sentiment_model.config.id2label[predicted_class]

        return _signed_sentiment(raw_label, scores[predicted_class])

    except Exception as e:
        print(f"Error analyzing sentiment with local model: {e}")
        # Fallback to API call if local model fails
        try:
            print("Falling back to API client for sentiment analysis")
            result = hf_client.text_classification(text, model=sentiment_model_id)
            return _signed_sentiment(result[0]['label'], result[0]['score'])
        except Exception as e:
            print(f"Error in fallback sentiment analysis: {e}")
            return {'score': 0, 'label': 'neutral'}

# Function to load Hugging Face client
def load_hf_client(hf_token):
    """
    Create the Hugging Face inference client and load IBM Granite locally.

    As a side effect, a "text-generation" pipeline wrapping the loaded
    tokenizer and model is stored in the module-level ``nlp_pipeline``.

    Args:
        hf_token: token handed to ``InferenceClient``.

    Returns:
        tuple: (client, model_id, tokenizer, model)
    """
    global nlp_pipeline

    print("Initializing Hugging Face client for IBM Granite...")

    # Remote inference client, used for API calls
    granite_id = "ibm-granite/granite-3.3-2b-instruct"
    api_client = InferenceClient(token=hf_token)

    # Prefer the GPU when one is visible to torch
    target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {target_device}")

    # Local copies of the tokenizer and the causal LM
    granite_tokenizer = AutoTokenizer.from_pretrained(granite_id)
    granite_model = AutoModelForCausalLM.from_pretrained(granite_id).to(target_device)

    # The pipeline takes a device index: 0 for the first GPU, -1 for CPU
    pipeline_device = 0 if torch.cuda.is_available() else -1
    nlp_pipeline = pipeline(
        "text-generation",
        model=granite_model,
        tokenizer=granite_tokenizer,
        device=pipeline_device,
    )

    print("Hugging Face client for IBM Granite initialized!")
    return api_client, granite_id, granite_tokenizer, granite_model

# Function to prepare performance data for prompt
def prepare_performance_data(employee_data, comparative_data=None):
    """
    Render one employee's KPI record as the text block embedded in the prompt.

    Args:
        employee_data: mapping with the employee's name, ID and weekly KPI
            columns; an optional 'survey_data' mapping adds the survey section.
        comparative_data: optional mapping with the previous week's KPI
            columns; when given, a week-over-week section is appended.

    Returns:
        str: the formatted multi-line performance text.
    """
    emp, prev = employee_data, comparative_data
    pad4, pad8 = " " * 4, " " * 8

    # Weekly KPI section (always present)
    kpi_lines = [
        "",
        f"{pad4}Employee Name: {emp['Employee Name']}",
        f"{pad4}Employee ID: {emp['Employee ID']}",
        pad4,
        f"{pad4}WEEKLY KPIs:",
        f"{pad4}- Productivity: {emp['Productivity: Number of tasks completed']} tasks completed, {emp['Productivity: Time to complete tasks (hours/task)']:.2f} hours/task",
        f"{pad4}- Work Quality: Error rate {emp['Quality of Work: Error rate (%)']:.2f}%, Customer satisfaction {emp['Quality of Work: Customer satisfaction rate (%)']:.2f}%",
        f"{pad4}- Attendance & Punctuality: Attendance {emp['Presence and Punctuality: Attendance rate (%)']:.2f}%, Punctuality {emp['Presence and Punctuality: Punctuality rate (%)']:.2f}%",
        f"{pad4}- Goals & Objectives: Individual achievement {emp['Goals and Objectives: Individual goal achievement (%)']:.2f}%, Team achievement {emp['Goals and Objectives: Team goal achievement (%)']:.2f}%, Contribution {emp['Goals and Objectives: Contribution to company vision (1-5)']:.2f}/5",
        f"{pad4}- Collaboration & Teamwork: Communication {emp['Collaboration and Teamwork: Communication skills (1-5)']:.2f}/5, Teamwork {emp['Collaboration and Teamwork: Ability to work in a team (1-5)']:.2f}/5",
        pad4,
    ]
    performance_text = "\n".join(kpi_lines)

    # Week-over-week deltas, only when a previous week was supplied
    if prev is not None:
        comparison_lines = [
            "",
            f"{pad8}COMPARISON WITH PREVIOUS WEEK:",
            f"{pad8}- Change in number of tasks: {emp['Productivity: Number of tasks completed'] - prev['Productivity: Number of tasks completed']}",
            f"{pad8}- Change in time per task: {emp['Productivity: Time to complete tasks (hours/task)'] - prev['Productivity: Time to complete tasks (hours/task)']:.2f} hours",
            f"{pad8}- Change in error rate: {emp['Quality of Work: Error rate (%)'] - prev['Quality of Work: Error rate (%)']:.2f}%",
            f"{pad8}- Change in customer satisfaction: {emp['Quality of Work: Customer satisfaction rate (%)'] - prev['Quality of Work: Customer satisfaction rate (%)']:.2f}%",
            pad8,
        ]
        performance_text += "\n".join(comparison_lines)

    # Survey answers, only when they were merged into the employee record
    if 'survey_data' in emp:
        survey = emp['survey_data']
        # (label shown in the prompt, key looked up in the survey mapping)
        survey_fields = (
            ('Self-Performance', 'Self-Performance'),
            ('Goals Achieved', 'Goals Achieved'),
            ('Personal Challenges', 'Personal Challenges'),
            ('Stress/Anxiety', 'Stress or Anxiety'),
            ('Relationship with Colleagues', 'Relationship with Colleagues'),
            ('Communication Issues', 'Communication Issues'),
            ('Team Conflicts', 'Team Conflicts'),
            ('Team Collaboration', 'Team Collaboration'),
        )
        survey_lines = ["", f"{pad8}MONTHLY SURVEY:"]
        for shown_label, survey_key in survey_fields:
            survey_lines.append(f"{pad8}- {shown_label}: {survey.get(survey_key, 'No data')}")
        survey_lines.append(pad8)
        performance_text += "\n".join(survey_lines)

    return performance_text

# Function to create prompt for model
def create_summary_prompt(performance_text):
    """
    Wrap the formatted performance text in the HR-analysis instructions.

    Args:
        performance_text: employee data block to embed under "EMPLOYEE DATA:".

    Returns:
        str: the complete prompt (role, task list, data, expected output format).
    """
    pad = " " * 4
    prompt_lines = [
        "",
        pad + "You are an HR assistant expert in analyzing employee performance.",
        pad,
        pad + "Task:",
        pad + "Analyze the following employee performance data and provide a summary that assesses:",
        pad + "1. Whether performance is good or poor (compare with threshold: min. 15 tasks, max. 3 hours/task, max. 5% error, min. 80% satisfaction)",
        pad + "2. How it compares to the previous week (up/down)",
        pad + "3. Which areas need improvement",
        pad + "4. Whether this employee needs a personal psychologist or conflict resolution (based on survey)",
        pad,
        pad + "EMPLOYEE DATA:",
        f"{pad}{performance_text}",
        pad,
        pad + "Output format:",
        pad + "Performance Summary: [good/poor and explanation]",
        pad + "Comparison: [summary comparison with previous week]",
        pad + "Improvement Areas: [1-3 main areas that need improvement]",
        pad + "Recommendation: [psychologist/conflict resolution/not needed] and reason",
        pad,
    ]
    return "\n".join(prompt_lines)

# Helper: remove prompt text that the model copied into its response
def _strip_prompt_echo(response):
    """
    Remove echoed prompt text from a model response.

    Two situations are handled:
    * the response STARTS with the prompt (leading echo): everything up to the
      end of the echoed output-format template is dropped, keeping the answer;
    * the prompt (or part of it) reappears AFTER the answer: the text from the
      repeated marker onwards is dropped, as before.
    """
    prompt_start = "You are an HR assistant"
    template_end = "Recommendation: [psychologist/conflict resolution/not needed] and reason"

    text = response
    if text.lstrip().startswith(prompt_start):
        # Leading echo: cut at the latest prompt landmark that is present, so
        # the answer that follows the prompt survives instead of being discarded.
        for boundary in (template_end, "Output format:", "EMPLOYEE DATA:"):
            if boundary in text:
                text = text.split(boundary, 1)[1]
                break
        else:
            text = text.lstrip()[len(prompt_start):]

    # Prompt text repeated after the answer: keep only what precedes it
    for marker in (prompt_start, "EMPLOYEE DATA:"):
        if marker in text:
            text = text.split(marker, 1)[0]

    if "Output format:" in text:
        text = text.split("Output format:", 1)[1]

    return text


# Function to extract summary from model response
def extract_summary(response):
    """
    Extract and format the summary from the model response.

    Args:
        response: raw text produced by the model (``None`` is treated as empty).

    Returns:
        dict: keys "Performance Summary", "Comparison", "Improvement Areas"
        and "Recommendation". Each value is the text found after that heading
        (up to the next heading that follows it), or
        "No information available" when the heading is absent.
    """
    if response is None:
        response = ""
    elif not isinstance(response, str):
        response = str(response)

    summary = _strip_prompt_echo(response).strip()

    sections = ["Performance Summary:", "Comparison:", "Improvement Areas:", "Recommendation:"]

    # First occurrence of every heading that is present in the text
    positions = {section: summary.find(section) for section in sections if section in summary}

    formatted_summary = {}
    for section in sections:
        key = section.replace(":", "")
        start = positions.get(section)
        if start is None:
            formatted_summary[key] = "No information available"
            continue

        # The section ends where the nearest following heading starts
        following = [pos for pos in positions.values() if pos > start]
        end = min(following) if following else len(summary)
        formatted_summary[key] = summary[start + len(section):end].strip()

    return formatted_summary

# Helper: summary built from pre-computed flags when no model is usable
def _rule_based_summary(employee_data, comparative_data=None):
    """
    Build the four-section summary without a language model.

    Uses the 'bad_metrics', 'need_psychologist' and 'need_conflict_resolution'
    entries of ``employee_data`` (all optional) and, when ``comparative_data``
    is given, the change in completed tasks and error rate.
    """
    areas = employee_data.get('bad_metrics', [])
    performance_rating = "good" if len(areas) == 0 else "poor"

    # Compare with previous week if available
    comparison = "No comparison data available."
    if comparative_data is not None:
        task_diff = employee_data['Productivity: Number of tasks completed'] - comparative_data['Productivity: Number of tasks completed']
        error_diff = employee_data['Quality of Work: Error rate (%)'] - comparative_data['Quality of Work: Error rate (%)']

        if task_diff > 0 and error_diff < 0:
            comparison = f"Performance improved (tasks +{task_diff}, error {error_diff:.2f}%)."
        elif task_diff < 0 or error_diff > 0:
            comparison = f"Performance declined (tasks {task_diff}, error {error_diff:.2f}%)."
        else:
            comparison = "Performance relatively stable compared to previous week."

    improvement = "No areas requiring urgent improvement." if not areas else f"Needs improvement in: {', '.join(areas)}."

    # Psychologist takes precedence over conflict resolution when both are flagged
    recommendation = "Not needed"
    if employee_data.get('need_psychologist', False):
        recommendation = "Psychologist - signs of stress/anxiety detected"
    elif employee_data.get('need_conflict_resolution', False):
        recommendation = "Conflict resolution - signs of team conflict detected"

    return {
        "Performance Summary": performance_rating,
        "Comparison": comparison,
        "Improvement Areas": improvement,
        "Recommendation": recommendation
    }


# Function to summarize employee performance using pipeline approach
def summarize_employee_performance(hf_client, model_id, employee_data, comparative_data=None, tokenizer=None, model=None):
    """
    Summarize employee performance report using pipeline approach with fallback strategy.

    Generation is attempted in this order for the primary ``model_id``:
    the module-level ``nlp_pipeline`` (if initialised), then the local
    ``tokenizer``/``model`` pair (if both given), otherwise the API client.
    The fallback model names are always requested through ``hf_client``.
    If every attempt raises, a rule-based summary is returned instead.

    Args:
        hf_client: client exposing ``text_generation(prompt=..., model=..., **params)``.
        model_id: identifier of the primary model.
        employee_data: current-week record (see ``prepare_performance_data``).
        comparative_data: optional previous-week record.
        tokenizer: optional local tokenizer for the primary model.
        model: optional local causal LM for the primary model.

    Returns:
        dict: the structure produced by ``extract_summary`` (or the rule-based
        equivalent) with keys "Performance Summary", "Comparison",
        "Improvement Areas" and "Recommendation".
    """
    # 1. Prepare the performance data
    performance_text = prepare_performance_data(employee_data, comparative_data)

    # 2. Create the prompt for the model
    prompt = create_summary_prompt(performance_text)

    # 3. Generate summary using model with fallback options
    fallback_models = [
        "google/flan-t5-large",  # Fallback option 1
        "google/flan-t5-base",   # Fallback option 2
        "facebook/bart-large-cnn"  # Fallback option 3
    ]
    models_to_try = [model_id] + fallback_models

    # Per-model generation parameters
    default_params = {"max_new_tokens": 250, "temperature": 0.3}
    model_params = {
        model_id: {
            "max_new_tokens": 250,
            "temperature": 0.2,
            "top_p": 0.95,
            "do_sample": True
        }
    }
    model_params.update({name: dict(default_params) for name in fallback_models})

    # Try models one by one until successful
    for model_choice in models_to_try:
        try:
            print(f"Trying to use model: {model_choice}")
            params = model_params.get(model_choice, default_params)
            gen_kwargs = {
                "max_new_tokens": params["max_new_tokens"],
                "temperature": params["temperature"],
                "top_p": params.get("top_p", 0.95),
                "do_sample": params.get("do_sample", True),
            }

            if model_choice == model_id and nlp_pipeline is not None:
                # Ask the pipeline for the completion only, so the prompt does
                # not have to be removed from the output afterwards.
                result = nlp_pipeline(prompt, return_full_text=False, **gen_kwargs)
                response = result[0]['generated_text'].strip()

            elif model_choice == model_id and tokenizer is not None and model is not None:
                # Local tokenizer/model on whatever device the model lives on
                device = next(model.parameters()).device
                inputs = tokenizer(prompt, return_tensors="pt").to(device)
                outputs = model.generate(**inputs, **gen_kwargs)

                # Decode only the newly generated tokens: decoding the prompt
                # tokens may not reproduce the prompt text exactly, in which
                # case a textual "remove the prompt" step would silently fail.
                prompt_length = inputs["input_ids"].shape[1]
                response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

            else:
                # Use API client as fallback
                response = hf_client.text_generation(
                    prompt=prompt,
                    model=model_choice,
                    **params
                )

            print(f"Successfully generated summary using {model_choice}")

            # 4. Extract and format the summary
            return extract_summary(response)

        except Exception as e:
            print(f"Error with model {model_choice}: {e}")
            continue

    # If all models fail, create a simple rule-based summary
    print("All models failed. Generating rule-based summary.")
    return _rule_based_summary(employee_data, comparative_data)

# Helper: numpy scalar -> plain Python value (JSON encoders reject numpy integers)
def _to_native(value):
    """Return ``value.item()`` for numpy scalars, otherwise ``value`` unchanged."""
    return value.item() if isinstance(value, np.generic) else value


# Helper: list the KPI areas that miss their threshold
def _find_bad_metrics(emp_record, thresholds):
    """Return the names of the KPIs in ``emp_record`` that violate ``thresholds``."""
    bad_metrics = []
    if emp_record['Productivity: Number of tasks completed'] < thresholds['tasks_completed']:
        bad_metrics.append('number of tasks')
    if emp_record['Productivity: Time to complete tasks (hours/task)'] > thresholds['time_per_task']:
        bad_metrics.append('time per task')
    if emp_record['Quality of Work: Error rate (%)'] > thresholds['error_rate']:
        bad_metrics.append('error rate')
    if emp_record['Quality of Work: Customer satisfaction rate (%)'] < thresholds['customer_satisfaction']:
        bad_metrics.append('customer satisfaction')
    return bad_metrics


# Function to process data and generate summary
def process_employee_data(kpi_week1_df, kpi_week2_df, survey_df, hf_client, model_id, tokenizer=None, model=None, employee_id=None):
    """
    Process employee data and generate performance summaries.

    Args:
        kpi_week1_df: previous-week KPI frame (must have an 'Employee ID' column).
        kpi_week2_df: current-week KPI frame; drives which employees are processed.
        survey_df: survey frame; rows are matched on 'Employee ID'.
        hf_client: client forwarded to the sentiment and summary functions.
        model_id: primary generation model identifier.
        tokenizer: optional local tokenizer forwarded to the summariser.
        model: optional local model forwarded to the summariser.
        employee_id: when given (including 0), only that employee is processed;
            ``None`` or an empty string means "all employees in week 2".

    Returns:
        dict: employee ID -> {'employee_name', 'employee_id', 'summary',
        'summary_data', 'need_psychologist', 'need_conflict_resolution',
        'bad_metrics'}. IDs are plain Python values so the result can be
        JSON-encoded. Employees whose processing raises are logged and skipped.
    """
    # Thresholds to determine if performance is good or poor
    thresholds = {
        'tasks_completed': 15,  # Minimum tasks to be completed
        'time_per_task': 3,     # Maximum time per task (hours)
        'error_rate': 5,        # Maximum error rate (%)
        'customer_satisfaction': 80,  # Minimum customer satisfaction (%)
    }

    all_summaries = {}

    # A plain truthiness test would treat employee ID 0 as "not provided"
    if employee_id is not None and employee_id != "":
        employee_ids = [employee_id]
    else:
        employee_ids = kpi_week2_df['Employee ID'].unique()

    # Iterate through each employee
    for emp_id in employee_ids:
        try:
            # Employees missing from the current week cannot be summarised
            if emp_id not in kpi_week2_df['Employee ID'].values:
                continue

            emp_week2 = kpi_week2_df[kpi_week2_df['Employee ID'] == emp_id].iloc[0].to_dict()
            emp_week1 = kpi_week1_df[kpi_week1_df['Employee ID'] == emp_id].iloc[0].to_dict() if emp_id in kpi_week1_df['Employee ID'].values else None
            emp_survey = survey_df[survey_df['Employee ID'] == emp_id].iloc[0].to_dict() if emp_id in survey_df['Employee ID'].values else None

            # Merge survey data to week 2 KPI data if available
            if emp_survey is not None:
                emp_week2['survey_data'] = emp_survey

            # Evaluate employee performance
            bad_metrics = _find_bad_metrics(emp_week2, thresholds)

            # Detect issues from survey
            need_psychologist = False
            need_conflict_resolution = False

            if emp_survey is not None:
                # Sentiment analysis for personal and team issues
                stress_text = str(emp_survey.get('Stress or Anxiety', ''))
                conflict_text = str(emp_survey.get('Team Conflicts', ''))

                if len(stress_text) > 5:
                    stress_analysis = analyze_sentiment(hf_client, stress_text)
                    need_psychologist = bool(stress_analysis['label'] == 'negative' and stress_analysis['score'] < -0.3)

                if len(conflict_text) > 5:
                    conflict_analysis = analyze_sentiment(hf_client, conflict_text)
                    need_conflict_resolution = bool(conflict_analysis['label'] == 'negative' and conflict_analysis['score'] < -0.3)

            # Add evaluation results to employee data
            emp_week2['bad_metrics'] = bad_metrics
            emp_week2['need_psychologist'] = need_psychologist
            emp_week2['need_conflict_resolution'] = need_conflict_resolution

            # Generate summary with IBM Granite via Hugging Face
            summary_data = summarize_employee_performance(hf_client, model_id, emp_week2, emp_week1, tokenizer, model)

            # Create a combined summary string from the structured data
            combined_summary = "\n".join([f"{key}: {value}" for key, value in summary_data.items()])

            # numpy integer IDs (e.g. from .unique()) are not JSON-serialisable,
            # neither as dict keys nor as values, so store plain Python values.
            native_id = _to_native(emp_id)
            all_summaries[native_id] = {
                'employee_name': _to_native(emp_week2['Employee Name']),
                'employee_id': native_id,
                'summary': combined_summary,
                'summary_data': summary_data,  # Structured version of the summary
                'need_psychologist': need_psychologist,
                'need_conflict_resolution': need_conflict_resolution,
                'bad_metrics': bad_metrics
            }

        except Exception as e:
            print(f"Error processing employee {emp_id}: {e}")
            continue

    return all_summaries

# Flask application object; CORS(app) applies flask_cors to every route
app = Flask(__name__)
CORS(app)

# Shared state filled in by the routes below: the uploaded data frames ...
kpi_week1_df = kpi_week2_df = survey_df = None
# ... the Hugging Face client plus the generation model handles ...
hf_client = model_id = tokenizer = model = None
# ... and the sentiment model handles (reset here to "not loaded").
sentiment_tokenizer = sentiment_model = None

# Route for file upload
@app.route('/upload', methods=['POST'])
def upload_files():
    """
    Receive the three CSV uploads and store them as module-level data frames.

    Expects multipart form fields 'kpi_week1', 'kpi_week2' and 'survey'.
    All three files are parsed and validated before any global is replaced,
    so a failed upload never leaves a mix of old and new data behind.

    Returns:
        JSON with a success message, employee count and frame shapes;
        400 when a file or the 'Employee ID' column is missing;
        500 with the error text when reading fails.
    """
    global kpi_week1_df, kpi_week2_df, survey_df

    required_fields = ('kpi_week1', 'kpi_week2', 'survey')

    try:
        # Check if files are received
        if any(field not in request.files for field in required_fields):
            return jsonify({'error': 'Missing required files'}), 400

        # Read the CSV uploads straight from the request into local frames
        frames = {field: pd.read_csv(request.files[field]) for field in required_fields}

        # Every later processing step looks employees up by this column
        lacking = [field for field, frame in frames.items() if 'Employee ID' not in frame.columns]
        if lacking:
            return jsonify({'error': f"Missing 'Employee ID' column in: {', '.join(lacking)}"}), 400

        # Commit all three frames together only after everything succeeded
        kpi_week1_df = frames['kpi_week1']
        kpi_week2_df = frames['kpi_week2']
        survey_df = frames['survey']

        return jsonify({
            'message': 'Files uploaded successfully',
            'employee_count': len(kpi_week2_df['Employee ID'].unique()),
            'kpi_week1_shape': kpi_week1_df.shape,
            'kpi_week2_shape': kpi_week2_df.shape,
            'survey_shape': survey_df.shape
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

# Route for model initialization
@app.route('/init_model', methods=['POST'])
def init_model():
    """
    Load the generation model, the inference client and the sentiment model.

    The Hugging Face token is taken from the optional JSON field 'hf_token',
    else from the HF_TOKEN environment variable, else no token is passed.
    A request without a JSON body is accepted.

    Returns:
        JSON success message, or 500 with the error text when loading fails.
    """
    global hf_client, model_id, tokenizer, model, sentiment_tokenizer, sentiment_model, nlp_pipeline

    try:
        # silent=True: a missing or non-JSON body must not fail the request,
        # because the payload is optional here.
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}

        # Never hard-code the token in the notebook; read it at call time
        hf_token = payload.get('hf_token') or os.environ.get('HF_TOKEN')

        # Load Hugging Face client and model
        hf_client, model_id, tokenizer, model = load_hf_client(hf_token)

        # Initialize sentiment model
        sentiment_model_id = "tabularisai/multilingual-sentiment-analysis"
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        print(f"Loading sentiment analysis model: {sentiment_model_id}")
        sentiment_tokenizer = AutoTokenizer.from_pretrained(sentiment_model_id)
        sentiment_model = AutoModelForSequenceClassification.from_pretrained(sentiment_model_id).to(device)
        print(f"Sentiment model loaded on {device}")

        return jsonify({'message': 'Hugging Face client and models initialized successfully'})

    except Exception as e:
        return jsonify({'error': str(e)}), 500

# Route for processing data and generating summaries
@app.route('/process', methods=['POST'])
def process_data():
    """
    Generate performance summaries for one employee or for all of them.

    The optional JSON field 'employee_id' selects a single employee; without
    it (or without any JSON body) every employee in the week-2 data is
    processed.

    Returns:
        JSON with the summaries; 400 when data or the client is not ready;
        500 with the error text when processing fails.
    """
    try:
        # Check if data and model are initialized
        if kpi_week1_df is None or kpi_week2_df is None or survey_df is None:
            return jsonify({'error': 'Data not uploaded yet'}), 400

        if hf_client is None:
            return jsonify({'error': 'Hugging Face client not initialized yet'}), 400

        # The body is optional: tolerate a missing or non-object JSON payload
        # instead of failing with a 500 on `.get`.
        payload = request.get_json(silent=True)
        employee_id = payload.get('employee_id', None) if isinstance(payload, dict) else None

        # Process data and generate summaries
        summaries = process_employee_data(kpi_week1_df, kpi_week2_df, survey_df, hf_client, model_id, tokenizer, model, employee_id)

        return jsonify({
            'message': 'Processing completed successfully',
            'summaries': summaries
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

# Route for checking status
@app.route('/status', methods=['GET'])
def check_status():
    """
    Report which data frames and model handles are loaded, plus device info.

    Returns:
        JSON with 'data_loaded' and 'models_loaded' (booleans per item),
        'device' ('cuda' or 'cpu') and 'gpu_info' (device name or 'N/A').
    """
    cuda_ready = torch.cuda.is_available()

    # True for every module-level frame that has been set
    dataset_flags = {
        label: frame is not None
        for label, frame in (
            ('kpi_week1', kpi_week1_df),
            ('kpi_week2', kpi_week2_df),
            ('survey', survey_df),
        )
    }

    # True for every module-level model handle that has been set
    model_flags = {
        label: handle is not None
        for label, handle in (
            ('hf_client', hf_client),
            ('tokenizer', tokenizer),
            ('model', model),
            ('nlp_pipeline', nlp_pipeline),
            ('sentiment_tokenizer', sentiment_tokenizer),
            ('sentiment_model', sentiment_model),
        )
    }

    return jsonify({
        'data_loaded': dataset_flags,
        'models_loaded': model_flags,
        'device': 'cuda' if cuda_ready else 'cpu',
        'gpu_info': torch.cuda.get_device_name(0) if cuda_ready else 'N/A',
    })

# Initialize ngrok and run app
def run_app():
    """
    Open an ngrok tunnel to the Flask port and start the server (blocking).

    The ngrok auth token is read from the NGROK_AUTH_TOKEN environment
    variable so that no credential is stored in the notebook.

    Raises:
        ValueError: when no token is configured.
    """
    port = 5000

    # Configure ngrok
    ngrok_auth_token = os.environ.get('NGROK_AUTH_TOKEN', '').strip()
    if not ngrok_auth_token:
        raise ValueError("Please set your ngrok auth token (NGROK_AUTH_TOKEN environment variable).")
    ngrok.set_auth_token(ngrok_auth_token)

    # Open ngrok tunnel
    public_url = ngrok.connect(port).public_url
    print(f"Server running at: {public_url}")

    # Run app on the same port the tunnel forwards to
    app.run(host='0.0.0.0', port=port)

# Main
# Start the ngrok tunnel and the Flask server only when this code is executed
# as the main module; run_app() blocks while the server is running.
if __name__ == '__main__':
    run_app()

2025-05-03 08:47:00.843901: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746262021.052538      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746262021.113330      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Server running at: https://c411-34-91-121-186.ngrok-free.app                                        
 * Serving Flask app '__main__'
 * Debug mode: off
Initializing Hugging Face client for IBM Granite...
Using device: cuda


tokenizer_config.json:   0%|          | 0.00/9.93k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/777k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/442k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.48M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/29.8k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Device set to use cuda:0


Hugging Face client for IBM Granite initialized!
Loading sentiment analysis model: tabularisai/multilingual-sentiment-analysis


tokenizer_config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.92M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/902 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/541M [00:00<?, ?B/s]

Sentiment model loaded on cuda
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Successfully generated summary using ibm-granite/granite-3.3-2b-instruct
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Successfully generated summary using ibm-granite/granite-3.3-2b-instruct
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Successfully generated summary using ibm-granite/granite-3.3-2b-instruct
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Successfully generated summary using ibm-granite/granite-3.3-2b-instruct
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Successfully generated summary using ibm-granite/granite-3.3-2b-instruct
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Successfully generated summary using ibm-granite/granite-3.3-2b-instruct
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Successfully generated summary using ibm-granite/granite-3.3-2b-instruct
Trying to use model: ibm-granite/granite-3.3-2b-instruct
Su