In [None]:
!pip install transformers pandas numpy spacy indicnlp googletrans==3.1.0a0
!python -m spacy download xx_ent_wiki_sm
!python -m spacy download en_core_web_sm

In [None]:
!pip install indic-nlp-library

In [7]:
import os
os.environ["INDIC_RESOURCES_PATH"] = "/content/indic_nlp_resources"
import sys
sys.path.append("/content/indic_nlp_library")


In [None]:
!pip install nltk


In [8]:
import pandas as pd
import numpy as np
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
from indicnlp.tokenize import sentence_tokenize
from googletrans import Translator
import spacy
import re

class HindiConversationAnalyzer:
    def __init__(self):
        # Initialize required models and tokenizers
        self.sentiment_model = pipeline(
            "sentiment-analysis",
            model="nlptown/bert-base-multilingual-uncased-sentiment"
        )
        self.translator = Translator()
        self.nlp_hi = spacy.load('xx_ent_wiki_sm')  # For Hindi text processing
        self.nlp_en = spacy.load('en_core_web_sm')  # For English text processing

    def preprocess_text(self, text):
        """Simplified Preprocess Hindi text"""
        cleaned = re.sub(r'[^\w\s।]', '', text)
        cleaned = re.sub(r'\s+', ' ', cleaned)
        return cleaned.strip()

    def parse_conversation(self, conversation_text):
        """Parse conversation into structured format"""
        lines = conversation_text.strip().split('\n')
        dialogue = []

        for line in lines:
            if line.startswith('Recovery Agent (RA):') or line.startswith('RA:'):
                speaker = 'RA'
                text = line.split(':', 1)[1].strip()
                dialogue.append({'speaker': speaker, 'text': text})
            elif line.startswith('Borrower (B):') or line.startswith('B:'):
                speaker = 'B'
                text = line.split(':', 1)[1].strip()
                dialogue.append({'speaker': speaker, 'text': text})

        return pd.DataFrame(dialogue)

    def translate_to_english(self, text):
        """Translate Hindi text to English"""
        try:
            translation = self.translator.translate(text, src='hi', dest='en')
            return translation.text
        except:
            return "Translation failed"

    def extract_action_items(self, df):
        """Extract action items from conversation"""
        action_items = []
        english_texts = df['text'].apply(self.translate_to_english)
        action_keywords = ['will', 'shall', 'must', 'need to', 'going to', 'plan', 'next']

        for text in english_texts:
            doc = self.nlp_en(text)
            for sent in doc.sents:
                if any(keyword in sent.text.lower() for keyword in action_keywords):
                    action_items.append(sent.text)

        return list(set(action_items))[:5]  # Limit to 4-5 items

    def analyze_sentiment(self, df):
        """Analyze sentiment for each utterance"""
        sentiments = []
        sentiment_flow_ra = []
        sentiment_flow_b = []

        for _, row in df.iterrows():
            english_text = self.translate_to_english(row['text'])
            sentiment = self.sentiment_model(english_text)[0]

            # Map sentiment to emotion
            emotion = {
                "1 star": "Very Negative",
                "2 stars": "Negative",
                "3 stars": "Neutral",
                "4 stars": "Positive",
                "5 stars": "Very Positive"
            }.get(sentiment['label'], "Unknown")

            sentiments.append({
                'speaker': row['speaker'],
                'text': row['text'],
                'english_text': english_text,
                'sentiment': sentiment['label'],
                'emotion': emotion,
                'score': sentiment['score']
            })

            # Track sentiment flow for RA and Borrower separately
            if row['speaker'] == 'RA':
                sentiment_flow_ra.append(f"{emotion} ({sentiment['score']:.2f})")
            elif row['speaker'] == 'B':
                sentiment_flow_b.append(f"{emotion} ({sentiment['score']:.2f})")

        return pd.DataFrame(sentiments), sentiment_flow_ra, sentiment_flow_b

    def generate_summary(self, df):
        """Generate a concise summary of the conversation"""
        full_text = ' '.join(df['text'])
        english_text = self.translate_to_english(full_text)
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

        summary = summarizer(english_text, max_length=150, min_length=50, do_sample=False)
        return summary[0]['summary_text']

    def analyze_conversation(self, conversation_text):
        """Main function to analyze the conversation"""
        df = self.parse_conversation(conversation_text)
        summary = self.generate_summary(df)
        action_items = self.extract_action_items(df)
        sentiment_analysis, sentiment_flow_ra, sentiment_flow_b = self.analyze_sentiment(df)

        return {
            'summary': summary,
            'action_items': action_items,
            'sentiment_analysis': {
                'detailed_analysis': sentiment_analysis,
                'sentiment_flow_ra': sentiment_flow_ra,
                'sentiment_flow_b': sentiment_flow_b
            }
        }

    def generate_report(self, analysis_results):
        """Generate and print a formatted report"""
        def format_dialogue(dialogue_row):
            return f"""
        ┌──────────────────────────────────────────────┐
        │ Speaker: {dialogue_row['speaker']}                             │
        │ Original: {dialogue_row['text']}            │
        │ English: {dialogue_row['english_text']}         │
        │ Sentiment: {dialogue_row['emotion']} ({dialogue_row['score']:.2f})  │
        └──────────────────────────────────────────────┘
        """

        dialogue_analysis = "\n".join(
            format_dialogue(row) for _, row in analysis_results['sentiment_analysis']['detailed_analysis'].iterrows()
        )

        report = (
    f"""
# Conversation Analysis Report

## Summary
{analysis_results['summary']}

## Key Actions
""" +
    "\n".join([f"- {item}" for item in analysis_results['action_items']]) +
    f"""

## Sentiment Analysis
### Recovery Agent Sentiment Flow
{' -> '.join(analysis_results['sentiment_analysis']['sentiment_flow_ra'])}

### Borrower Sentiment Flow
{' -> '.join(analysis_results['sentiment_analysis']['sentiment_flow_b'])}

### Dialogue Analysis
{dialogue_analysis}
"""
)

        print(report)
        return report

# Example usage
if __name__ == "__main__":
    analyzer = HindiConversationAnalyzer()
    conversation_text = """Recovery Agent (RA): नमस्तेश्री कुमार, मैंएक्स वाई जेड फाइनेंस सेबोल रहा हूं। आपके लोन के बारेमेंबात
करनी थी।
Borrower (B): हां, बोलि ए। क्या बात है?
RA: सर, आपका पि छलेमहीनेका EMI अभी तक नहींआया है। क्या कोई समस्या है?
B: हां, थोड़ी दि क्कत है। मेरी नौकरी चली गई हैऔर मैंनया काम ढूंढ रहा हूं।
RA: ओह, यह तो बरुा हुआ। लेकि न सर, आपको समझना होगा कि लोन का भगु तान समय पर करना बहुत जरूरी
है।
B: मैंसमझता हूं, लेकि न अभी मेरेपास पसै ेनहीं हैं। क्या कुछ समय मि ल सकता है?
RA: हम समझतेहैंआपकी स्थि ति । क्या आप अगलेहफ्तेतक कुछ भगु तान कर सकतेहैं?
B: मैंकोशि श करूंगा, लेकि न परूा EMI नहींदेपाऊंगा। क्या आधा भगु तान चलेगा?
RA: ठीक है, आधा भगु तान अगलेहफ्तेतक कर दीजि ए। बाकी का क्या प्लान हैआपका?
B: मझु ेउम्मीद हैकि अगलेमहीनेतक मझु ेनया काम मि ल जाएगा। तब मैंबाकी बकाया चकु ा दंगू ा।
RA: ठीक है। तो हम ऐसा करतेहैं- आप अगलेहफ्तेतक आधा EMI जमा कर दीजि ए, और अगलेमहीनेके 15
तारीख तक बाकी का भगु तान कर दीजि ए। क्या यह आपको स्वीकार है?
B: हां, यह ठीक रहेगा। मैंइस प्लान का पालन करनेकी परूी कोशि श करूंगा।

RA: बहुत अच्छा। मैंआपको एक SMS भेज रहा हूं जि समेंभगु तान की डि टेल्स होंगी। कृपया इसका पालन करें
और समय पर भगु तान करें।
B: ठीक है, धन्यवाद आपके समझनेके लि ए।
RA: आपका स्वागत है। अगर कोई और सवाल हो तो मझु ेबताइएगा। अलवि दा।
B: अलवि दा।"""
    results = analyzer.analyze_conversation(conversation_text)
    report = analyzer.generate_report(results)
    with open('conversation_analysis_report.md', 'w', encoding='utf-8') as f:
        f.write(report)


Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.



# Conversation Analysis Report

## Summary
Finance company calls to ask if Kumar can pay back his loan on time. Kumar tells them he has lost his job and is looking for a new job. He says he will try to pay half of the payment by next week. The rest will be paid by the 15th of the next month.

## Key Actions
- So we do this - you deposit half the EMI by next week, and by 15th of the next month
- I will try, but will not be able to pay the full EMI.
- I will try my best to follow this plan.
- Okay, please do half the work till next week.
- Will half the Bhagu Tan do?

## Sentiment Analysis
### Recovery Agent Sentiment Flow
Very Negative (0.51) -> Very Negative (0.40) -> Very Negative (0.55) -> Positive (0.41) -> Neutral (0.72) -> Neutral (0.70) -> Very Positive (0.51) -> Very Positive (0.72)

### Borrower Sentiment Flow
Very Negative (0.35) -> Neutral (0.55) -> Neutral (0.36) -> Negative (0.42) -> Neutral (0.34) -> Positive (0.39) -> Neutral (0.72) -> Very Positive (0.57)

### Dialogue 