In [None]:
!pip install PyMuPDF pandas openai gradio plotly transformers torch -q


In [None]:
import fitz  # PyMuPDF
import pandas as pd
import re
from datetime import datetime
import os
import gradio as gr
import json
from collections import defaultdict
import warnings
warnings.filterwarnings('ignore')

# Remove any inherited proxy configuration (upper- and lower-case variants)
# from this process's environment; absent variables are ignored.
for _proxy_var in ('HTTP_PROXY', 'HTTPS_PROXY', 'http_proxy', 'https_proxy'):
    os.environ.pop(_proxy_var, None)
del _proxy_var

# Import OpenAI
from openai import OpenAI

# Import Hugging Face
from transformers import pipeline


In [None]:
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF using PyMuPDF.

    Args:
        pdf_path: Path of the PDF file to open.

    Returns:
        The text of all pages concatenated in page order, or ``None`` if the
        file could not be opened or read (the error is printed).
    """
    try:
        # The context manager closes the document even when extracting a page
        # fails, which a trailing close() call does not guarantee.
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        print(f"Error reading PDF: {str(e)}")
        return None

def parse_paytm_statement(text):
    """Parse the extracted text of a Paytm UPI statement into transactions.

    A transaction starts at a line containing a date such as ``12 Jan`` whose
    following line contains a time such as ``10:30 AM``. Up to 15 lines from
    that header are scanned for the counterparty ("Paid to", "Received from",
    "Money sent to"), a signed amount such as "- Rs.250.00" and an optional
    "Tag: #Category" marker on a single line. Scanning stops early at the next
    transaction header so that details belonging to a following transaction
    are never attributed to the current one.

    Args:
        text: Statement text with one statement line per text line.

    Returns:
        A list of dicts with keys ``Date``, ``Time``, ``Description``,
        ``Amount`` (positive float), ``Type`` (``'Debit'`` or ``'Credit'``)
        and ``Category`` (``''`` when no tag was found). Candidates without a
        description or without a positive amount are omitted.
    """
    if not text:
        return []

    date_re = re.compile(r'(\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec))')
    time_re = re.compile(r'(\d{1,2}:\d{2}\s+(?:AM|PM))')
    # Group 1 is the sign, group 2 the digits (with optional thousands commas).
    amount_re = re.compile(r'([+-])\s*Rs\.?([\d,]+(?:\.\d{2})?)')
    # Phrase introducing the counterparty -> transaction type it implies.
    detail_prefixes = (
        ('Paid to', 'Debit'),
        ('Received from', 'Credit'),
        ('Money sent to', 'Debit'),
    )

    lines = text.split('\n')
    total = len(lines)

    def header_at(index):
        """Return (date, time) if a transaction header starts at *index*."""
        date_match = date_re.search(lines[index])
        if not date_match or index + 1 >= total:
            return None
        time_match = time_re.search(lines[index + 1])
        if not time_match:
            return None
        return date_match.group(1), time_match.group(1)

    transactions = []
    for i in range(total):
        header = header_at(i)
        if header is None:
            continue
        date, time = header

        transaction_detail = ""
        amount = 0
        transaction_type = ""
        category = ""

        for j in range(i, min(i + 15, total)):
            # Reaching the next header means the current transaction is over.
            if j > i and header_at(j) is not None:
                break

            current_line = lines[j].strip()

            for prefix, implied_type in detail_prefixes:
                if prefix in current_line:
                    transaction_detail = current_line.replace(prefix, '').strip()
                    transaction_type = implied_type
                    break

            amount_match = amount_re.search(current_line)
            if amount_match:
                sign, digits = amount_match.groups()
                try:
                    amount = float(digits.replace(',', ''))
                except ValueError:
                    # E.g. a lone comma after "Rs." - not a usable amount.
                    pass
                else:
                    # Trust the sign attached to the amount, not any other
                    # hyphen or plus that happens to be on the line.
                    transaction_type = 'Debit' if sign == '-' else 'Credit'

            if 'Tag:' in current_line and '#' in current_line:
                category = current_line.split('#')[1].strip()

        if transaction_detail and amount > 0:
            transactions.append({
                'Date': date,
                'Time': time,
                'Description': transaction_detail,
                'Amount': amount,
                'Type': transaction_type,
                'Category': category
            })

    return transactions

In [None]:
# Month abbreviation -> month number, used to order "DD Mon" date strings.
_MONTH_NUMBER = {
    name: number
    for number, name in enumerate(
        ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
         'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
        start=1,
    )
}


def _chronological_key(date, time):
    """Return a sort key for a 'DD Mon' date and an 'H:MM AM/PM' time.

    Unparseable dates sort after all parseable ones; an unparseable time
    counts as midnight.
    """
    try:
        day, month = str(date).split()[:2]
        date_key = (0, _MONTH_NUMBER[month], int(day))
    except (KeyError, ValueError):
        date_key = (1, 0, 0)

    try:
        clock, meridiem = str(time).split()[:2]
        hour, minute = clock.split(':')
        minutes = (int(hour) % 12 + (12 if meridiem == 'PM' else 0)) * 60 + int(minute)
    except ValueError:
        minutes = 0

    return date_key + (minutes,)


def clean_and_structure_data(transactions):
    """Turn parsed transactions into a cleaned, chronologically ordered table.

    Exact duplicate rows are dropped, categories are stripped and blank ones
    become ``'Uncategorized'``, a ``Month`` column is derived from the second
    token of ``Date``, and rows are ordered by month, day and time.

    The dates carry no year, so ordering is by calendar position only; a
    statement that spans a year boundary cannot be ordered correctly.

    Args:
        transactions: List of transaction dicts with at least ``Date`` and
            ``Category`` keys (``Time`` is used for ordering when present).

    Returns:
        A ``pandas.DataFrame`` with a fresh 0..n-1 index; an empty frame when
        *transactions* is empty.
    """
    df = pd.DataFrame(transactions)

    if df.empty:
        return df

    df = df.drop_duplicates()
    df['Category'] = df['Category'].str.strip().replace('', 'Uncategorized')
    df['Month'] = df['Date'].apply(lambda x: x.split()[1] if isinstance(x, str) else '')

    # Sorting the raw strings would be lexicographic ("10 Jan" < "2 Feb" <
    # "9 Jan"), so order by a parsed key instead. sorted() is stable, so rows
    # with equal keys keep their original relative order.
    times = df['Time'] if 'Time' in df.columns else [''] * len(df)
    keys = [_chronological_key(d, t) for d, t in zip(df['Date'], times)]
    order = sorted(range(len(keys)), key=keys.__getitem__)

    return df.iloc[order].reset_index(drop=True)

In [None]:
def analyze_spending(df):
    """Summarise income, expenses and spending patterns of a transaction table.

    Rows whose ``Type`` is ``'Credit'`` count as income and rows whose
    ``Type`` is ``'Debit'`` count as expenses.

    Args:
        df: DataFrame with ``Type``, ``Amount``, ``Category`` and
            ``Description`` columns.

    Returns:
        ``{}`` for an empty frame, otherwise a dict with totals, net savings,
        per-category debit totals (largest first), the five largest debits,
        transaction counts and average debit/credit amounts (0 when there are
        no rows of that type).
    """
    if df.empty:
        return {}

    # Filter once and reuse the two subsets for every metric below.
    credits = df[df['Type'] == 'Credit']
    debits = df[df['Type'] == 'Debit']

    income_total = float(credits['Amount'].sum())
    expense_total = float(debits['Amount'].sum())

    per_category = debits.groupby('Category')['Amount'].sum().sort_values(ascending=False)
    largest_debits = debits.nlargest(5, 'Amount')[['Description', 'Amount', 'Category']]

    return {
        'total_income': income_total,
        'total_expenses': expense_total,
        'net_savings': income_total - expense_total,
        'category_spending': per_category.to_dict(),
        'top_expenses': largest_debits.to_dict('records'),
        'total_transactions': len(df),
        'credit_count': len(credits),
        'debit_count': len(debits),
        'avg_expense': float(debits['Amount'].mean()) if len(debits) > 0 else 0,
        'avg_income': float(credits['Amount'].mean()) if len(credits) > 0 else 0,
    }


In [None]:
# OpenAI-backed recommendations; the API key is expected to start with 'sk-'.
def generate_openai_recommendations(analysis_data, df, api_key):
    """Generate a markdown advisory report with OpenAI's chat completions API.

    Args:
        analysis_data: Dict of aggregate figures. The keys read here are
            ``total_income``, ``total_expenses``, ``net_savings``,
            ``total_transactions``, ``category_spending`` and
            ``top_expenses``; missing keys fall back to 0 / empty.
        df: Transaction table. Not used by this function.
        api_key: OpenAI API key; after stripping whitespace it must start
            with ``sk-``. ``None`` or an empty string is rejected as invalid.

    Returns:
        Markdown text consisting of a heading, the model's reply and a footer.

    Raises:
        Exception: On any failure (invalid key, API or network error). The
            message is prefixed with ``"OpenAI Error: "`` and the original
            exception is attached as ``__cause__``.
    """

    try:
        # Treat a missing key like an empty one so the caller gets the
        # "Invalid API key format" message instead of an AttributeError.
        api_key = (api_key or "").strip()

        if not api_key.startswith('sk-'):
            raise ValueError("Invalid API key format")

        client = OpenAI(
            api_key=api_key,
            timeout=30.0,
            max_retries=2
        )

        summary = f"""
        Financial Summary:
        - Total Income: ‚Çπ{analysis_data.get('total_income', 0):.2f}
        - Total Expenses: ‚Çπ{analysis_data.get('total_expenses', 0):.2f}
        - Net Savings: ‚Çπ{analysis_data.get('net_savings', 0):.2f}
        - Total Transactions: {analysis_data.get('total_transactions', 0)}

        Category-wise Spending:
        {json.dumps(analysis_data.get('category_spending', {}), indent=2)}

        Top 5 Expenses:
        {json.dumps(analysis_data.get('top_expenses', []), indent=2)}
        """

        prompt = f"""
        You are a personal finance advisor. Based on the following UPI transaction data, provide:

        1. Spending Pattern Analysis (3-4 sentences)
        2. Key Insights (4-5 bullet points)
        3. Personalized Recommendations (5-6 actionable suggestions)
        4. Budget Allocation Advice (50-30-20 rule)
        5. Wasteful Spending Detection

        Transaction Data:
        {summary}

        Provide a comprehensive financial advisory report in markdown format.
        """

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an expert personal finance advisor specializing in budget planning, savings optimization, and financial wellness."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.7,
            max_tokens=1500
        )

        ai_response = response.choices[0].message.content

        return f"""
# ü§ñ OpenAI GPT-3.5 Analysis

{ai_response}

---
*Generated using OpenAI GPT-3.5-turbo*
"""

    except Exception as e:
        # Re-raise under one prefix for the caller's fallback message, keeping
        # the original exception as the explicit cause.
        raise Exception(f"OpenAI Error: {str(e)}") from e


In [None]:
# Module-level cache for the text2text pipeline; None until a load succeeds.
_hf_model = None

def load_huggingface_model():
    """Return the cached FLAN-T5 pipeline, creating it on first use.

    Returns:
        The pipeline object, or ``None`` when creating it failed (the error
        is printed and the next call tries again).
    """
    global _hf_model

    # Already loaded: hand back the cached pipeline.
    if _hf_model is not None:
        return _hf_model

    print("üîÑ Loading Hugging Face model (FLAN-T5)...")
    try:
        _hf_model = pipeline(
            "text2text-generation",
            model="google/flan-t5-base",
            max_length=512,
            device=-1  # CPU
        )
    except Exception as e:
        print(f"‚ùå Error loading model: {str(e)}")
        _hf_model = None
    else:
        print("‚úÖ Model loaded successfully!")

    return _hf_model

def generate_huggingface_recommendations(analysis_data, df):
    """Build a markdown report around text generated by the FLAN-T5 pipeline.

    The model receives a short prompt with income, expenses, savings and the
    first three spending categories. Its reply is embedded in a report that
    also lists the first five categories and a few threshold-based tips.

    Args:
        analysis_data: Dict with ``total_income``, ``total_expenses``,
            ``net_savings``, ``category_spending``, ``avg_expense`` and
            ``debit_count``.
        df: Transaction table. Not used by this function.

    Returns:
        The report as a markdown string.

    Raises:
        Exception: On any failure, with the message prefixed by
            ``"Hugging Face Error: "``.
    """

    try:
        model = load_huggingface_model()

        if model is None:
            raise Exception("Failed to load Hugging Face model")

        savings_rate = (analysis_data['net_savings'] / analysis_data['total_income'] * 100) if analysis_data['total_income'] > 0 else 0

        income = analysis_data['total_income']
        expenses = analysis_data['total_expenses']
        net = analysis_data['net_savings']
        by_category = analysis_data['category_spending']
        ranked = list(by_category.items())

        # Keep the prompt short for FLAN-T5: only the first three categories.
        top_three = ', '.join(f"{name}: ‚Çπ{spent:.0f}" for name, spent in ranked[:3])
        prompt = f"""Analyze this financial data and provide 3 key recommendations:
Income: ‚Çπ{income:.0f}
Expenses: ‚Çπ{expenses:.0f}
Savings: ‚Çπ{net:.0f} ({savings_rate:.1f}%)
Top spending: {top_three}

Provide financial advice:"""

        print("ü§ñ Generating Hugging Face recommendations...")
        response = model(prompt, max_length=300, num_return_sequences=1)
        ai_text = response[0]['generated_text']

        # Collect the report piece by piece and join once at the end.
        sections = [f"""
# ü§ó Hugging Face FLAN-T5 Analysis

## üí∞ Financial Overview
- **Total Income:** ‚Çπ{income:.2f}
- **Total Expenses:** ‚Çπ{expenses:.2f}
- **Net Savings:** ‚Çπ{net:.2f}
- **Savings Rate:** {savings_rate:.1f}%

## ü§ñ AI-Generated Insights

{ai_text}

## üìä Category Breakdown
"""]

        for rank, (category, amount) in enumerate(ranked[:5], 1):
            percentage = (amount / expenses * 100) if expenses > 0 else 0
            sections.append(f"{rank}. **{category}:** ‚Çπ{amount:.2f} ({percentage:.1f}%)\n")

        sections.append(f"""

## üéØ Additional Recommendations

Based on your financial data:
""")

        if savings_rate < 20:
            sections.append("- ‚ö†Ô∏è **Increase Savings:** Your savings rate is below 20%. Try to reduce discretionary expenses.\n")
        elif savings_rate > 30:
            sections.append("- ‚úÖ **Excellent Savings:** You're saving over 30% - keep it up!\n")

        if 'Food' in by_category:
            food_pct = (by_category['Food'] / expenses * 100)
            if food_pct > 30:
                sections.append(f"- üçΩÔ∏è **Food Optimization:** Food is {food_pct:.1f}% of expenses. Consider meal planning.\n")

        sections.append(f"- üí≥ **Average Transaction:** ‚Çπ{analysis_data['avg_expense']:.2f} - Monitor small purchases.\n")
        sections.append(f"- üìà **Transaction Count:** {analysis_data['debit_count']} payments made - Track spending frequency.\n")

        sections.append("\n---\n*Generated using Hugging Face FLAN-T5-base model*")

        return "".join(sections)

    except Exception as e:
        raise Exception(f"Hugging Face Error: {str(e)}")


In [None]:
def generate_rule_based_recommendations(analysis):
    """Generate a markdown financial report from fixed rules (no model call).

    Args:
        analysis: Dict with ``total_income``, ``total_expenses``,
            ``net_savings``, ``category_spending`` (category -> amount),
            ``top_expenses`` (list of dicts with ``Description``, ``Amount``
            and ``Category``), ``debit_count``, ``credit_count`` and
            ``avg_expense``.

    Returns:
        The report as a markdown string: overview, spending pattern, category
        breakdown (first six), insights, recommendations, top expenses, an
        action plan and, when there is income, a 50-30-20 budget.
    """

    income = analysis['total_income']
    expenses = analysis['total_expenses']
    net_savings = analysis['net_savings']
    category_spending = analysis['category_spending']

    savings_rate = (net_savings / income * 100) if income > 0 else 0
    # With no recorded income the rate is pinned at 0, so overspending has to
    # be detected from the net figure rather than from a negative rate.
    overspending = net_savings < 0

    def share_of_expenses(amount):
        """Return *amount* as a percentage of total expenses (0 if none)."""
        return (amount / expenses * 100) if expenses > 0 else 0

    report = f"""
# üìä RULE-BASED FINANCIAL ANALYSIS

## üí∞ Financial Overview
- **Total Income:** ‚Çπ{income:.2f}
- **Total Expenses:** ‚Çπ{expenses:.2f}
- **Net Savings:** ‚Çπ{net_savings:.2f}
- **Savings Rate:** {savings_rate:.1f}%

## üìà Spending Pattern Analysis
"""

    if overspending:
        report += "- üö® **Critical Alert:** Spending exceeds income! Immediate action required.\n"
    elif savings_rate < 20:
        report += "- ‚ö†Ô∏è **Below Target:** Savings rate is under 20%. Room for improvement.\n"
    elif savings_rate > 30:
        report += "- ‚úÖ **Excellent:** Savings rate above 30%. Great financial discipline!\n"
    else:
        report += "- üëç **Good Progress:** Savings rate between 20-30%. Keep improving!\n"

    report += f"- Made **{analysis['debit_count']}** expense transactions\n"
    report += f"- Received **{analysis['credit_count']}** income transactions\n"
    report += f"- Average expense: **‚Çπ{analysis['avg_expense']:.2f}** per transaction\n"

    if category_spending:
        report += "\n## üí≥ Category-wise Spending\n\n"
        for i, (category, amount) in enumerate(list(category_spending.items())[:6], 1):
            percentage = share_of_expenses(amount)
            report += f"{i}. **{category}:** ‚Çπ{amount:.2f} ({percentage:.1f}%)\n"

    report += "\n## üí° Key Insights\n\n"

    insights = []

    if overspending:
        insights.append("‚ö†Ô∏è **Urgent:** Create an emergency budget. Cut all non-essential expenses.")
    elif savings_rate < 20:
        insights.append("üìâ Savings rate below recommended 20%. Identify areas to cut spending.")

    if 'Food' in category_spending:
        food_pct = share_of_expenses(category_spending['Food'])
        if food_pct > 30:
            insights.append(f"üçΩÔ∏è Food expenses high ({food_pct:.1f}%). Meal planning can save 15-20%.")

    if 'Shopping' in category_spending:
        shopping_pct = share_of_expenses(category_spending['Shopping'])
        if shopping_pct > 25:
            insights.append(f"üõçÔ∏è Shopping at {shopping_pct:.1f}% of expenses. Review necessity of purchases.")

    if 'Financial Services' in category_spending:
        fin_amount = category_spending['Financial Services']
        insights.append(f"üí∞ Financial services cost: ‚Çπ{fin_amount:.2f}. Review loan terms.")

    for insight in insights:
        report += f"- {insight}\n"

    report += "\n## üéØ Actionable Recommendations\n\n"

    recommendations = [
        "**Track Daily:** Monitor expenses daily to stay within budget.",
        f"**Emergency Fund:** Build ‚Çπ{expenses*3:.2f} as 3-month safety net.",
        "**50-30-20 Rule:** Allocate 50% needs, 30% wants, 20% savings.",
        f"**Automate Savings:** Set up auto-transfer of ‚Çπ{income*0.2:.2f} monthly.",
        "**Review Subscriptions:** Cancel unused services to save ‚Çπ500-1000/month.",
    ]

    if savings_rate < 20:
        recommendations.insert(0, "**Priority:** Increase savings to 20% by reducing discretionary spending.")

    for i, rec in enumerate(recommendations, 1):
        report += f"{i}. {rec}\n"

    if analysis['top_expenses']:
        report += "\n## üîù Top 5 Expenses\n\n"
        for i, expense in enumerate(analysis['top_expenses'], 1):
            report += f"{i}. **{expense['Description']}** - ‚Çπ{expense['Amount']:.2f} ({expense['Category']})\n"

    report += "\n## üìã This Month's Action Plan\n\n"
    report += "‚úÖ Set budget limits for each category\n"
    report += "‚úÖ Cut one unnecessary expense\n"
    report += "‚úÖ Start emergency fund with ‚Çπ500\n"
    report += "‚úÖ Use 24-hour rule for purchases >‚Çπ500\n"
    report += "‚úÖ Review progress weekly\n"

    if income > 0:
        report += "\n## üíµ Suggested Budget (50-30-20 Rule)\n\n"
        needs = income * 0.50
        wants = income * 0.30
        savings = income * 0.20

        report += f"- **Needs (50%):** ‚Çπ{needs:.2f} - Bills, food, transport\n"
        report += f"- **Wants (30%):** ‚Çπ{wants:.2f} - Entertainment, shopping\n"
        report += f"- **Savings (20%):** ‚Çπ{savings:.2f} - Emergency fund, investments\n"

        # How far actual savings fall short of the 20% target.
        gap = savings - net_savings
        if gap > 0:
            report += f"\nüí° **Gap:** You're ‚Çπ{gap:.2f} short of ideal savings. Focus on reducing 'wants'.\n"

    report += "\n---\n*üí° Small daily changes lead to significant long-term savings!*"

    return report


In [None]:
def process_upi_statement(pdf_file, llm_choice, openai_key=None):
    """Run the full pipeline on one statement: extract, parse, analyse, advise.

    Args:
        pdf_file: The uploaded statement. Either a path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute is the
            path. ``None`` yields a "please upload" message.
        llm_choice: ``"OpenAI GPT-3.5"``, ``"Hugging Face (FLAN-T5)"`` or any
            other value for the rule-based report.
        openai_key: API key, only consulted for the OpenAI choice. Without a
            plausible key the rule-based report is returned with a notice.

    Returns:
        A tuple ``(markdown_report, csv_text, html_table)``. On any failure
        the first item is an error message and the other two are ``None``.
        If the chosen generator raises, the rule-based report is used instead
        and the error is shown above it.
    """

    try:
        if pdf_file is None:
            return "‚ùå Please upload a PDF file first.", None, None

        # Accept plain paths as well as upload objects exposing `.name`.
        # Paths are checked first because pathlib's `.name` is only the final
        # component, not the full path.
        if isinstance(pdf_file, (str, os.PathLike)):
            pdf_path = os.fspath(pdf_file)
        else:
            pdf_path = pdf_file.name

        # Extract and parse
        text = extract_text_from_pdf(pdf_path)
        if text is None:
            return "‚ùå Failed to read PDF.", None, None

        transactions = parse_paytm_statement(text)
        if not transactions:
            return "‚ùå No transactions found.", None, None

        df = clean_and_structure_data(transactions)
        if df.empty:
            return "‚ùå No valid transactions.", None, None

        analysis = analyze_spending(df)

        # Generate recommendations based on LLM choice
        success_msg = f"""
# ‚úÖ Successfully Processed {len(df)} Transactions!

## üìä Quick Summary

| Metric | Value |
|--------|-------|
| üí∞ Total Income | ‚Çπ{analysis['total_income']:.2f} |
| üí∏ Total Expenses | ‚Çπ{analysis['total_expenses']:.2f} |
| üíµ Net Savings | ‚Çπ{analysis['net_savings']:.2f} |
| üìà Savings Rate | {(analysis['net_savings']/analysis['total_income']*100) if analysis['total_income'] > 0 else 0:.1f}% |

---

"""

        try:
            if llm_choice == "OpenAI GPT-3.5":
                if not openai_key or len(openai_key.strip()) < 20:
                    recommendations = "‚ö†Ô∏è **OpenAI API Key Required**\n\nPlease provide a valid API key to use OpenAI.\n\n" + generate_rule_based_recommendations(analysis)
                else:
                    recommendations = generate_openai_recommendations(analysis, df, openai_key)

            elif llm_choice == "Hugging Face (FLAN-T5)":
                recommendations = generate_huggingface_recommendations(analysis, df)

            else:  # Rule-based
                recommendations = generate_rule_based_recommendations(analysis)

        except Exception as e:
            # Any generator failure degrades to the rule-based report.
            error_msg = str(e)
            recommendations = f"‚ö†Ô∏è **LLM Error:** {error_msg}\n\n**Falling back to rule-based analysis...**\n\n---\n\n{generate_rule_based_recommendations(analysis)}"

        success_msg += recommendations

        csv_output = df.to_csv(index=False)

        return success_msg, csv_output, df.to_html(index=False, classes='table table-striped')

    except Exception as e:
        return f"‚ùå Error: {str(e)}", None, None


In [None]:
def create_gradio_interface():
    """Build the Gradio Blocks app for uploading and analysing a statement.

    The left column holds the PDF upload, the model selector, the optional
    OpenAI key field and the analyse button; the right column shows the
    markdown report. Below are a CSV download and an HTML preview of the
    transactions table.

    Returns:
        The ``gr.Blocks`` app, not yet launched.
    """
    import tempfile

    with gr.Blocks(title="UPI Financial Analyzer with Multi-LLM Support", theme=gr.themes.Soft()) as app:

        gr.Markdown("""
        # üí∞ Personal UPI Financial Analyzer
        ## ü§ñ Multi-LLM Support: OpenAI | Hugging Face | Rule-Based

        Upload your UPI statement and choose your preferred AI model for analysis!
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### üì§ Upload & Settings")

                pdf_input = gr.File(
                    label="Upload UPI Statement PDF",
                    file_types=[".pdf"],
                    file_count="single"
                )

                llm_choice = gr.Radio(
                    choices=[
                        "Rule-Based (FREE, Fast)",
                        "Hugging Face (FLAN-T5)",
                        "OpenAI GPT-3.5"
                    ],
                    value="Rule-Based (FREE, Fast)",
                    label="ü§ñ Select AI Model",
                    info="Choose your preferred analysis method"
                )

                openai_key_input = gr.Textbox(
                    label="üîë OpenAI API Key (Optional)",
                    placeholder="sk-proj-...",
                    type="password",
                    info="Only needed for OpenAI option"
                )

                analyze_btn = gr.Button("üîç Analyze Statement", variant="primary", size="lg")

                gr.Markdown("""
                ### ü§ñ LLM Options

                **1. Rule-Based (FREE)**
                - ‚ö° Instant results
                - üìä Comprehensive analysis
                - üí° Actionable insights
                - üí∞ No API costs

                **2. Hugging Face**
                - ü§ó Open-source FLAN-T5
                - üÜì Free to use
                - üß† AI-powered insights
                - ‚è±Ô∏è Takes 30-60 seconds

                **3. OpenAI GPT-3.5**
                - üöÄ Advanced AI
                - üìù Natural language
                - üí≥ Requires API key
                - üíµ ~$0.002/analysis
                """)

            with gr.Column(scale=2):
                gr.Markdown("### üìä Analysis Results")
                recommendations_output = gr.Markdown(
                    value="""
## üëã Welcome to UPI Financial Analyzer!

### üéØ How to Use:
1. Upload your Paytm UPI statement PDF
2. Choose your preferred AI model
3. Click "Analyze Statement"
4. Get comprehensive financial insights!

### ü§ñ Available AI Models:

**Rule-Based Analysis (Recommended)**
- Fast, accurate, and completely free
- Comprehensive financial insights
- Budget planning with 50-30-20 rule
- Perfect for regular use

**Hugging Face FLAN-T5**
- Open-source AI model
- Free to use, no API key needed
- AI-generated insights
- Takes about 30-60 seconds

**OpenAI GPT-3.5**
- Most advanced analysis
- Natural conversational insights
- Requires OpenAI API key
- Best for detailed advice

### üìä What You'll Get:
‚úÖ Complete financial overview
‚úÖ Category-wise spending breakdown
‚úÖ Personalized recommendations
‚úÖ Budget planning advice
‚úÖ Savings optimization tips
‚úÖ Downloadable CSV data
"""
                )

        with gr.Row():
            with gr.Column():
                gr.Markdown("### üíæ Download CSV")
                csv_output = gr.File(label="Transaction Data")

            with gr.Column():
                gr.Markdown("### üìã Transactions Table")
                html_output = gr.HTML(label="Preview")

        def process_and_save(pdf, llm_choice, openai_key):
            """Run the analysis and write the CSV text to a downloadable file."""
            result, csv_data, html_data = process_upi_statement(pdf, llm_choice, openai_key)

            csv_file = None
            if csv_data:
                # One uniquely named temp file per request: a fixed file name
                # would let concurrent users of the shared app overwrite, and
                # then download, each other's transactions.
                # newline="" writes the CSV text's line endings unchanged.
                with tempfile.NamedTemporaryFile(
                    mode="w",
                    prefix="upi_transactions_",
                    suffix=".csv",
                    delete=False,
                    encoding="utf-8",
                    newline="",
                ) as f:
                    f.write(csv_data)
                    csv_file = f.name

            return result, csv_file, html_data

        analyze_btn.click(
            fn=process_and_save,
            inputs=[pdf_input, llm_choice, openai_key_input],
            outputs=[recommendations_output, csv_output, html_output]
        )

        gr.Markdown("""
        ---
        ### üìö Model Comparison

        | Feature | Rule-Based | Hugging Face | OpenAI |
        |---------|-----------|--------------|--------|
        | Speed | ‚ö° Instant | üê¢ 30-60s | ‚ö° 2-5s |
        | Cost | üÜì Free | üÜì Free | üíµ $0.002 |
        | Quality | ‚≠ê‚≠ê‚≠ê‚≠ê | ‚≠ê‚≠ê‚≠ê‚≠ê | ‚≠ê‚≠ê‚≠ê‚≠ê‚≠ê |
        | API Key | ‚ùå None | ‚ùå None | ‚úÖ Required |
        | Offline | ‚úÖ Yes | ‚úÖ Yes* | ‚ùå No |

        *After first model load

        ### üí° Recommendations:
        - **Daily Use:** Rule-Based (fast & free)
        - **AI Experience:** Hugging Face (free AI)
        - **Best Quality:** OpenAI (paid)

        ### üîí Privacy & Security:
        - All processing happens in your session
        - Data is never stored permanently
        - API keys are not logged
        - PDF files are processed locally

        ---
        *Made with ‚ù§Ô∏è using Gradio | Powered by OpenAI, Hugging Face & Custom ML*
        """)

    return app

In [None]:
print("üöÄ Starting Multi-LLM UPI Financial Analyzer...")
print("="*60)

# For Colab: Direct test
# The upload helper exists only inside Google Colab. Elsewhere the direct
# test is skipped so the Gradio app below can still be launched.
try:
    from google.colab import files
except ImportError:
    files = None
    uploaded = {}
    print("\nGoogle Colab not detected - skipping the direct upload test.")
else:
    print("\nüìÅ Upload your PDF file:")
    uploaded = files.upload()

if uploaded:
    pdf_filename = list(uploaded.keys())[0]
    print(f"\n‚úÖ File uploaded: {pdf_filename}")
    print("="*60)

    class TempFile:
        """Minimal stand-in for an upload object: only carries the path."""

        def __init__(self, name):
            self.name = name

    temp_file = TempFile(pdf_filename)

    print("\nüîÑ Processing with Rule-Based Analysis...\n")
    recommendations, csv_data, html_data = process_upi_statement(temp_file, "Rule-Based (FREE, Fast)")

    print(recommendations)

    if csv_data:
        # Explicit UTF-8 keeps non-ASCII descriptions intact on any platform;
        # newline='' writes the CSV text's line endings unchanged.
        with open('upi_transactions.csv', 'w', encoding='utf-8', newline='') as f:
            f.write(csv_data)
        print("\n" + "="*60)
        print("‚úÖ Data saved to 'upi_transactions.csv'")
        print("="*60)

# Launch Gradio interface
print("\n\nüåê Launching Gradio Interface with Multi-LLM Support...")
print("="*60)
print("\nü§ñ Available Models:")
print("1. Rule-Based Analysis (Fast & Free)")
print("2. Hugging Face FLAN-T5 (AI-Powered, Free)")
print("3. OpenAI GPT-3.5 (Advanced AI, Paid)")
print("="*60)

app = create_gradio_interface()
app.launch(share=True, debug=True)
