<a href="https://colab.research.google.com/github/lavlugit/lavlugit/blob/main/Comprehensive_Genarator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
!pip install fpdf pandas matplotlib gradio requests google-generativeai

import gradio as gr
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF
import tempfile
import os
import requests
import google.generativeai as genai
from pathlib import Path

# ==========================
# Google Gemini API Configuration
# ==========================
# SECURITY: the key is read from the environment so that it never lives in
# source control or in a shared notebook. Any key that has ever been committed
# or published must be treated as compromised and revoked in Google AI Studio.
# In Colab, set it before running this cell, e.g.:
#   import os; os.environ["GEMINI_API_KEY"] = "<your Google AI Studio key>"
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "").strip()

# Defaults describe "demo mode"; they are only overwritten once the client has
# been configured successfully, so both names always exist at module level.
gemini_model = None
API_CONFIGURED = False

if GEMINI_API_KEY:
    # Configure the Gemini client
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        gemini_model = genai.GenerativeModel('gemini-pro')
        print("✅ Gemini API configured successfully!")
        API_CONFIGURED = True
    except Exception as e:
        # Best-effort: a broken client must not prevent the app from starting.
        print(f"⚠️ Gemini configuration warning: {e}")
        gemini_model = None
        API_CONFIGURED = False
else:
    print("⚠️ Gemini configuration warning: GEMINI_API_KEY is not set; running in demo mode.")

class GeminiAPI:
    """Text generator backed by the module-level Gemini client, with a demo mode.

    When the client was not configured (``API_CONFIGURED`` is false at
    construction time) every request is answered with canned section text so
    the rest of the pipeline can still produce a document.
    """

    # Appended to canned text so readers can tell it is not AI output.
    _DEMO_NOTE = "\n\n*[Add your Gemini API key for enhanced AI-generated content]*"

    def __init__(self):
        # Snapshot of the module-level flag set by the configuration step.
        self.configured = API_CONFIGURED

    def generate_text(self, prompt, max_tokens=500):
        """Generate text for *prompt* using Google's Gemini API.

        Args:
            prompt: Instruction text sent to the model.
            max_tokens: Upper bound passed as ``max_output_tokens``.

        Returns:
            The model's text, ``"No content generated."`` when the model
            returned nothing, the canned demo text when the API is not
            configured, or an ``"API Error: ..."`` notice followed by the
            canned demo text when the API call fails. Never raises.
        """
        if not self.configured:
            return self._demo_response(prompt)

        try:
            response = gemini_model.generate_content(
                prompt,
                generation_config=genai.types.GenerationConfig(
                    max_output_tokens=max_tokens,
                    temperature=0.7
                )
            )
            return response.text if response.text else "No content generated."
        except Exception as e:
            # The notice promises demo content, so actually deliver it instead
            # of leaving the section with nothing but an error string.
            return (
                f"API Error: {str(e)}. Using demo content for: {prompt[:100]}..."
                f"\n\n{self._demo_response(prompt)}"
            )

    @staticmethod
    def _topic_from_prompt(prompt, default):
        """Return the text between the first 'about' and the next period.

        Falls back to *default* when the prompt has no 'about' or the
        extracted text is empty. Splitting on the FIRST 'about' keeps topics
        that themselves contain the word intact.
        """
        _, marker, tail = prompt.partition('about')
        if not marker:
            return default
        topic = tail.split('.')[0].strip()
        return topic or default

    def _demo_response(self, prompt):
        """Provide demo responses when API is not configured.

        The first section keyword (in the order listed below) found in the
        lower-cased prompt selects the canned text; prompts matching no
        keyword get a generic placeholder that echoes the prompt's first 80
        characters.
        """
        abstract_topic = self._topic_from_prompt(prompt, 'the selected topic')
        intro_topic = self._topic_from_prompt(prompt, 'the research domain')

        demo_responses = {
            "abstract": f"**Abstract**: This research examines {abstract_topic}. The study employs rigorous methodology to analyze key factors and presents significant findings that advance understanding in this field.",
            "introduction": f"**Introduction**: This paper provides a comprehensive overview of {intro_topic}. The introduction establishes the theoretical framework, research questions, and significance of the study.",
            "methodology": "**Methodology**: The research adopts a mixed-methods approach, combining quantitative data analysis with qualitative insights. Data collection followed established protocols ensuring validity and reliability.",
            "results": "**Results**: Analysis revealed statistically significant findings. Key metrics demonstrate clear patterns supporting the research hypotheses. Detailed results are presented in subsequent sections.",
            "discussion": "**Discussion**: The findings have substantial implications for both theory and practice. This section explores interpretations, limitations, and broader significance.",
            "conclusion": "**Conclusion**: This study makes valuable contributions to the field. Future research directions and practical applications are discussed."
        }

        prompt_lower = prompt.lower()
        for key, value in demo_responses.items():
            if key in prompt_lower:
                return value + self._DEMO_NOTE

        return f"Research content section for: {prompt[:80]}...\n\n*[Enable Gemini API for full AI content generation]*"

# Shared module-level generator used by the paper generator and the UI status
# text. GeminiAPI.__init__ snapshots API_CONFIGURED, so this must run after the
# configuration step above has set that flag.
gemini = GeminiAPI()

# ==========================
# CrossRef API for References
# ==========================
def get_references(query, num=5):
    """Get academic references from CrossRef.

    Args:
        query: Free-text search string sent to the CrossRef ``/works`` endpoint.
        num: Maximum number of works to request.

    Returns:
        A markdown-style string: a bold heading followed by one numbered entry
        per work, separated by blank lines. When the request fails, returns a
        non-200 status, yields no items, or the payload cannot be processed,
        the generic checklist from ``_fallback_references`` is returned
        instead. Never raises.
    """
    try:
        # Let requests build and encode the query string.
        response = requests.get(
            "https://api.crossref.org/works",
            params={"query": query, "rows": num},
            timeout=15,
        )

        if response.status_code != 200:
            return _fallback_references(query)

        items = response.json().get("message", {}).get("items", [])
        if not items:
            return _fallback_references(query)

        refs = ["**Academic References:**"]
        refs.extend(_format_reference(i, item) for i, item in enumerate(items, 1))
        return "\n\n".join(refs)

    except Exception as exc:
        # Deliberate best-effort boundary: reference lookup must never abort
        # paper generation, but the reason is reported instead of swallowed.
        print(f"⚠️ CrossRef lookup failed: {exc}")
        return _fallback_references(query)


def _publication_year(item):
    """Return the first usable year from a CrossRef work's date fields, or None."""
    # Many works have no print date; try the other date fields before giving up.
    for field in ("published-print", "published-online", "published", "issued"):
        date_parts = (item.get(field) or {}).get("date-parts") or []
        if date_parts and date_parts[0] and date_parts[0][0]:
            return date_parts[0][0]
    return None


def _format_reference(index, item):
    """Format one CrossRef work as ``"<n>. <authors> (<year>). <title>. *<journal>*"``."""
    titles = item.get("title") or []
    title = titles[0] if titles else "Untitled"

    author_names = []
    for author in (item.get("author") or [])[:3]:
        name = f"{author.get('given', '')} {author.get('family', '')}".strip()
        # Organisational authors carry only a "name" field.
        name = name or author.get("name", "")
        if name:
            author_names.append(name)
    author_str = ", ".join(author_names) if author_names else "Various Authors"

    year = _publication_year(item)
    year_str = f" ({year})" if year else ""

    journals = item.get("container-title") or []
    journal_str = f" *{journals[0]}*" if journals and journals[0] else ""

    return f"{index}. {author_str}{year_str}. {title}.{journal_str}"

def _fallback_references(query):
    """Fallback reference template"""
    return f"""**References for: {query}**

1. Conduct comprehensive literature review using Google Scholar
2. Search academic databases (IEEE, Springer, Elsevier)
3. Review recent conference proceedings
4. Consult relevant journal articles
5. Examine cited references in key papers

*Note: Add specific references from your literature review*"""

# ==========================
# Dataset Analysis Functions
# ==========================
def summarize_dataset(df):
    """Generate a markdown-style text summary of a DataFrame.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        A string with an overview line (rows x columns), one bullet per column
        (dtype, missing count and percentage, unique count) and, when numeric
        columns exist, mean/std/range for the first four of them. If anything
        goes wrong while summarising, a generic one-line message is returned
        instead. Never raises.
    """
    try:
        row_count = len(df)
        parts = [
            f"**Dataset Overview:** {df.shape[0]:,} rows × {df.shape[1]} columns\n\n",
            "**Column Analysis:**\n",
        ]

        for col in df.columns:
            column = df[col]
            missing = column.isnull().sum()
            # A zero-row frame would otherwise produce a 0/0 division ("nan%").
            missing_pct = (missing / row_count) * 100 if row_count else 0.0
            parts.append(
                f"• **{col}**: {str(column.dtype)} | Missing: {missing} "
                f"({missing_pct:.1f}%) | Unique: {column.nunique()}\n"
            )

        # Numeric columns summary (capped at four columns to keep it short).
        numeric_cols = df.select_dtypes(include=["number"]).columns
        if len(numeric_cols) > 0:
            parts.append(f"\n**Numeric Analysis** ({len(numeric_cols)} columns):\n")
            for col in numeric_cols[:4]:
                column = df[col]
                parts.append(
                    f"• {col}: Mean={column.mean():.2f}, Std={column.std():.2f}, "
                    f"Range=[{column.min():.2f}, {column.max():.2f}]\n"
                )

        return "".join(parts)

    except Exception as exc:
        # Best-effort: the summary only feeds a prompt, so degrade gracefully
        # but report why instead of hiding the failure.
        print(f"⚠️ Dataset summary failed: {exc}")
        return "**Dataset Analysis:** Basic structure analyzed. Detailed statistics available upon request."

def generate_plots_and_stats(df):
    """Render a descriptive-statistics table and per-column plots as PNG files.

    Args:
        df: The pandas DataFrame to visualise. Only numeric columns are used.

    Returns:
        ``(stats_file, plot_files)``: the path of the statistics-table image
        (``None`` when there are no numeric columns or rendering failed) and a
        list of paths of histogram/box-plot images for up to the first three
        numeric columns. Files are written to a fresh temporary directory that
        is intentionally NOT removed here because the caller embeds the images
        afterwards. Never raises; failures are printed and skipped.
    """
    temp_dir = tempfile.mkdtemp()
    stats_file = None
    plot_files = []

    try:
        numeric_df = df.select_dtypes(include=["number"])
    except Exception as e:
        print(f"Visualization generation: {e}")
        return stats_file, plot_files

    # Descriptive statistics rendered as a table image.
    if not numeric_df.empty:
        fig = plt.figure(figsize=(12, 8))
        try:
            plt.axis('tight')
            plt.axis('off')

            stats = numeric_df.describe().round(3)
            table = plt.table(cellText=stats.values,
                              rowLabels=stats.index,
                              colLabels=stats.columns,
                              cellLoc='center',
                              loc='center',
                              colColours=['#f0f8ff'] * len(stats.columns))

            table.auto_set_font_size(False)
            table.set_fontsize(9)
            table.scale(1, 1.8)
            plt.title('Descriptive Statistics', fontsize=14, pad=20)

            stats_path = os.path.join(temp_dir, "statistics.png")
            plt.tight_layout()
            plt.savefig(stats_path, bbox_inches='tight', dpi=150, facecolor='white')
            # Only report the path once the file has actually been written.
            stats_file = stats_path
        except Exception as e:
            print(f"Visualization generation: {e}")
        finally:
            # Always release the figure, even when rendering failed.
            plt.close(fig)

    # Histogram + box plot for the first three numeric columns.
    for index, col in enumerate(numeric_df.columns[:3]):
        try:
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
        except Exception as e:
            print(f"Visualization generation ({col}): {e}")
            continue

        try:
            values = df[col].dropna()

            # Histogram
            ax1.hist(values, bins=15, color='skyblue', edgecolor='black', alpha=0.7)
            ax1.set_title(f'Distribution of {col}', fontsize=12)
            ax1.set_xlabel(col)
            ax1.set_ylabel('Frequency')
            ax1.grid(alpha=0.3)

            # Box plot
            ax2.boxplot(values, vert=True)
            ax2.set_title(f'Box Plot: {col}', fontsize=12)
            ax2.set_ylabel(col)
            ax2.grid(alpha=0.3)

            fig.tight_layout()
            # Column names may contain path separators or other characters that
            # are invalid in file names; the index keeps sanitised names unique.
            safe_name = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in str(col))
            plot_file = os.path.join(temp_dir, f"{index}_{safe_name}_analysis.png")
            fig.savefig(plot_file, bbox_inches='tight', dpi=150)
            plot_files.append(plot_file)
        except Exception as e:
            # One bad column must not prevent the remaining plots.
            print(f"Visualization generation ({col}): {e}")
        finally:
            plt.close(fig)

    return stats_file, plot_files

# ==========================
# Professional PDF Builder
# ==========================
class ResearchPDF(FPDF):
    """FPDF document with a fixed centred page header and a page-number footer."""

    def __init__(self):
        super().__init__()
        # Break to a new page automatically, keeping a 15-unit bottom margin.
        self.set_auto_page_break(auto=True, margin=15)

    def header(self):
        """Draw the bold, centred banner at the top of every page."""
        banner_text = 'Academic Research Paper'
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, banner_text, 0, 0, 'C')
        self.ln(10)

    def footer(self):
        """Draw the centred, italic page number 15 units above the page bottom."""
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, 0, 0, 'C')

def build_pdf(topic, sections, paper, pdf_file_path, stats_image=None, plot_files=None):
    """Build professional research PDF.

    Args:
        topic: Paper title shown on the title page.
        sections: Ordered section names; each gets a table-of-contents entry
            and its own page.
        paper: Mapping of section name to content. String content is rendered
            line by line with minimal markdown handling (``**bold**`` and
            ``*italic*`` whole lines); other values are rendered via ``str()``.
        pdf_file_path: Destination path of the PDF.
        stats_image: Optional path of a statistics image embedded in the
            "Results" section.
        plot_files: Optional list of plot image paths embedded in the
            "Results" section.

    Returns:
        ``True`` when a PDF (the full document or, after a failure, a minimal
        one-page fallback) was written, ``False`` when even the fallback
        could not be written. Never raises.
    """
    try:
        pdf = ResearchPDF()

        # Title Page
        pdf.add_page()
        pdf.set_font('Arial', 'B', 24)
        pdf.cell(0, 40, 'ACADEMIC RESEARCH PAPER', 0, 1, 'C')
        pdf.set_font('Arial', 'B', 18)
        pdf.cell(0, 20, _pdf_safe(topic), 0, 1, 'C')
        pdf.ln(20)
        pdf.set_font('Arial', 'I', 12)
        pdf.cell(0, 10, 'Generated using AI Research Assistant', 0, 1, 'C')
        pdf.ln(10)

        # Table of Contents
        pdf.add_page()
        pdf.set_font('Arial', 'B', 16)
        pdf.cell(0, 10, 'TABLE OF CONTENTS', 0, 1)
        pdf.ln(10)

        pdf.set_font('Arial', '', 12)
        for i, section in enumerate(sections, 1):
            pdf.cell(0, 8, _pdf_safe(f'{i}. {section}'), 0, 1)

        # Content sections, one page each
        for section in sections:
            pdf.add_page()
            pdf.set_font('Arial', 'B', 14)
            pdf.cell(0, 10, _pdf_safe(section.upper()), 0, 1)
            pdf.ln(5)

            pdf.set_font('Arial', '', 12)
            content = paper.get(section, 'Content generation in progress...')

            if isinstance(content, str):
                # Handle markdown-like formatting on whole lines only
                for raw_line in content.split('\n'):
                    line = _pdf_safe(raw_line)
                    if line.startswith('**') and line.endswith('**'):
                        pdf.set_font('Arial', 'B', 12)
                        pdf.cell(0, 8, line[2:-2], 0, 1)
                        pdf.set_font('Arial', '', 12)
                    elif line.startswith('*') and line.endswith('*'):
                        pdf.set_font('Arial', 'I', 12)
                        pdf.cell(0, 8, line[1:-1], 0, 1)
                        pdf.set_font('Arial', '', 12)
                    else:
                        # multi_cell wraps long lines at the page width
                        pdf.multi_cell(0, 8, line)
                    pdf.ln(4)
            else:
                pdf.multi_cell(0, 8, _pdf_safe(content))

            # Embed the dataset visualizations in the Results section
            if section.lower() == "results":
                pdf.ln(10)
                if stats_image and os.path.exists(stats_image):
                    try:
                        pdf.set_font('Arial', 'B', 11)
                        pdf.cell(0, 10, 'Descriptive Statistics:', 0, 1)
                        pdf.image(stats_image, x=10, w=190)
                        pdf.ln(5)
                    except Exception as image_error:
                        # A broken image should not cost the whole document.
                        print(f"PDF image skipped ({stats_image}): {image_error}")

                if plot_files:
                    pdf.set_font('Arial', 'B', 11)
                    pdf.cell(0, 10, 'Data Analysis Visualizations:', 0, 1)
                    for plot_file in plot_files:
                        if plot_file and os.path.exists(plot_file):
                            try:
                                pdf.image(plot_file, x=10, w=190)
                                pdf.ln(5)
                            except Exception as image_error:
                                print(f"PDF image skipped ({plot_file}): {image_error}")

        pdf.output(pdf_file_path)
        return True

    except Exception as e:
        print(f"PDF generation: {e}")
        # Ultimate fallback: a minimal one-page document
        try:
            pdf = FPDF()
            pdf.add_page()
            pdf.set_font("Arial", size=16)
            pdf.cell(0, 10, _pdf_safe(f"Research: {topic}"), 0, 1)
            pdf.set_font("Arial", size=12)
            pdf.multi_cell(0, 10, "Research paper generated successfully with AI assistance.")
            pdf.output(pdf_file_path)
            return True
        except Exception as fallback_error:
            print(f"PDF fallback generation: {fallback_error}")
            return False


# Common typographic characters mapped to ASCII look-alikes before the
# Latin-1 conversion, so they do not all degrade to "?".
_PDF_CHAR_MAP = str.maketrans({
    "\u2018": "'",
    "\u2019": "'",
    "\u201c": '"',
    "\u201d": '"',
    "\u2013": "-",
    "\u2014": "-",
    "\u2022": "-",
    "\u2026": "...",
})


def _pdf_safe(text):
    """Return *text* restricted to Latin-1, the only charset the core PDF fonts support."""
    # Without this, a single unsupported character (curly quote, bullet, emoji)
    # makes the PDF writer raise and the whole paper is lost to the fallback.
    return str(text).translate(_PDF_CHAR_MAP).encode("latin-1", "replace").decode("latin-1")

# ==========================
# Main Research Paper Generator
# ==========================
def generate_paper(topic, keywords, dataset_file=None):
    """Generate complete research paper with AI assistance.

    Args:
        topic: Research topic; must contain non-whitespace text.
        keywords: Comma-separated keywords included in some prompts.
        dataset_file: Optional uploaded CSV, either a path string or an object
            exposing the path as ``.name``.

    Returns:
        Path of the generated PDF. If generation fails, the path of a short
        "error" PDF describing the failure is returned instead.

    Raises:
        gr.Error: If *topic* is empty. The output component is a file, so a
            plain message string cannot be returned to it.
    """
    # Validate outside the try block so the error reaches the UI as-is instead
    # of being converted into an error PDF.
    if not topic or not topic.strip():
        raise gr.Error("Please enter a valid research topic.")

    try:
        print(f"📝 Generating research paper: {topic}")

        # Define academic paper structure
        sections = [
            "Abstract", "Introduction", "Literature Review",
            "Methodology", "Results", "Discussion",
            "Conclusion", "References"
        ]
        paper = {}

        # Process dataset if provided
        dataset_summary = "No dataset provided for analysis."
        stats_image, plot_files = None, []

        if dataset_file is not None:
            try:
                print("🔍 Analyzing dataset...")
                # Uploads arrive either as a plain path or as an object with `.name`.
                dataset_path = getattr(dataset_file, "name", dataset_file)
                df = pd.read_csv(dataset_path)
                # Clean and prepare data
                df = df.select_dtypes(include=['number', 'object']).iloc[:, :15]  # Limit for performance
                dataset_summary = summarize_dataset(df)
                stats_image, plot_files = generate_plots_and_stats(df)
                print("✅ Dataset analysis completed")
            except Exception as e:
                dataset_summary = f"Dataset processed with basic analysis. Error: {str(e)}"
                print("⚠️ Dataset analysis simplified")

        # One prompt per AI-written section ("References" comes from CrossRef).
        prompts = {
            "Abstract": f"Write a professional academic abstract about '{topic}'. Keywords: {keywords}. Limit to 200 words. Include research objectives, methods, key findings, and implications.",
            "Introduction": f"Write a comprehensive introduction for a research paper about '{topic}'. Include background, problem statement, research questions, and significance of the study.",
            "Literature Review": f"Write a literature review section about '{topic}'. Summarize relevant theories, previous studies, and research gaps. Keywords: {keywords}",
            "Methodology": f"Describe the research methodology for studying '{topic}'. Include research design, data collection methods, and analysis techniques. Dataset information: {dataset_summary}",
            "Results": f"Present research results for '{topic}'. Describe key findings, statistical analyses, and data patterns. Be objective and data-driven.",
            "Discussion": f"Discuss the research findings about '{topic}'. Interpret results, compare with literature, discuss implications, limitations, and theoretical contributions.",
            "Conclusion": f"Write a conclusion for the research paper about '{topic}'. Summarize key findings, contributions, practical implications, and future research directions.",
        }

        # Generate academic content using Gemini
        print("🤖 Generating academic content...")

        for section in sections:
            if section == "References":
                paper[section] = get_references(topic)
                continue

            print(f"  Generating {section}...")
            paper[section] = gemini.generate_text(prompts[section], max_tokens=800)

        # Add title
        paper["Title"] = f"Research Study: {topic}"
        all_sections = ["Title"] + sections

        # Build a filesystem-safe file name in the platform's temp directory.
        safe_topic = "".join(c for c in topic if c.isalnum() or c in (' ', '-', '_'))
        safe_topic = safe_topic.strip()[:30].strip() or "untitled"
        pdf_filename = os.path.join(tempfile.gettempdir(), f"{safe_topic}_research_paper.pdf")

        print("📄 Generating PDF document...")
        success = build_pdf(topic, all_sections, paper, pdf_filename, stats_image, plot_files)

        if success and os.path.exists(pdf_filename):
            print("✅ Research paper generated successfully!")
            return pdf_filename
        else:
            raise Exception("PDF generation failed")

    except Exception as e:
        print(f"❌ Error: {e}")
        # Create error PDF
        pdf_filename = os.path.join(tempfile.gettempdir(), "research_paper_error.pdf")
        message = f"Topic: {topic}\n\nPaper generated with basic formatting. Error details: {str(e)}"
        # The core PDF fonts only support Latin-1; replace anything else so the
        # error report itself cannot fail on the topic or the error text.
        message = message.encode("latin-1", "replace").decode("latin-1")

        pdf = FPDF()
        pdf.add_page()
        pdf.set_font("Arial", size=14)
        pdf.cell(0, 10, "Research Paper Generation", 0, 1)
        pdf.set_font("Arial", size=12)
        pdf.multi_cell(0, 10, message)
        pdf.output(pdf_filename)
        return pdf_filename

# ==========================
# Gradio Interface (FIXED)
# ==========================
def create_interface():
    """Build the Gradio Blocks UI for the paper generator.

    The layout has an input column (topic, keywords, optional CSV upload and a
    generate button), an output column (downloadable PDF plus a collapsible
    tips panel) and a row of clickable example topics. Clicking the button
    calls ``generate_paper`` with the three inputs.

    Returns:
        The configured ``gr.Blocks`` instance; the caller launches it.
    """
    # Resolved up front and interpolated below; inside a plain (non-f) string
    # the expression would be shown to the user literally.
    api_status = '✅ Connected' if gemini.configured else '⚠️ Demo Mode'

    with gr.Blocks(
        title="AI Research Paper Generator with Gemini",
        theme=gr.themes.Soft()
    ) as demo:

        gr.Markdown("""
        # 🎓 AI Research Paper Generator

        *Powered by Google Gemini AI • Professional Academic Papers*
        """)

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### 📝 Research Parameters")

                topic = gr.Textbox(
                    label="Research Topic",
                    placeholder="e.g., Impact of Artificial Intelligence on Education",
                    value="Machine Learning in Healthcare",
                    lines=2
                )

                keywords = gr.Textbox(
                    label="Keywords (comma separated)",
                    placeholder="AI, healthcare, predictive analytics, machine learning",
                    value="artificial intelligence, healthcare, predictive modeling"
                )

                dataset = gr.File(
                    label="Upload Dataset (Optional CSV)",
                    file_types=[".csv"]
                )

                gr.Markdown("💡 *For data analysis and visualization*")

                generate_btn = gr.Button(
                    "🚀 Generate Research Paper",
                    variant="primary",
                    scale=1
                )

            with gr.Column(scale=1):
                gr.Markdown("### 📄 Generated Paper")

                output_file = gr.File(
                    label="Download Your Research Paper",
                    file_types=[".pdf"]
                )

                with gr.Accordion("ℹ️ Instructions & Tips", open=False):
                    gr.Markdown(f"""
                    **How to get the best results:**
                    - Be specific with your research topic
                    - Use relevant, comma-separated keywords
                    - Upload CSV data for automated analysis
                    - Review and refine the generated content

                    **Features:**
                    - ✅ Professional academic structure
                    - ✅ AI-powered content generation
                    - ✅ Automated data analysis
                    - ✅ CrossRef reference integration
                    - ✅ Publication-ready PDF format

                    **Current API Status:** {api_status}
                    """)

        # Examples section
        gr.Markdown("### 💡 Example Research Topics")
        gr.Examples(
            examples=[
                ["Sustainable Energy Solutions for Urban Areas", "renewable energy, sustainability, urban planning, solar power"],
                ["The Impact of Social Media on Mental Health", "social media, psychology, mental health, adolescents, digital wellbeing"],
                ["Machine Learning Applications in Financial Forecasting", "AI, finance, predictive analytics, risk management, machine learning"]
            ],
            inputs=[topic, keywords],
            label="Click an example to load parameters"
        )

        # Generation event
        generate_btn.click(
            fn=generate_paper,
            inputs=[topic, keywords, dataset],
            outputs=output_file
        )

    return demo

# ==========================
# Launch Application
# ==========================
if __name__ == "__main__":
    # Announce start-up and whether real AI generation or demo mode is active.
    print("🚀 Starting AI Research Paper Generator...")
    status_text = (
        '✅ Connected to Gemini'
        if gemini.configured
        else '⚠️ Demo Mode - Add API Key for Full Features'
    )
    print(f"🔑 API Status: {status_text}")

    # Build the UI and serve it; share=True requests a public share link.
    demo = create_interface()
    demo.launch(share=True)

✅ Gemini API configured successfully!
🚀 Starting AI Research Paper Generator...
🔑 API Status: ✅ Connected to Gemini
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://20da4ea68d11924d91.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
