In [None]:
# This notebook requires the following setup:
# 1. Install Ollama: https://ollama.com/docs/installation
# 2. Pull the model: ollama pull llama3.1:latest
# 3. Install CrewAI: pip install crewai

# The input text file should be placed in ./input/article.txt
# The output report will be saved in ./output/plagiarism_report.txt

In [None]:

# Import Libraries and Set Up Environment
import os
from crewai import Agent, Task, Crew, Process
from crewai import LLM
from pathlib import Path

# NOTE(review): presumably meant to point crewai at the local Ollama endpoint.
# Nothing visible in this file reads this variable, and the LLM created further
# down is given its base_url explicitly — confirm this setting is still needed.
os.environ['CREWAI_API_URL'] = 'http://localhost:11434'


In [None]:
from crewai import LLM
import re

# Initialize the local LLM: a crewai LLM configured for the llama3.1:latest
# model served at http://localhost:11434. This module-level object is the one
# used by analyze_chunk() and generate_report() via llm.call().
llm = LLM(
    model="ollama/llama3.1:latest",
    base_url="http://localhost:11434",
    provider="ollama",
    temperature=0.3,  # presumably kept low so scores vary less between runs — confirm
    timeout=180000  # NOTE(review): units are not visible here; if seconds, this is 50 hours — confirm (180 may have been intended)
)

def read_article(file_path):
    """Return the UTF-8 decoded contents of the file at *file_path*.

    This is a best-effort reader: any exception raised while opening or
    reading the file is reported on stdout and ``None`` is returned instead
    of propagating the error.
    """
    contents = None
    try:
        with open(file_path, encoding='utf-8') as source:
            contents = source.read()
    except Exception as err:
        print(f"Error reading file: {err}")
    return contents

def split_into_chunks(text, chunk_size=1000):
    """Split *text* into chunks of at most *chunk_size* words.

    Words are whatever ``str.split()`` yields, so runs of whitespace
    (including newlines) are collapsed and each chunk is re-joined with
    single spaces.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be positive.

    Returns:
        A list of chunk strings; empty when *text* contains no words.

    Raises:
        ValueError: If *chunk_size* is zero or negative. A negative size
            would otherwise silently produce no chunks and drop the text.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    words = text.split()
    return [
        ' '.join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

# Local models often decorate the requested format with markdown emphasis or
# brackets ("**Score:** 85", "Score: [85]"). These patterns skip such
# decoration so a valid score is not silently parsed as 0.
_SCORE_RE = re.compile(r"Score\s*:[\s*_\[]*(\d+)", re.IGNORECASE)
_EXPLANATION_RE = re.compile(r"Explanation\s*:[\s*_]*(.+)", re.IGNORECASE | re.DOTALL)


def _parse_analysis(response):
    """Extract the clamped score and the explanation from raw LLM output."""
    score_match = _SCORE_RE.search(response)
    explanation_match = _EXPLANATION_RE.search(response)

    # Clamp to the 0-100 range requested in the prompt; default to 0 when the
    # reply contains no recognisable score.
    score = min(100, max(0, int(score_match.group(1)))) if score_match else 0
    if explanation_match:
        explanation = explanation_match.group(1).strip()
    else:
        explanation = "No explanation provided"
    return {"score": score, "explanation": explanation}


def analyze_chunk(chunk):
    """Ask the module-level ``llm`` to rate *chunk* for plagiarism risk.

    Args:
        chunk: The text to analyse.

    Returns:
        A dict with ``"score"`` (int clamped to 0-100) and ``"explanation"``
        (str). If the LLM call or the parsing raises, the error is printed
        and ``{"score": 0, "explanation": "Analysis failed"}`` is returned,
        so one failing chunk does not abort the whole run.
    """
    prompt = f"""Analyze this text for plagiarism risk. Provide:
    1. Score: 0-100 (0=original, 100=plagiarized)
    2. Brief explanation
    Format exactly as:
    Score: [number]
    Explanation: [text]

    Text: {chunk}"""

    try:
        response = llm.call(prompt)
        return _parse_analysis(response)
    except Exception as e:
        # Deliberate best-effort boundary: report and continue with a neutral result.
        print(f"Error analyzing chunk: {e}")
        return {"score": 0, "explanation": "Analysis failed"}

def generate_report(results):
    """Generate the final plagiarism report text using the module-level ``llm``.

    The prompt contains aggregate statistics plus the score and explanation
    of every high-risk chunk (score > 50). It never contains the chunk texts,
    so the model is told to use only the supplied data rather than being
    asked for texts it does not have.

    Args:
        results: List of dicts, each with a ``"score"`` key and optionally
            ``"chunk_number"`` and ``"explanation"``. May be empty.

    Returns:
        Whatever ``llm.call`` returns for the report prompt.
    """
    scores = [r["score"] for r in results]
    avg_score = sum(scores) / len(scores) if scores else 0
    highest_score = max(scores, default=0)

    # Fall back to the 1-based position when a result has no chunk number.
    high_risk = [
        (r.get("chunk_number", position), r)
        for position, r in enumerate(results, start=1)
        if r["score"] > 50
    ]
    if high_risk:
        details = "\n".join(
            f"    - Chunk {number}: score {r['score']} - "
            f"{r.get('explanation', 'No explanation provided')}"
            for number, r in high_risk
        )
    else:
        details = "    - None"

    report_prompt = f"""Create a plagiarism report with:
    1. Brief summary of findings
    2. High-risk chunk statistics
    3. Recommendations
    4. High-risk chunk details (use only the data below; do not invent or quote chunk text)

    Data:
    - Total chunks: {len(results)}
    - High-risk chunks: {len(high_risk)}
    - Average score: {avg_score:.1f}
    - Highest score: {highest_score}
    - High-risk chunk details:
{details}
    """

    return llm.call(report_prompt)

def save_results(content, output_file):
    """Write *content* to *output_file* as UTF-8, replacing any existing file.

    Missing parent directories are created first, so saving into a
    not-yet-existing folder such as ``./output`` does not fail.

    Args:
        content: The text to write.
        output_file: Destination path (str or path-like).
    """
    target = Path(output_file)
    target.parent.mkdir(parents=True, exist_ok=True)
    with target.open('w', encoding='utf-8') as file:
        file.write(content)

# Main processing: read the article, score each chunk with the LLM, then write
# the LLM-generated report followed by the verbatim high-risk chunk texts.
if __name__ == "__main__":
    input_path = "./input/article.txt"
    report_path = "./output/plagiarism_report.txt"

    # Read and chunk the article
    article_text = read_article(input_path)
    if not article_text:
        # read_article has already printed any I/O error; an empty file also
        # lands here. SystemExit is raised directly because the exit() helper
        # is not guaranteed to exist in every interpreter.
        raise SystemExit(1)

    # Create the output directory before the slow LLM calls, so a missing
    # folder cannot discard a finished analysis at write time.
    Path(report_path).parent.mkdir(parents=True, exist_ok=True)

    chunks = split_into_chunks(article_text)

    # Analyze chunks
    results = []
    for chunk_number, chunk in enumerate(chunks, start=1):
        print(f"Analyzing chunk {chunk_number}/{len(chunks)}...")
        analysis = analyze_chunk(chunk)
        results.append({
            "chunk_number": chunk_number,
            "score": analysis["score"],
            "explanation": analysis["explanation"]
        })

    # Generate the report and append the actual high-risk chunk texts, which
    # come from the article itself rather than from the LLM.
    final_report = generate_report(results)
    high_risk = [r for r in results if r["score"] > 50]
    high_risk_texts = "\n\n".join(
        f"Chunk {r['chunk_number']}:\n\n{chunks[r['chunk_number']-1]}"
        for r in high_risk
    )
    save_results(
        f"{final_report}\nHigh-risk chunk texts: {high_risk_texts}",
        report_path,
    )

    print("\nPlagiarism analysis complete. Results saved to ./output/plagiarism_report.txt")

    # Print high-risk chunks
    print("\nHigh-risk chunks:")
    for r in high_risk:
        print(f"Chunk {r['chunk_number']}: Score {r['score']} - {r['explanation']}")
        
        
        
        