In [None]:
! pip install PyMuPDF
! pip install docx

In [None]:
import fitz  # PyMuPDF
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
import re

# Characters that are NOT allowed in XML 1.0 documents (and therefore not in DOCX):
# everything except tab, LF, CR and the regular Unicode ranges.
_XML_INCOMPATIBLE = re.compile(
    r'[^\x09\x0A\x0D\x20-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]'
)


def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page of a PDF.

    Only characters that cannot be stored in XML are removed. Line breaks,
    tabs and non-ASCII characters are kept, so words at the end of a line are
    no longer glued to the first word of the next line.

    Args:
        pdf_path: Path of the PDF file, passed to ``fitz.open``.

    Returns:
        str: The text of all pages, each page followed by a blank line
        (``"\\n\\n"``). An empty string if the document has no pages.
    """
    page_texts = []
    with fitz.open(pdf_path) as pdf:
        for page in pdf:
            page_text = page.get_text("text")

            # Drop only XML-incompatible characters (NUL, form feed, other
            # control codes); newlines must survive to keep words separated.
            page_text = _XML_INCOMPATIBLE.sub('', page_text)

            # Separate pages with a double newline.
            page_texts.append(page_text + "\n\n")
    return "".join(page_texts)

def _style_runs(paragraph, size_pt, bold=None):
    """Apply Times New Roman at ``size_pt`` points to every run of ``paragraph``."""
    for run in paragraph.runs:
        run.font.size = Pt(size_pt)
        run.font.name = 'Times New Roman'
        if bold is not None:
            run.bold = bold


def save_text_in_docx(text, docx_path):
    """Write ``text`` into a new DOCX file with a simple title-page layout.

    The document gets a right-aligned running head, a centered title and
    author block (placeholder strings), the body text, and a sample
    "References" section.

    Args:
        text: Body text. It is split into paragraphs on blank lines
            (``"\\n\\n"``); empty or whitespace-only pieces are skipped.
        docx_path: Destination passed to ``Document.save``.

    Returns:
        None. The file is written and a confirmation line is printed.
    """
    doc = Document()

    # Running head in the header of the first section.
    header_para = doc.sections[0].header.paragraphs[0]
    header_para.text = "Running head: TITLE OF YOUR DOCUMENT"  # Replace with your document title
    header_para.alignment = WD_ALIGN_PARAGRAPH.RIGHT

    # Title page formatting
    title = doc.add_paragraph("Title of Your Document")  # Replace with actual title
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _style_runs(title, 14, bold=True)

    # Author information
    author = doc.add_paragraph("Author Name\nInstitution Name")  # Replace with actual details
    author.alignment = WD_ALIGN_PARAGRAPH.CENTER
    _style_runs(author, 12)

    # Add a space before the main content
    doc.add_paragraph("\n")

    # Body text: one DOCX paragraph per blank-line-separated chunk.
    for para_text in text.split("\n\n"):
        para_text = para_text.strip()
        if not para_text:
            # The extracted text ends with "\n\n", so the split always yields
            # a trailing empty piece. A paragraph added with empty text has no
            # run, which made the former `para.runs[0]` raise IndexError.
            continue
        para = doc.add_paragraph(para_text)
        para_format = para.paragraph_format
        para_format.line_spacing = 1.5  # Line spacing
        para_format.first_line_indent = Pt(18)  # Indentation
        _style_runs(para, 12)

    # Add a References section (as an example, modify as necessary)
    doc.add_paragraph("\nReferences", style='Heading 1')
    doc.add_paragraph("Author Last Name, First Initial. (Year). Title of the Book. Publisher.")

    # Save the document
    doc.save(docx_path)
    print(f"Document saved at {docx_path}")

# Run the PDF -> DOCX conversion
# Source PDF to read (replace with your PDF path).
pdf_path = r'C:\Users\People\Desktop\upwork\PPT deep learning\heart_sound.pdf'
# Where the formatted DOCX is written.
docx_path = r'C:\Users\People\Desktop\upwork\PPT deep learning\extract_text.docx'

# 1) Pull the text out of the PDF ...
extract_text = extract_text_from_pdf(pdf_path)

# 2) ... and write it into the formatted Word document.
save_text_in_docx(text=extract_text, docx_path=docx_path)


## Generate a lecture-style transcript from the extracted text


In [None]:
from transformers import pipeline

def generate_transcript_from_text(text, max_length=2000, model_name="EleutherAI/gpt-neo-125m"):
    """Generate a lecture-style transcript about ``text`` with a local model.

    ``transformers.pipeline`` loads models from the Hugging Face Hub (or a
    local directory). OpenAI API models such as "gpt-3.5-turbo" are not
    available there, so a Hub model id is used instead.

    Args:
        text: Source material that is embedded in the prompt.
        max_length: Forwarded to the pipeline as ``max_length`` (the limit
            covers prompt plus generated tokens).
        model_name: Hub id or local path of a causal language model. The
            default is a small model chosen so the notebook runs out of the
            box; its context window is assumed to cover the default
            ``max_length`` (TODO confirm). Use a larger instruction-tuned
            model for better quality.

    Returns:
        str: The generated continuation only; the prompt is not echoed back.
    """
    # Build the text-generation pipeline for the requested model.
    generator = pipeline("text-generation", model=model_name)

    # Ask the model for a detailed, informative transcript (targeting a 10-minute length)
    prompt = f"Generate a 10-minute lecture-style transcript for a deep learning engineer, explaining the following technical concepts in detail, step by step: {text}"

    # return_full_text=False keeps the instruction prompt out of the result,
    # so it does not end up as content of the generated slides/documents.
    outputs = generator(
        prompt,
        max_length=max_length,
        num_return_sequences=1,
        return_full_text=False,
    )

    return outputs[0]['generated_text']

# Example usage
# `extract_text` is the text extracted from the PDF in the extraction cell above.
# It must hold the document content itself: passing the path of the DOCX file
# would only put that path string into the prompt.
transcript_text = generate_transcript_from_text(extract_text)

# Display the generated transcript
print(transcript_text)


## Generate a PowerPoint (PPTX) presentation from the transcript

In [None]:
from pptx import Presentation

def create_pptx_from_summary(summary, output_path="generated_presentation.pptx"):
    """Create a presentation with one slide per sentence of ``summary``.

    The summary is split on ``'. '``; each non-empty fragment becomes the body
    of a slide titled "Key Concept". Blank fragments (for example the piece
    after a trailing ``'. '``) are skipped instead of producing empty slides.

    Args:
        summary: Text to turn into slides. ``None`` or an empty string yields
            a presentation without slides.
        output_path: File the presentation is saved to. Defaults to the
            previously hard-coded "generated_presentation.pptx".

    Returns:
        None. The presentation is written to ``output_path``.
    """
    prs = Presentation()

    # Title + content layout; looked up once because it is the same for every slide.
    slide_layout = prs.slide_layouts[1]

    # Assuming each sentence is a key point for simplicity.
    key_points = [point.strip() for point in (summary or "").split('. ')]

    for point in key_points:
        if not point:
            continue
        slide = prs.slides.add_slide(slide_layout)
        slide.shapes.title.text = "Key Concept"
        slide.shapes.placeholders[1].text = point

    prs.save(output_path)

# Build the slide deck from the transcript generated above.
create_pptx_from_summary(summary=transcript_text)


## Generate a Word (DOCX) document from the transcript

In [None]:
from docx import Document

def create_docx_from_summary(summary, output_path='generated_transcript.docx'):
    """Write ``summary`` into a DOCX file, one numbered section per sentence.

    The document starts with a fixed title and introduction. The summary is
    split on ``'. '``; every non-empty fragment gets a "Section N" heading
    followed by the fragment as a paragraph. Blank fragments are skipped, so
    section numbers stay consecutive.

    Args:
        summary: Text to split into sections. ``None`` or an empty string
            yields a document with only the title and introduction.
        output_path: File the document is saved to. Defaults to the
            previously hard-coded 'generated_transcript.docx'.

    Returns:
        None. The document is written to ``output_path``.
    """
    doc = Document()

    # Document title (heading level 0).
    doc.add_heading('Deep Learning Insights: Technical Overview', 0)

    # Add an introduction
    doc.add_heading('Introduction', level=1)
    doc.add_paragraph("This document provides a technical overview based on the deep learning techniques and models described in the provided text.")

    # Assuming each sentence is a section; drop fragments that are only whitespace.
    fragments = (piece.strip() for piece in (summary or "").split('. '))
    sections = [piece for piece in fragments if piece]

    for number, section in enumerate(sections, start=1):
        doc.add_heading(f"Section {number}", level=2)
        doc.add_paragraph(section)

    doc.save(output_path)

# Write the transcript generated above into a Word document.
create_docx_from_summary(summary=transcript_text)
