In [9]:


#Import the necessary Libraries
import pandas as pd
from docx import Document

# Load CSV Data
Load quotes from a CSV file using pandas, handling potential encoding issues and different CSV formats.

In [25]:
# Load CSV Data
csv_file_path = 'Men-without-women---highlights.csv'  # Path to the CSV file

# Load the CSV data into a DataFrame
try:
    df = pd.read_csv(csv_file_path, encoding='utf-8')
except UnicodeDecodeError:
    df = pd.read_csv(csv_file_path, encoding='latin1')

# Display the first few rows of the DataFrame to verify the data
df.head()

Unnamed: 0,timestamp,chapter,percent,color,quote
0,"September 04, 2023 20:04",Men Without Women,97.6%,#FFB,When I listen to this music I feel like I’m in...
1,"September 04, 2023 19:47",Men Without Women,96.8%,#FFB,It’s quite easy to become Men Without Women. Y...
2,"September 04, 2023 19:45",Men Without Women,96.8%,#FFB,"After that, every time I pass the statue of th..."
3,"September 04, 2023 18:56",Men Without Women,96.0%,#FFB,Suddenly one day you become Men Without Women....
4,"September 04, 2023 18:56",Men Without Women,96.0%,#FFE0EC,Only Men Without Women can comprehend how pain...


# Process Quotes
Clean and process the quotes data, handling any formatting or special characters that may be present.

In [26]:
# Process Quotes

# Process Quotes

# Single comprehensive function to process quotes
def process_quote(quote):
    if not isinstance(quote, str):
        return quote
    
    # Basic cleaning
    quote = quote.strip().replace('\n', ' ').replace('\r', '')
    
    # Remove double quotation marks
    quote = quote.replace('"', '')
    
    # Remove any punctuation at the end
    quote = quote.rstrip('.,;!?')
    
    # Capitalize the first letter
    if quote:
        quote = quote.capitalize()
        
    # Add period at the end if needed
    if quote and not quote.endswith('.'):
        quote += '.'
        
    return quote

# Apply all transformations in one go
df['quote'] = df['quote'].apply(process_quote)

# Display the cleaned DataFrame to verify the changes
df.head()

# Display the cleaned DataFrame to verify the changes
df.head()

Unnamed: 0,timestamp,chapter,percent,color,quote
0,"September 04, 2023 20:04",Men Without Women,97.6%,#FFB,When i listen to this music i feel like i’m in...
1,"September 04, 2023 19:47",Men Without Women,96.8%,#FFB,It’s quite easy to become men without women. y...
2,"September 04, 2023 19:45",Men Without Women,96.8%,#FFB,"After that, every time i pass the statue of th..."
3,"September 04, 2023 18:56",Men Without Women,96.0%,#FFB,Suddenly one day you become men without women....
4,"September 04, 2023 18:56",Men Without Women,96.0%,#FFE0EC,Only men without women can comprehend how pain...


# Create Word Document
Create a new Word document and add the quotes from the CSV file, possibly organizing by categories or authors if available.

In [27]:
# Create Word Document

# Create a new Word document
doc = Document()

# Add a title to the document
doc.add_heading('Quotes Collection', level=1)

# Iterate through the DataFrame and add quotes to the document
for index, row in df.iterrows():
    quote = row['quote']
    chapter = row['chapter']
    author = row.get('author', 'Unknown')  # Use 'Unknown' if author is not available

    # Add the chapter before the quote 
    doc.add_paragraph(f"Chapter: {chapter}", style='Heading 2')

    # Add the timestamp to the document
    doc.add_paragraph(f"Timestamp: {row['timestamp']}")
    #category = row.get('category', 'General')  # Use 'General' if category is not available
    
    # Add the quote to the document
    doc.add_paragraph(f'"{quote}"')
    
    # Add the author and category
    #doc.add_paragraph(f'- {author} ({category})', style='Quote')
    
    # Add a line break for separation
    doc.add_paragraph()


# Add a footer to the document
footer = doc.sections[0].footer
footer.paragraphs[0].text = "MEN WIHTOUT WOMEN - Haruki Murakami"

# Display the document content
for p in doc.paragraphs:
    print(p.text)

# Save the document - Change the file name as needed to avoid overwriting existing files
try:
    doc.save('Quotes_Collection_menWithoutWomen.docx')
    print("Document saved successfully.")
except Exception as e:
    print(f"Error saving document: {e}")

Quotes Collection
Chapter: Men Without Women
Timestamp: September 04, 2023 20:04
"When i listen to this music i feel like i’m in a wide-open, empty place. it’s a vast space, with nothing to close it off. no walls, no ceiling. i don’t need to think, don’t need to say anything, or do anything. just being there is enough. i close my eyes and give myself up to the beautiful strings. there’re no headaches, no sensitivity to cold, no periods, or ovulation. everything is simply beautiful, peaceful, flowing. i can just be."

Chapter: Men Without Women
Timestamp: September 04, 2023 19:47
"It’s quite easy to become men without women. you love a woman deeply, and then she goes off somewhere. that’s all it takes. most of the time (as i’m sure you’re well aware) it’s crafty sailors who take them away. they sweet-talk them into going with them, then carry them off to marseilles or the ivory coast. and there’s hardly anything we can do about it. or else the women have nothing to do with sailors, and 

# Style and Format Document
Apply formatting to make the document visually appealing, including fonts, spacing, headings, and possibly adding page numbers.

In [28]:
# Style and Format Document
# No additional imports are necessary here since the required libraries are imported immediately below.
from docx.shared import Pt
from docx.oxml.ns import qn
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

# Set the default font for the document
style = doc.styles['Normal']
font = style.font
font.name = 'Times New Roman'
font.size = Pt(12)

# Apply formatting to the title
title = doc.paragraphs[0]
title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
run = title.runs[0]
run.font.name = 'Arial'
run.font.size = Pt(24)
run.bold = True

# Apply formatting to the quotes and author/category paragraphs
for para in doc.paragraphs[1:]:
    if para.style.name == 'Normal':
        if para.runs:
            run = para.runs[0]
        else:
            run = para.add_run()
        run.font.size = Pt(14)
        run.italic = True
    elif para.style.name == 'Quote':
        para.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        for run in para.runs:
            run.font.size = Pt(12)
            run.font.bold = True

# Add page numbers to the footer
from docx.oxml import OxmlElement

section = doc.sections[0]
footer = section.footer
if footer.paragraphs:
    footer_paragraph = footer.paragraphs[0]
else:
    footer_paragraph = footer.add_paragraph()
footer_paragraph.text = "Page "
footer_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# Create a run and insert field codes for the page number
run = footer_paragraph.add_run()
fldChar_begin = OxmlElement('w:fldChar')
fldChar_begin.set(qn('w:fldCharType'), 'begin')
instrText = OxmlElement('w:instrText')
instrText.text = 'PAGE'
fldChar_separate = OxmlElement('w:fldChar')
fldChar_separate.set(qn('w:fldCharType'), 'separate')
fldChar_end = OxmlElement('w:fldChar')
fldChar_end.set(qn('w:fldCharType'), 'end')

run._r.append(fldChar_begin)
run._r.append(instrText)
run._r.append(fldChar_separate)
run._r.append(fldChar_end)

# Save Word Document
Save the completed document to a file and provide options for the user to specify the output location.

In [8]:
!git add .
!git commit -m "Add code and files from codespace to github"
!git push



[main 55b1bdc] Add code and files from codespace to github
 1 file changed, 0 insertions(+), 0 deletions(-)
Enumerating objects: 7, done.
Counting objects: 100% (7/7), done.
Delta compression using up to 4 threads
Compressing objects: 100% (4/4), done.
Writing objects: 100% (4/4), 558 bytes | 558.00 KiB/s, done.
Total 4 (delta 3), reused 0 (delta 0), pack-reused 0 (from 0)
remote: Resolving deltas: 100% (3/3), completed with 3 local objects.[K
To https://github.com/rohit196/test_demo
   adf760a..55b1bdc  main -> main
