In [3]:


# Import the necessary libraries
import pandas as pd
from docx import Document

# Load CSV Data
Load the quotes from a CSV file using pandas, reading it as UTF-8 and falling back to Latin-1 if the file contains bytes that are not valid UTF-8.

In [5]:
# Load CSV Data
csv_file_path = 'deep-work---highlights.csv'  # Location of the exported highlights


def _read_highlights(path):
    """Read the CSV at `path` into a DataFrame.

    The file is decoded as UTF-8 first. If pandas raises UnicodeDecodeError,
    the read is repeated with the Latin-1 codec.
    """
    try:
        return pd.read_csv(path, encoding='utf-8')
    except UnicodeDecodeError:
        return pd.read_csv(path, encoding='latin1')


df = _read_highlights(csv_file_path)

# Preview the first rows to confirm the file was parsed as expected
df.head()

Unnamed: 0,timestamp,chapter,percent,color,quote
0,"December 20, 2022 04:09",Rule #4: Drain the Shallows,71.8%,#FFB,"changing your schedule on paper. For example, ..."
1,"December 20, 2022 04:09",Rule #4: Drain the Shallows,71.4%,#FFB,"When you’re done scheduling your day, every mi..."
2,"December 19, 2022 05:08",Rule #4: Drain the Shallows,71.0%,#FFB,Here’s my suggestion: At the beginning of each...
3,"December 19, 2022 02:04",Rule #3: Quit Social Media,66.4%,#FFE0EC,"These services aren’t necessarily, as advertis..."
4,"December 19, 2022 00:02",Rule #3: Quit Social Media,59.0%,#FFB,This rule attempts to break us out of this rut...


# Process Quotes
Clean and process the quotes data, handling any formatting or special characters that may be present.

In [6]:
# Process Quotes

# Clean and process the quotes data, handling any formatting or special characters that may be present.
def clean_quote(quote):
    """Normalise a single quote value into one line of text.

    Parameters
    ----------
    quote : str or any scalar
        The raw cell value from the 'quote' column. Missing cells
        (None / NaN / pd.NA) are accepted.

    Returns
    -------
    str
        The text with leading/trailing whitespace stripped, each newline
        replaced by a space and carriage returns removed. Missing values
        yield an empty string; other non-string scalars are converted with
        ``str()`` before cleaning.
    """
    if not isinstance(quote, str):
        # pandas represents empty CSV cells as float NaN, which has no
        # .strip(); treat every kind of missing value as "no text".
        if pd.isna(quote):
            return ''
        quote = str(quote)
    return quote.strip().replace('\n', ' ').replace('\r', '')

# Run clean_quote over every entry of the 'quote' column, element by element
df['quote'] = df['quote'].map(clean_quote)

# Preview the first rows to confirm the cleaning step ran
df.head()

Unnamed: 0,timestamp,chapter,percent,color,quote
0,"December 20, 2022 04:09",Rule #4: Drain the Shallows,71.8%,#FFB,"changing your schedule on paper. For example, ..."
1,"December 20, 2022 04:09",Rule #4: Drain the Shallows,71.4%,#FFB,"When you’re done scheduling your day, every mi..."
2,"December 19, 2022 05:08",Rule #4: Drain the Shallows,71.0%,#FFB,Here’s my suggestion: At the beginning of each...
3,"December 19, 2022 02:04",Rule #3: Quit Social Media,66.4%,#FFE0EC,"These services aren’t necessarily, as advertis..."
4,"December 19, 2022 00:02",Rule #3: Quit Social Media,59.0%,#FFB,This rule attempts to break us out of this rut...


# Create Word Document
Create a new Word document and add the quotes from the CSV file, possibly organizing by categories or authors if available.

In [9]:
# Create Word Document

def _text_or_default(value, default):
    """Return `value` as stripped text, or `default` when it is missing or blank.

    "Missing" covers None and pandas NA/NaN, so empty CSV cells never end up
    in the document as the literal text 'nan'.
    """
    if pd.isna(value):
        return default
    text = str(value).strip()
    return text or default


# Create a new Word document
doc = Document()

# Add a title to the document
doc.add_heading('Quotes Collection', level=1)

# Iterate through the DataFrame and add quotes to the document
for _row_label, row in df.iterrows():
    # row['quote'] (not .get) so a missing 'quote' column fails loudly
    quote = _text_or_default(row['quote'], '')
    if not quote:
        continue  # Nothing to print for a missing/blank highlight

    # Optional columns: row.get returns None when the column does not exist
    chapter = _text_or_default(row.get('chapter'), '')
    author = _text_or_default(row.get('author'), 'Unknown')
    # Without a 'category' column, fall back to the chapter so the
    # attribution line carries real information instead of always 'General'
    category = _text_or_default(row.get('category'), chapter or 'General')

    # Add the quote to the document
    doc.add_paragraph(f'"{quote}"')

    # Add the author and category
    doc.add_paragraph(f'- {author} ({category})', style='Quote')

    # Add an empty paragraph for separation
    doc.add_paragraph()

# Save the document.
# NOTE(review): changes made to `doc` after this point are only written to
# disk if the document is saved again.
doc.save('Quotes_Collection.docx')

# Style and Format Document
Apply formatting to make the document visually appealing, including fonts, spacing, headings, and possibly adding page numbers.

In [15]:
# Style and Format Document
# Additional python-docx helpers used for styling: point sizes, XML qualified names and paragraph alignment.
from docx.shared import Pt
from docx.oxml.ns import qn
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT

from docx.oxml import OxmlElement


def _make_field_char(field_char_type):
    """Build a <w:fldChar> element whose w:fldCharType is `field_char_type`."""
    element = OxmlElement('w:fldChar')
    element.set(qn('w:fldCharType'), field_char_type)
    return element


# Base typography: configure the font of the 'Normal' style
normal_font = doc.styles['Normal'].font
normal_font.name = 'Times New Roman'
normal_font.size = Pt(12)

# Title: the first paragraph of the document, centred, bold 24pt Arial
title = doc.paragraphs[0]
title.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
title_run = title.runs[0]
title_run.font.name = 'Arial'
title_run.font.size = Pt(24)
title_run.bold = True

# Body: style every paragraph after the title according to its style name
for para in doc.paragraphs[1:]:
    style_name = para.style.name
    if style_name == 'Normal':
        # Only the first run is styled; a paragraph without runs gets an
        # empty one so there is something to apply the formatting to.
        first_run = para.runs[0] if para.runs else para.add_run()
        first_run.font.size = Pt(14)
        first_run.italic = True
    elif style_name == 'Quote':
        para.alignment = WD_PARAGRAPH_ALIGNMENT.RIGHT
        for quote_run in para.runs:
            quote_run.font.size = Pt(12)
            quote_run.font.bold = True

# Footer: centred "Page " label followed by a PAGE field
footer = doc.sections[0].footer
existing_footer_paragraphs = footer.paragraphs
footer_paragraph = (
    existing_footer_paragraphs[0]
    if existing_footer_paragraphs
    else footer.add_paragraph()
)
footer_paragraph.text = "Page "
footer_paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER

# The field is written as raw XML into a fresh run, in this order:
# begin marker, instruction text, separate marker, end marker.
page_number_run = footer_paragraph.add_run()
page_instruction = OxmlElement('w:instrText')
page_instruction.text = 'PAGE'
field_nodes = (
    _make_field_char('begin'),
    page_instruction,
    _make_field_char('separate'),
    _make_field_char('end'),
)
for node in field_nodes:
    page_number_run._r.append(node)

# Save Word Document
Save the completed document to a file and provide options for the user to specify the output location.