In [None]:
# Fetch the repository whose data/text folder is read below; it is cloned into
# the current working directory.
!git clone https://github.com/sun-yitao/automation_with_python.git

In [None]:
from datetime import datetime
from pathlib import Path

In [None]:
# Folder of sample notes inside the cloned repository, relative to the
# current working directory.
notes_directory = Path.cwd().joinpath('automation_with_python', 'data', 'text')

In [None]:
# Display the resolved path so it can be checked before any files are read.
notes_directory

## Glob all txt files in notes_directory

In [None]:
# Path.glob returns a lazy generator: the matches are produced on demand and
# can be iterated only once.
filepaths = notes_directory.glob('*.txt')

In [None]:
# Show the matching files. Note that list() consumes the generator, so
# `filepaths` yields nothing if it is iterated again afterwards.
list(filepaths)

In [None]:
# Today's date as a day/month/year string, e.g. 31/12/2024.
today_date = format(datetime.today(), '%d/%m/%Y')

In [None]:
# Display the formatted date that will be written into each file.
today_date

## Insert the date as the first line of every txt file

In [None]:
# Destination folder for the dated copies, relative to the current working
# directory.
output_directory = Path.cwd().joinpath('output', 'text')

In [None]:
# exist_ok=True keeps this cell re-runnable: without it, mkdir raises
# FileExistsError as soon as the folder exists (e.g. on a second "Run All").
output_directory.mkdir(parents=True, exist_ok=True)

In [None]:
# Glob again here rather than reusing `filepaths`: that generator has already
# been consumed by the list() call in an earlier cell, so looping over it
# would silently process zero files.
for filepath in sorted(notes_directory.glob('*.txt')):
    # Explicit encoding so the result does not depend on the platform default.
    text = filepath.read_text(encoding='utf-8')

    # Write a dated copy under output_directory; the source file is not modified.
    (output_directory / filepath.name).write_text(f'{today_date}\n' + text, encoding='utf-8')

## Find overused words in your essay and get synonyms

In [None]:
import nltk # pip install nltk
# Tokenizer data for word_tokenize: older NLTK releases load 'punkt', while
# recent releases look for 'punkt_tab' instead, so fetch both.
nltk.download('punkt')
nltk.download('punkt_tab')
# Stop-word lists, used to filter out common words.
nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from collections import Counter

In [None]:
!pip install python-docx
import docx

In [None]:
def get_text_from_docx(filename):
    """Return the text of every paragraph in a .docx file, joined by newlines.

    `filename` is passed unchanged to `docx.Document`. Paragraphs are kept in
    document order, one per line of the returned string.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)

In [None]:
# Despite the `txt_` prefix this is a Word document; it is expected in the
# same folder as the text notes.
txt_path = notes_directory / 'asteroid.docx'
# Plain text of the document, one paragraph per line.
txt_raw = get_text_from_docx(txt_path)

In [None]:
# English stop words, held in a set for fast membership tests.
stop_words = set(stopwords.words('english'))
# Tally of word occurrences, populated in the next cell.
counter = Counter()
# Tokens of the document text, as produced by NLTK's word_tokenize.
words = word_tokenize(txt_raw)

In [None]:
# Keep alphabetic tokens only (this drops punctuation and numbers) and skip
# common stop words.
# Tokens are lower-cased first: the stop-word list is lower-case, so "The"
# would otherwise slip through, and "Asteroid"/"asteroid" would be tallied as
# two different words.
# The Counter is rebuilt from scratch so the cell is idempotent; re-running it
# no longer doubles every count.
counter = Counter(
    token
    for token in map(str.lower, words)
    if token.isalpha() and token not in stop_words
)

In [None]:
# Show the raw tallies for every word that was kept.
print(counter)

In [None]:
# (count, word) pairs ordered from most to least frequent; words with the same
# count come out in reverse alphabetical order.
word_counts = sorted(
    ((occurrences, token) for token, occurrences in counter.items()),
    reverse=True,
)
print(word_counts)

In [None]:
# Download the 'omw-1.4' and 'wordnet' resources before importing the WordNet
# corpus reader used for the synonym lookup below.
nltk.download('omw-1.4')
nltk.download('wordnet')
from nltk.corpus import wordnet

In [None]:
# For each of the ten most frequent words, collect every lemma name that
# WordNet lists under any of the word's synsets.
for count, word in word_counts[:10]:
    synonyms = {
        lemma
        for synset in wordnet.synsets(word)
        for lemma in synset.lemma_names()
    }
    # The word itself is not a useful suggestion.
    synonyms.discard(word)
    print(f'"{word}" used {count} times, synonyms: {synonyms if synonyms else "none"}')

## Automatically Sorting Your Downloads Folder

In [None]:
# Placeholder path: point this at the folder to tidy before running.
folder_to_sort = Path('/path/to/Downloads')

# Maps a file extension (including the leading dot) to the sub-folder its files
# are moved into. Extensions are grouped by destination so that adding another
# one is a single-token change.
filetype_to_folder_mapping = {
    extension: subfolder
    for subfolder, extensions in (
        ('images', ('.png', '.jpg', '.jpeg', '.gif')),
        ('pdfs', ('.pdf',)),
        ('videos', ('.mp4',)),
        ('audio', ('.mp3',)),
        ('bundles', ('.zip',)),
    )
    for extension in extensions
}

In [None]:
# Take a snapshot of the listing first: the loop creates sub-folders and moves
# files inside the very directory that is being listed.
for filepath in sorted(folder_to_sort.glob('*')):
    # Only regular files are sorted; a directory that happens to be named
    # e.g. "backup.zip" is left where it is.
    if not filepath.is_file():
        continue

    # Compare the suffix case-insensitively so "PHOTO.JPG" is treated like
    # "photo.jpg".
    subfolder = filetype_to_folder_mapping.get(filepath.suffix.lower())
    if subfolder is None:
        continue

    destination = folder_to_sort / subfolder
    destination.mkdir(exist_ok=True)

    target = destination / filepath.name
    # Path.rename can silently replace an existing file, so never move a file
    # onto one that is already there.
    if target.exists():
        print(f'Skipped {filepath.name}: {target} already exists')
        continue
    filepath.rename(target)