In [2]:
import os
from pathlib import Path
from dataclasses import dataclass
from configparser import ConfigParser
import yaml
import re
import pandas as pd
from tqdm.auto import tqdm
from datetime import datetime
import markdown
import nbconvert
from collections import defaultdict
from dataclasses import dataclass

%load_ext autoreload
%autoreload 2
import blog

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## config

All the config is saved in a config.ini file

In [3]:
config = ConfigParser()
config.read("config.ini")
for section in config.sections():
    print(f"{section}: {config.items(section)}")

blog: [('content', 'notes'), ('publish', 'gh-pages')]


In [7]:
blog.path_content, blog.path_publish

(PosixPath('notes'), PosixPath('gh-pages'))

## test blog.py

In [4]:
posts, postsdict, tags = blog.get_posts()
tags # dict of tags to posts. sort this too?

defaultdict(set,
            {'apps': {'vs_code'},
             'chromebooks': {'setup-crostini-chromebook'},
             'python': {'anaconda', 'building_this_blog', 'note_in_subfolder'},
             'jupyter': {'jupyterlab'}})

In [10]:
for t in tags.keys():
    print(f"tag: {t}")
    for s in sorted(tags[t], key = lambda x: postsdict[x].date, reverse=True):
        print(postsdict[s].slug, postsdict[s].date)
        #print(postsdict[s].html[:22])
    print("---------------")

tag: apps
vs_code 2019-10-10
---------------
tag: chromebooks
setup-crostini-chromebook 2018-08-24
---------------
tag: python
anaconda 2020-06-17
note_in_subfolder 2020-06-14
building_this_blog 2020-06-14
---------------
tag: jupyter
jupyterlab 2019-08-04
---------------


In [118]:
for tag, pp in tags.items():
    print(tag, pp)
    for p in pp:

apps {'vs_code'}
chromebooks {'setup-crostini-chromebook'}
python {'building_this_blog', 'anaconda', 'note_in_subfolder'}
jupyter {'jupyterlab'}


In [11]:
for p in posts:
    print(p.slug, p.date)
    

anaconda 2020-06-17
building_this_blog 2020-06-14
note_in_subfolder 2020-06-14
vs_code 2019-10-10
jupyterlab 2019-08-04
setup-crostini-chromebook 2018-08-24


In [116]:
for slug, p in postsdict.items():
    print(slug, p.date)
    if p.toc:
        print(p.toc[:20])
    else:
        print("no toc")

anaconda 2020-06-17
no toc
building_this_blog 2020-06-14
<div class="toc">
<u
note_in_subfolder 2020-06-14
<div class="toc">
<u
vs_code 2019-10-10
no toc
jupyterlab 2019-08-04
no toc
setup-crostini-chromebook 2018-08-24
<div class="toc">
<u


## Pinboard

In [134]:
pb_token = os.environ.get("PB_TOKEN")
pb_token

In [147]:
import pinboard
pb = pinboard.Pinboard("KO:821C1E5B824A6732581B")

# check its working
pb.posts.update() # should return most recent updated date

datetime.datetime(2020, 6, 24, 4, 39, 45)

In [175]:
all_posts = pb.posts.all() # list of all bookmarks

In [196]:
# the api returns a class obj
bookmark = all_posts[0]
# useful fields in that object:
bookmark.description, bookmark.tags, bookmark.extended, bookmark.url, bookmark.time

('What I learned from looking at 200 machine learning tools',
 ['ai'],
 '',
 'https://huyenchip.com/2020/06/22/mlops.html',
 datetime.datetime(2020, 6, 24, 4, 39, 45))

In [149]:
links = pb.posts.recent()

In [171]:
for b in links["posts"]:
    print(b.time, b.tags)

2020-06-24 04:39:45 ['ai']
2020-06-22 03:38:00 ['python']
2020-06-22 03:36:08 ['python']
2020-06-15 06:40:30 ['apps', 'markdown']
2020-06-14 07:20:02 ['windows', 'apps']
2020-06-13 20:06:16 ['']
2020-06-13 12:18:36 ['recipes']
2020-06-13 05:03:24 ['games']
2020-06-12 14:13:48 ['dataviz']
2020-06-12 01:04:52 ['art', 'posters']
2020-06-10 23:44:52 ['ai']
2020-06-04 05:10:27 ['blogs']
2020-06-04 02:11:58 ['analytics']
2020-06-02 07:29:04 ['streamlit']
2020-05-31 03:00:33 ['books', 'programming']


In [172]:
tag_posts = pb.posts.all(tag=["programming"])

In [174]:
for b in tag_posts:
    print(b.time, b.tags)

2020-05-31 03:00:33 ['books', 'programming']
2018-03-14 07:07:19 ['programming']
2017-02-27 23:27:15 ['python', 'programming', 'bayes']
2016-11-15 22:35:31 ['programming', 'poster']
2016-11-15 11:32:49 ['programming']


## Read each file and parse front matter

In [34]:
from markdown.extensions.codehilite import CodeHilite
codehilite = CodeHilite(noclasses=True)

In [39]:
txt = """

This is a "test" para. Here goes a line break:
what does it do?

```python
print(3+3) # this is a comment
```

## Bash

Most remote VM's are running a basic bash shell. Fix that by running

```bash
sudo apt update &&
sudo apt install htop neofetch -y &&
wget https://github.com/sharkdp/bat/releases/download/v0.15.1/bat-musl_0.15.1_amd64.deb &&
sudo dpkg -i bat-musl_0.15.1_amd64.deb &&
rm bat-musl_0.15.1_amd64.deb &&
sh -c "$(curl -fsSL https://raw.github.com/ohmybash/oh-my-bash/master/tools/install.sh)"
```

## repos

This updates repos - but doesn't upgrade as no need generally for a short lived VM and installs

- [htop](https://hisham.hm/htop/): monitor machine use
- neofetch: show machine overview
- [Bat](https://github.com/sharkdp/bat): better cat with syntax highlighting
- [oh-my-bash](https://ohmybash.github.io/) - makes the terminal look good

If working with files:

```bash
sudo apt install fzf mc -y
```

- mc: file manager
- fzf: fuzzy search

## git

Setup git by running the following inside a git repo (if needed).

```bash
git config --global user.name "khalido" &&
git config --global user.email "khalid.omar@gmail.com" &&
git config credential.helper 'cache --timeout=21600'
```



Note, there has gotta be a better way to do this.
"""

In [40]:
# make enters into line breaks by adding "nl2br"
extensions=["extra", "toc", "codehilite"] # , "smarty" 

# https://help.farbox.com/pygments.html
# monokai default

extension_configs = {
    'codehilite': {
        'noclasses': True,
        'linenums': False,
        'pygments_style': "monokai"
    },
}

In [41]:
print(md.toc)

<div class="toc">
<ul>
<li><a href="#twitter-embed">twitter embed</a></li>
</ul>
</div>



In [10]:
# configure python markdown parser

# make enters into line breaks by adding "nl2br"
extensions=["extra", "toc", "codehilite"] # , "smarty" 

# https://help.farbox.com/pygments.html
# monokai default

extension_configs = {
    'codehilite': {
        'noclasses': True,
        'linenums': False,
        'pygments_style': "monokai"
    },
}

md = markdown.Markdown(extensions=extensions, extension_configs=extension_configs)

# paths
md_paths = [f for f in path.rglob("*.md")]
notebook_paths = [f for f in path.rglob("*.ipynb")]

data = [] # holds all the posts
tags = defaultdict(set) # holds set of all posts for every tag

for p in md_paths:
    d = {} # one dict for each post
    all_txt = p.read_text() 

    # extract front matter b/w "---" lines
    n = all_txt[3:].find("---") + 3
    fm = yaml.load(all_txt[:n]) # front matter dict
    txt = all_txt[n+3:].strip() # text excluding front matter

    # now add values I care about to the dict
    d["title"] = fm["title"]
    
    if fm.get("slug"):
        d["slug"] = fm["slug"]
    else:
        d["slug"] = p.name.split(".")[0]

    # add created date
    try:
        d["date"] = fm["date"]
    except:
        dt = datetime.fromtimestamp(p.stat().st_ctime) # create time of file
        d["date"] = dt

    # add last modified data
    try:
        d["lastmod"] = fm["lastmod"]
    except:
        dt = datetime.fromtimestamp(p.stat().st_mtime)
        d["lastmod"] = dt
    
    d["path"] = p
    d["filename"] = p.name
    d["front_matter"] = fm
    d["markdown"] = txt
    
    d["tags"] = fm["tags"]
    for tag in fm["tags"]:
        tags[tag].add(d["slug"])

    d["html"] = md.convert(txt) 
    
    data.append(d)

df = pd.DataFrame(data) #.set_index("filename")
print(f"{len(df)} posts converted and saved in dataframe")
df

6 posts converted and saved in dataframe


Unnamed: 0,title,slug,date,lastmod,path,filename,front_matter,markdown,tags,html
0,Visual Studio Code,vs_code,2019-10-10 00:00:00.000,2019-10-10 00:00:00.000,notes/vs_code.md,vs_code.md,"{'title': 'Visual Studio Code', 'date': 2019-1...","VS Code has so many extensions and stuff, that...",[apps],"<p>VS Code has so many extensions and stuff, t..."
1,Setting up Linux on a Chromebook with Crostini,setup-crostini-chromebook,2018-08-24 00:00:00.000,2019-08-07 00:00:00.000,notes/setup-crostini-chromebook.md,setup-crostini-chromebook.md,{'title': 'Setting up Linux on a Chromebook wi...,ChromeOS supports a built in Linux - running i...,[chromebooks],<p>ChromeOS supports a built in Linux - runnin...
2,Using Anaconda,anaconda,2020-06-17 10:05:57.290,2020-06-17 10:05:57.290,notes/anaconda.md,anaconda.md,"{'title': 'Using Anaconda', 'tags': ['python']}",## Install Anaconda\n\nThere seems to be two m...,[python],"<h2 id=""install-anaconda"">Install Anaconda</h2..."
3,Jupyter Lab tips and tricks,jupyterlab,2019-08-04 00:00:00.000,2020-06-17 10:06:10.030,notes/jupyterlab.md,jupyterlab.md,"{'title': 'Jupyter Lab tips and tricks', 'date...",My collection of tips and tricks for using jup...,[jupyter],<p>My collection of tips and tricks for using ...
4,Setting up my own blog,building_this_blog,2020-06-14 00:00:00.000,2019-08-07 00:00:00.000,notes/building_this_blog.md,building_this_blog.md,"{'title': 'Setting up my own blog', 'date': 20...",Hereby I talk through writing my own blog engi...,[python],<p>Hereby I talk through writing my own blog e...
5,Testing note in a subfolder,note_in_subfolder,2020-06-14 00:00:00.000,2019-08-07 00:00:00.000,notes/sub_notes/note_in_subfolder.md,note_in_subfolder.md,"{'title': 'Testing note in a subfolder', 'date...",This note is inside a subfolder.\n\n## twitter...,[python],<p>This note is inside a subfolder.</p>\n<h2 i...


## Output

In [64]:
for post in df.itertuples():
    print(post.tags)

['apps']
['chromebooks']
['python']
['jupyter']
['python']
['python']
