# Script to Make Site
This script creates a website ready to push to GitHub. It scans the content directory for all Jupyter notebooks, converts the notebook files to markdown, and updates the home page with links to the resulting pages.

## Load libraries

In [1]:
# Load libraries
import fileinput
import os
import re
import shutil
import subprocess
import sys
from glob import glob

from titlecase import titlecase

# Set path to content: the root folder that is walked for notebooks.
# Later cells split each notebook path on '/' and read the category from the
# first sub-folder and the post title from the file name.
path = 'content/'

## Extract all Jupyter notebook filenames

In [2]:
# Find all jupyter notebooks in all content folders
all_ipynb_files = [os.path.join(root, name)
                   for root, dirs, files in os.walk(path)
                   for name in files
                   if name.endswith(".ipynb")]

# Keep only real posts.
# - Checkpoint copies written by Jupyter are skipped.
# - Notebooks sitting directly in the content root (such as a make file) are
#   skipped explicitly rather than by deleting index 0: os.walk gives no
#   ordering guarantee, so a positional delete can drop a real post or raise
#   IndexError on an empty list. Root-level notebooks have no category folder
#   and cannot be posts anyway.
content_root = os.path.normpath(path)
ipynb_files = [x for x in all_ipynb_files
               if ".ipynb_checkpoints" not in x
               and os.path.normpath(os.path.dirname(x)) != content_root]

# View file list
print(ipynb_files)

['content/python/exception_handling.ipynb', 'content/python/sets.ipynb', 'content/python/if_else.ipynb', 'content/python/linked_lists.ipynb', 'content/python/pandas_basics.ipynb', 'content/python/loops.ipynb', 'content/python/list_comprehension.ipynb', 'content/python/dictionaries.ipynb', 'content/python/strings.ipynb', 'content/web_scraping/scrape_digikey_table_data_with_pandas_and_selenium.ipynb', 'content/web_scraping/scrape_house_listings_with_selenium.ipynb', 'content/web_scraping/scraping_job_listings_with_lxml.ipynb', 'content/web_scraping/scrape_free_proxies_with_selenium.ipynb', 'content/web_scraping/scrape_fanduel_nba_player_stats_with_scrapy.ipynb', 'content/web_scraping/scrape_linkedin_with_selenium.ipynb', 'content/web_scraping/lxml_basics.ipynb', 'content/web_scraping/scrape_crypto_prices_with_selenium.ipynb', 'content/web_scraping/scraping_stock_market_news_and_updates_with_lxml.ipynb', 'content/web_scraping/beautiful_soup_basics.ipynb', 'content/web_scraping/scrape_nba_

## Extract category titles and post titles

In [3]:
# Build a mapping of category title -> list of post titles
posts = {}

for post in ipynb_files:
    # Each notebook path is split on '/': index 1 is the category folder,
    # index 2 is the notebook file name
    parts = post.split('/')

    # Folder name, with underscores turned into spaces, becomes the category
    category = titlecase(parts[1].replace('_', ' '))

    # File name, minus the extension and underscores, becomes the post title
    post_title = titlecase(parts[2].replace('.ipynb', '').replace('_', ' '))

    # Start an empty list the first time a category is seen, then append
    posts.setdefault(category, []).append(post_title)

# View dictionary
print(posts)

{'Python': ['Exception Handling', 'Sets', 'If Else', 'Linked Lists', 'Pandas Basics', 'Loops', 'List Comprehension', 'Dictionaries', 'Strings'], 'Web Scraping': ['Scrape Digikey Table Data With Pandas and Selenium', 'Scrape House Listings With Selenium', 'Scraping Job Listings With LXML', 'Scrape Free Proxies With Selenium', 'Scrape Fanduel Nba Player Stats With Scrapy', 'Scrape Linkedin With Selenium', 'LXML Basics', 'Scrape Crypto Prices With Selenium', 'Scraping Stock Market News and Updates With LXML', 'Beautiful Soup Basics', 'Scrape Nba Player RPM Stats From Espn With LXML', 'Scrape Attorneys With Selenium and Beautifulsoup Part1', 'Selenium Basics', 'Scrape Proxy Table With Nodejs', 'Scrape Nba Player Game Logs From Espn With LXML', 'Scrape Proxy Table With Pandas', 'Get Domain Information With Whois', 'Scrapy Basics', 'Scrape Historical Ohlc Stock Prices', 'Scrape Car Dealerships'], 'Data Science': ['Iterate and Evaluate a Naive Bayes Classifier', 'Analysis of Boston House Pric

## Convert files to markdown

In [4]:
# Convert every notebook to markdown with nbconvert
for file in ipynb_files:
    # Pass the command as an argument list (no shell) so that file names
    # containing spaces or shell metacharacters are handed to nbconvert intact.
    try:
        result = subprocess.run(['jupyter', 'nbconvert', '--to', 'markdown', file])
    except OSError as err:
        # The jupyter executable itself could not be started; no later file
        # would succeed either, so report once and stop.
        print('Could not run jupyter nbconvert: {err}'.format(err=err))
        break

    # Keep going on failure (best effort), but make the failure visible
    if result.returncode != 0:
        print('nbconvert failed for {file} (exit code {code})'.format(
            file=file, code=result.returncode))

[NbConvertApp] Converting notebook content/python/exception_handling.ipynb to markdown
[NbConvertApp] Writing 993 bytes to content/python/exception_handling.md
[NbConvertApp] Converting notebook content/python/sets.ipynb to markdown
[NbConvertApp] Writing 1328 bytes to content/python/sets.md
[NbConvertApp] Converting notebook content/python/if_else.ipynb to markdown
[NbConvertApp] Writing 1154 bytes to content/python/if_else.md
[NbConvertApp] Converting notebook content/python/linked_lists.ipynb to markdown
[NbConvertApp] Writing 5349 bytes to content/python/linked_lists.md
[NbConvertApp] Converting notebook content/python/pandas_basics.ipynb to markdown
[NbConvertApp] Writing 3990 bytes to content/python/pandas_basics.md
[NbConvertApp] Converting notebook content/python/loops.ipynb to markdown
[NbConvertApp] Writing 1533 bytes to content/python/loops.md
[NbConvertApp] Converting notebook content/python/list_comprehension.ipynb to markdown
[NbConvertApp] Writing 1957 bytes to content/p

[NbConvertApp] Converting notebook content/data_science/fixing_assumptions.ipynb to markdown
[NbConvertApp] Support files will be in fixing_assumptions_files/
[NbConvertApp] Making directory content/data_science/fixing_assumptions_files
[NbConvertApp] Making directory content/data_science/fixing_assumptions_files
[NbConvertApp] Making directory content/data_science/fixing_assumptions_files
[NbConvertApp] Making directory content/data_science/fixing_assumptions_files
[NbConvertApp] Writing 13199 bytes to content/data_science/fixing_assumptions.md
[NbConvertApp] Converting notebook content/data_science/author_prediction_unsupervised_nlp_with_bow.ipynb to markdown
[NbConvertApp] Support files will be in author_prediction_unsupervised_nlp_with_bow_files/
[NbConvertApp] Making directory content/data_science/author_prediction_unsupervised_nlp_with_bow_files
[NbConvertApp] Making directory content/data_science/author_prediction_unsupervised_nlp_with_bow_files
[NbConvertApp] Making directory c

[NbConvertApp] Converting notebook content/data_engineering/plotting_current_and_gain_distributions_with_matplotlib.ipynb to markdown
[NbConvertApp] Support files will be in plotting_current_and_gain_distributions_with_matplotlib_files/
[NbConvertApp] Making directory content/data_engineering/plotting_current_and_gain_distributions_with_matplotlib_files
[NbConvertApp] Making directory content/data_engineering/plotting_current_and_gain_distributions_with_matplotlib_files
[NbConvertApp] Making directory content/data_engineering/plotting_current_and_gain_distributions_with_matplotlib_files
[NbConvertApp] Making directory content/data_engineering/plotting_current_and_gain_distributions_with_matplotlib_files
[NbConvertApp] Making directory content/data_engineering/plotting_current_and_gain_distributions_with_matplotlib_files
[NbConvertApp] Making directory content/data_engineering/plotting_current_and_gain_distributions_with_matplotlib_files
[NbConvertApp] Making directory content/data_engi

## Update homepage

In [5]:
# NOTE: everything below is wrapped in a ''' ... ''' string literal, so none of
# it executes; running this cell only echoes the string back as the cell output.
# If re-enabled, the code would write index2.md with two headers, an intro
# paragraph, one link per post grouped under its category, and a copyright
# footer. Link slugs are rebuilt by lower-casing the category and title words
# and joining them with underscores.
# NOTE(review): if this is re-enabled, consider `with open(...)` so the file is
# closed even when a write fails.
'''# Open homepage markdown file
f = open('index2.md', 'w')

header1 = "## <center>Python • Data Science • Machine Learning</center>\n"
header2 = "## <center>Technical Notes</center>\n\n"

#intro = "Welcome to my site. I am a data scientist who is fascinated with solving challenging data science problems across a variety of fields. Check out my technical notes relating to python and data science below!\n"
#intro = "I am a data scientist who is fascinated with solving challenging data-oriented problems across a variety of fields. I enjoy seeking hidden truths in data. Check out my technical notes on python and data science below!"
intro = "Hello! I'm Rakesh Bhatia, a software freelancer who specializes in web scraping, web development, and data science. Welcome to my portfolio. I enjoy searching for hidden truths in data, which inspired me to create this website with a variety of technical notes and projects on python, data science, machine learning, and more. Check out all of my posts below!"

# Write title and intro
f.write(header1)
f.write(header2)
f.write(intro)

# Write categories and post titles/links
for category, titles in posts.items():
    f.write("\n> **_" + category + "_**\n")
    for title in titles:
        f.write("> - [" + title + "](https://rakeshbhatia.github.io/notes/content/" + "_".join([x.lower() for x in category.split(" ")]) + "/" + "_".join([x.lower() for x in title.split(" ")]) + ")" + "\n")

f.write("\nCopyright © Rakesh Bhatia, 2022. All notes available on [GitHub](https://github.com/rakeshbhatia/notes).")

f.close()'''

'# Open homepage markdown file\nf = open(\'index2.md\', \'w\')\n\nheader1 = "## <center>Python • Data Science • Machine Learning</center>\n"\nheader2 = "## <center>Technical Notes</center>\n\n"\n\n#intro = "Welcome to my site. I am a data scientist who is fascinated with solving challenging data science problems across a variety of fields. Check out my technical notes relating to python and data science below!\n"\n#intro = "I am a data scientist who is fascinated with solving challenging data-oriented problems across a variety of fields. I enjoy seeking hidden truths in data. Check out my technical notes on python and data science below!"\nintro = "Hello! I\'m Rakesh Bhatia, a software freelancer who specializes in web scraping, web development, and data science. Welcome to my portfolio. I enjoy searching for hidden truths in data, which inspired me to create this website with a variety of technical notes and projects on python, data science, machine learning, and more. Check out all o