# Script to Make Site
This script builds a website that is ready to push to GitHub. It scans the content directory for all Jupyter notebooks, converts each notebook to HTML, and updates the home page with links to the resulting HTML files.

## Load libraries

In [13]:
# Load libraries
import os
import re
import fileinput
import sys
from glob import glob
import shutil
from titlecase import titlecase

# Set path to content
# Root folder that is walked for notebooks. Later cells split each notebook
# path on '/' and read the category from the second segment and the post
# name from the third, i.e. they expect content/<category>/<post>.ipynb
path = 'content/'

## Extract all Jupyter notebook filenames

In [14]:
# Walk the whole content tree and collect the path of every notebook file
all_ipynb_files = [
    os.path.join(folder, filename)
    for folder, _subfolders, filenames in os.walk(path)
    for filename in filenames
    if filename.endswith(".ipynb")
]

# Drop the first entry, which is expected to be the make file
# NOTE(review): this is purely positional -- it assumes the make file is the
# first notebook found by the walk and that at least one notebook exists; confirm
del all_ipynb_files[0]

# Discard the autosave copies that Jupyter keeps in checkpoint folders
ipynb_files = [
    notebook
    for notebook in all_ipynb_files
    if ".ipynb_checkpoints" not in notebook
]

# Show the notebooks that will be published
print(ipynb_files)

['content/python/sets.ipynb', 'content/python/if_else.ipynb', 'content/python/linked_lists.ipynb', 'content/python/pandas_basics.ipynb', 'content/python/loops.ipynb', 'content/python/list_comprehension.ipynb', 'content/python/dictionaries.ipynb', 'content/web_scraping/scraping_job_listings_with_lxml.ipynb', 'content/web_scraping/scrape_fanduel_nba_player_stats_with_scrapy.ipynb', 'content/web_scraping/scraping_stock_market_news_and_updates_with_lxml.ipynb', 'content/web_scraping/beautiful_soup_basics.ipynb', 'content/web_scraping/scrape_nba_player_rpm_stats_from_espn_with_lxml.ipynb', 'content/web_scraping/scrape_nba_player_game_logs_from_espn_with_lxml.ipynb', 'content/web_scraping/scrapy_basics.ipynb', 'content/web_scraping/scrape_historical_ohlc_stock_prices.ipynb', 'content/data_science/Fixing Assumptions.ipynb', 'content/data_science/analysis_of_boston_house_prices.ipynb', 'content/data_science/home_credit_default_risk_analysis.ipynb', 'content/data_science/In Sample Evaluation an

## Extract category titles and post titles

In [15]:
# Group the post titles by category, keyed by the human-readable category name
posts = {}

for post in ipynb_files:
    segments = post.split('/')

    # Second path segment is the category folder, e.g. 'web_scraping'
    category = titlecase(segments[1].replace('_', ' '))

    # Third path segment is the notebook file name; strip the extension
    post_title = titlecase(segments[2].replace('.ipynb', '').replace('_', ' '))

    # Create the category's list the first time it is seen, then record the post
    posts.setdefault(category, []).append(post_title)

# Show the resulting category -> titles mapping
print(posts)

{'Python': ['Sets', 'If Else', 'Linked Lists', 'Pandas Basics', 'Loops', 'List Comprehension', 'Dictionaries'], 'Web Scraping': ['Scraping Job Listings With Lxml', 'Scrape Fanduel Nba Player Stats With Scrapy', 'Scraping Stock Market News and Updates With Lxml', 'Beautiful Soup Basics', 'Scrape Nba Player Rpm Stats From Espn With Lxml', 'Scrape Nba Player Game Logs From Espn With Lxml', 'Scrapy Basics', 'Scrape Historical Ohlc Stock Prices'], 'Data Science': ['Fixing Assumptions', 'Analysis of Boston House Prices', 'Home Credit Default Risk Analysis', 'In Sample Evaluation and Cross Validation', 'Fixing Assumptions', 'Iterate and Evaluate a Naive Bayes Classifier', 'Preparing Data', 'Preparing Data', 'In Sample Evaluation and Cross Validation'], 'Data Engineering': ['Plotting S Parameter Distributions With Matplotlib', 'Plotting Current and Gain Distributions With Matplotlib', 'Plotting Current and Gain Distributions With Matplotlib 2']}


## Convert files to HTML

In [16]:
# Convert every notebook to a standalone HTML page.
# The command is passed to subprocess as an argument list (no shell), so
# notebook paths that contain spaces, e.g. 'Fixing Assumptions.ipynb', reach
# nbconvert as a single argument instead of being split by the shell.
import subprocess

for file in ipynb_files:
    try:
        completed = subprocess.run(
            ['jupyter', 'nbconvert', '--to', 'html_embed', file])
    except OSError as err:
        # The jupyter executable could not be started at all, so every
        # remaining conversion would fail the same way
        print('Could not run jupyter nbconvert: {err}'.format(err=err))
        break

    # Keep converting the remaining notebooks, but do not hide the failure
    if completed.returncode != 0:
        print('nbconvert failed for {file} (exit code {code})'.format(
            file=file, code=completed.returncode))

## Update homepage

In [17]:
# Rebuild the homepage (README.md): two headings, an intro paragraph, one
# linked bullet per post grouped by category, and a copyright footer.
header1 = "## <center>Python • Data Science • Machine Learning</center>"
header2 = "## <center>Technical Notes</center>\n\n"

intro = "I am a data scientist who is fascinated with solving challenging data-oriented problems across a variety of fields. I enjoy seeking hidden truths in data. Check out my technical notes on python and data science below!"

# Published location of the content folder; post links are built below it
base_url = "https://rakeshbhatia.github.io/notes/content/"

# The page contains non-ASCII characters ('•', '©'), so the encoding is set
# explicitly instead of relying on the platform default. The with-block
# closes the file even if one of the writes fails.
with open('README.md', 'w', encoding='utf-8') as f:
    # Write title and intro. header1 has no trailing newline, so one is added
    # to keep the two headings on separate lines.
    f.write(header1 + "\n")
    f.write(header2)
    f.write(intro)

    # Write categories and post titles/links
    for category, titles in posts.items():
        f.write("\n> **_" + category + "_**\n")

        # Links use the lower-case, underscore-separated form of the names
        category_slug = category.lower().replace(" ", "_")
        for title in titles:
            title_slug = title.lower().replace(" ", "_")
            f.write("> - [" + title + "](" + base_url + category_slug + "/" + title_slug + ")" + "\n")

    f.write("\nCopyright © Rakesh Bhatia, September 2019. All notes available on [GitHub](https://github.com/rakeshbhatia/notes).")