Skip to content

Commit

Permalink
Wordcloud du site (#136)
Browse files Browse the repository at this point in the history
Les principaux changements sont dans le dossier `build`
  • Loading branch information
linogaliana committed Sep 9, 2021
1 parent 04f8b8f commit 8166e22
Show file tree
Hide file tree
Showing 12 changed files with 921 additions and 80 deletions.
21 changes: 17 additions & 4 deletions .Rprofile
Expand Up @@ -27,7 +27,7 @@ badge <- function(type = "onyxia"){



reminder_badges <- function(notebook = ""){
reminder_badges <- function(notebook = "", onyxia_only = FALSE){

if (notebook != ""){
if (!endsWith(notebook, ".ipynb")){
Expand Down Expand Up @@ -60,7 +60,20 @@ reminder_badges <- function(notebook = ""){
)
colab_link <- sprintf("[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](http://colab.research.google.com/github/linogaliana/python-datascientist/blob/master%s)",
notebook)


if (isTRUE(onyxia_only)){
return(
cat(
c(
github_link,
nbviewer_link,
onyxia_link
),
sep = "\n"
)
)
}

return(
cat(
c(
Expand All @@ -77,14 +90,14 @@ reminder_badges <- function(notebook = ""){
}


print_badges <- function(fpath = NULL){
# Print the reminder badges for the notebook that corresponds to a source file.
#
# fpath: path of the source file; when NULL, the path returned by
#   knitr::current_input(dir = TRUE) is used.
# onyxia_only: forwarded unchanged to reminder_badges().
print_badges <- function(fpath = NULL, onyxia_only = FALSE){
if (is.null(fpath)){
fpath <- knitr::current_input(dir = TRUE)
}
# Drop the project-root prefix (here::here() followed by "/./").
# NOTE(review): the pattern is used as a regular expression, so the "." and
# any special character in the root path are not matched literally - confirm.
fpath <- gsub(paste0(here::here(),"/./"), "", fpath)
# Rewrite the path towards the notebook: "Rmd" -> "ipynb", "content" -> "notebooks".
# NOTE(review): both patterns are unanchored, so every occurrence in the path
# is replaced, not only the extension / top-level folder - confirm.
fpath <- gsub("Rmd", "ipynb", fpath)
fpath <- gsub("content","notebooks",fpath)
# NOTE(review): the call without onyxia_only looks like the pre-change line
# shown next to its replacement below - confirm which one is live.
reminder_badges(fpath)
reminder_badges(fpath, onyxia_only = onyxia_only)
}

github_link <- function(fpath = NULL){
Expand Down
19 changes: 12 additions & 7 deletions .github/workflows/netlify-test.yaml
Expand Up @@ -34,9 +34,20 @@ jobs:
run: |
conda info
conda list
- name: Render blog
- name: Build to md
run: |
Rscript -e 'source("./build/build_light.R")'
- name: Clean files with Python function
run: |
rm -rf "./temp"
mkdir -p temp
mkdir -p notebooks
pip install matplotlib wordcloud nltk
python build/cleanmd.py
python build/wc_website.py
- name: Render blog
run: |
Rscript -e 'source("./build/netlify.R")'
- name: Install npm
if: ${{ github.event.pull_request.head.repo.full_name == github.repository }}
uses: actions/setup-node@v2
Expand All @@ -58,12 +69,6 @@ jobs:
with:
name: content
path: content/
- name: Clean files with Python function
run: |
rm -rf "./temp"
mkdir -p temp
mkdir -p notebooks
python cleanmd.py
- name: Convert in ipynb with Jupytext
run: |
pip install jupytext
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/prod.yml
Expand Up @@ -35,6 +35,17 @@ jobs:
- name: Render blog
run: |
Rscript -e 'source("./build/build.R")'
- name: Create
run: |
rm -rf "./temp"
mkdir -p temp
mkdir -p notebooks
pip install matplotlib wordcloud nltk
python build/cleanmd.py
python build/wc_website.py
- name: Render blog
run: |
Rscript -e 'source("./build/netlify.R")'
- name: Install npm
if: ${{ github.repository == 'linogaliana/python-datascientist' }}
uses: actions/setup-node@v2
Expand Down
39 changes: 0 additions & 39 deletions .github/workflows/test-light.yaml

This file was deleted.

11 changes: 0 additions & 11 deletions build/build.R
Expand Up @@ -21,17 +21,6 @@ file.remove(



Sys.setenv(HUGO_IGNOREERRORS = "error-remote-getjson",
HUGO_BASEURL = "/",
#HUGO_BASEURL = "https://linogaliana-teaching.netlify.app/",
HUGO_RELATIVEURLS = "false",
BLOGDOWN_POST_RELREF = "true",
BLOGDOWN_SERVING_DIR = here::here())

cmd = blogdown:::find_hugo()
cmd_args = c("--themesDir themes", "-t github.com")#, "--gc")#, "--minify")
system2(cmd, cmd_args)


#blogdown::stop_server()

Expand Down
14 changes: 1 addition & 13 deletions build/build_light.R
@@ -1,7 +1,7 @@
content_rmd <- list.files("./content", recursive = TRUE, pattern = "*.Rmd", full.names = TRUE)
content_rmd <- content_rmd[!grepl("/git/", content_rmd)]
content_rmd <- content_rmd[!grepl("06a_exo_supp_webscraping.", content_rmd)]
content_rmd <- content_rmd[21]
content_rmd <- content_rmd[5]

file.remove(
gsub(
Expand All @@ -24,18 +24,6 @@ file.remove(



Sys.setenv(HUGO_IGNOREERRORS = "error-remote-getjson",
HUGO_BASEURL = "/",
#HUGO_BASEURL = "https://linogaliana-teaching.netlify.app/",
HUGO_RELATIVEURLS = "false",
BLOGDOWN_POST_RELREF = "true",
BLOGDOWN_SERVING_DIR = here::here())

cmd = blogdown:::find_hugo()
cmd_args = c("--themesDir themes", "-t github.com")#, "--gc")#, "--minify")
system2(cmd, cmd_args)


#blogdown::stop_server()


Expand Down
File renamed without changes.
11 changes: 11 additions & 0 deletions build/netlify.R
@@ -0,0 +1,11 @@

# Set the environment variables read by Hugo / blogdown, then run the Hugo
# binary located by blogdown with the themes directory and theme flags.
Sys.setenv(
  HUGO_IGNOREERRORS = "error-remote-getjson",
  HUGO_BASEURL = "/",
  # Alternative base URL, currently disabled:
  # HUGO_BASEURL = "https://linogaliana-teaching.netlify.app/",
  HUGO_RELATIVEURLS = "false",
  BLOGDOWN_POST_RELREF = "true",
  BLOGDOWN_SERVING_DIR = here::here()
)

# find_hugo() is not exported by blogdown, hence the ":::" access.
cmd <- blogdown:::find_hugo()
# The "--gc" and "--minify" flags are currently disabled.
cmd_args <- c("--themesDir themes", "-t github.com")
system2(cmd, cmd_args)
66 changes: 61 additions & 5 deletions build/wc_website.py
@@ -1,11 +1,67 @@
import glob
import matplotlib
import matplotlib.pyplot as plt
import wordcloud
import numpy as np
import PIL
import io
import requests
import random
import re

list_files = glob.glob("./content/course/**/*.Rmd", recursive=True)
# Fetch the NLTK "stopwords" corpus when the script runs, then build the set
# of French stop words that is later used to filter the corpus tokens.
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('french'))

filename = list_files[0]
list_files = glob.glob("./temp/course/**/*.Rmd", recursive=True)

with open(filename, encoding='utf-8') as f:
content = f.readlines()

# Image loaded as a NumPy array and passed as the `mask` of the word cloud.
# NOTE(review): only `import PIL` is visible here; `PIL.Image` presumably gets
# loaded as a side effect of another import (e.g. wordcloud) - confirm.
book_mask = np.array(PIL.Image.open("./build/python_black.png"))


def read_file(filename):
    """Return the content of ``filename`` as a single string.

    The file is opened as UTF-8 text and read line by line; the lines are
    joined with one space, each line keeping its own trailing newline.
    """
    with open(filename, 'r', encoding='utf-8') as handle:
        return " ".join(handle.readlines())

def grey_color_func(word, font_size, position, orientation, random_state=None,
                    **kwargs):
    """Return a random grey tone as an ``hsl(...)`` color string.

    All arguments are accepted but ignored; the lightness is drawn with the
    module-level ``random`` generator between 60 and 100 (inclusive).
    """
    lightness = random.randint(60, 100)
    return "hsl(0, 0%%, %d%%)" % lightness

def make_wordcloud(corpus):
    """Build a word cloud from ``corpus`` and return the WordCloud object.

    The cloud is shaped by the module-level ``book_mask`` and recolored with
    ``grey_color_func`` using a fixed ``random_state``.
    """
    cloud = wordcloud.WordCloud(
        mask=book_mask,
        max_words=2000,
        margin=10,
        contour_width=3,
        contour_color='white',
    )
    cloud.generate(corpus)
    cloud.recolor(color_func=grey_color_func, random_state=3)
    return cloud

def keep_text_within_shortword(shortcode):
    """Remove ``{{ ... }}`` shortcode lines from ``shortcode``.

    Despite its name, the function deletes the shortcode markers themselves:
    either a ``{{...}}`` span followed by a newline, or a newline followed by
    a ``{{...}}`` span. ``.`` does not cross newlines, so each match stays on
    one line. The remaining text is returned unchanged.
    """
    # Raw string: same regex as before, without the invalid "\{" / "\}"
    # escape sequences that newer Python versions warn about.
    pattern = r"(\{\{).*(\}\}\n)|(\n\{\{).*(\}\})"
    return re.sub(pattern, "", shortcode)

def clean_file(text):
    """Merge the strings in ``text`` into one cleaned, lower-cased string.

    The elements are joined with a space and lower-cased, shortcode lines are
    removed through ``keep_text_within_shortword``, R chunks (from an opening
    fence starting with ``{r`` to the next closing fence) are dropped, and
    every remaining backtick is deleted.
    """
    lowered = " ".join(text).lower()
    without_shortcodes = keep_text_within_shortword(lowered)
    # Remove R chunks; (?s) lets the chunk body span several lines.
    without_chunks = re.sub(r'(?s)(```\{r)(.*?)(```)', "", without_shortcodes)
    return without_chunks.replace('`', '')

# Read every selected file and merge them into one cleaned, lower-cased text.
list_content = [read_file(fl) for fl in list_files]

corpus = clean_file(text=list_content)

# Drop French stop words. Splitting on any whitespace (instead of a single
# space) keeps newlines from staying glued to tokens, which previously let
# words such as "de\n" slip past the stop-word filter.
corpus = [w for w in corpus.split() if w not in stop_words]
#corpus = [word for word in corpus if word.isalpha()]
corpus = " ".join(corpus)

# Render the word cloud without axes and save it as the home-page image.
fig = plt.figure()

plt.imshow(make_wordcloud(corpus), interpolation='bilinear')
plt.axis("off")
plt.tight_layout()
plt.savefig('./content/home/word.png', bbox_inches='tight', pad_inches = 0, dpi=199)


content

0 comments on commit 8166e22

Please sign in to comment.