In [1]:
# download latest version of pip, numpy, pandas if desired
!pip install --upgrade pip
!pip install --upgrade numpy pandas

Looking in indexes: https://registry.affirm-stage.com/artifactory/api/pypi/pypi/simple
Looking in links: file:///mnt/pip/wheelhouse
Requirement already up-to-date: pip in /home/benson/fpna/.virtualenvs/fpna/lib/python3.7/site-packages (20.1.1)
Looking in indexes: https://registry.affirm-stage.com/artifactory/api/pypi/pypi/simple
Looking in links: file:///mnt/pip/wheelhouse
Requirement already up-to-date: numpy in /home/benson/fpna/.virtualenvs/fpna/lib/python3.7/site-packages (1.18.4)
Requirement already up-to-date: pandas in /home/benson/fpna/.virtualenvs/fpna/lib/python3.7/site-packages (1.0.3)


In [2]:
import yaml
import os

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.decomposition import PCA

import seaborn as sns

## Setup: feedback messages and lesson-processing helpers

In [3]:
# Feedback printed after a wrong answer; the question handlers pick one entry
# at random for every failed attempt.
rand_incorr = [
    "Not quite, but you're learning! Try again.",
    "That's not the answer I was looking for, but try again.",
    "Not exactly. Give it another go.",
    "You're close...I can feel it! Try it again.",
    "Nice try, but that's not exactly what I was hoping for. Try again.",
    "One more time. You can do it!",
]

# Feedback printed after a correct answer, also picked at random.
rand_corr = [
    "Excellent job!",
    "Nice job!",
    "You got it!",
]

In [4]:
# Parse lesson
def process_lesson(yaml):
    """
    Runs a parsed lesson from start to finish, one entry at a time.

    Each entry is dispatched on its 'Class' value:
      * 'meta'                   -> proc_meta
      * 'text'                   -> proc_text
      * 'mult_question'          -> proc_mult_choice
      * 'cmd_question', 'script' -> proc_script
    Any other entry (including one without a 'Class' key) is printed after an
    "UNK!!" marker and skipped.

    A question entry is repeated until its handler returns a truthy value; on
    every repeat the handler is called with repeat_q=True.

    Parameters:
        yaml: the parsed lesson, i.e. a sequence of dict entries. None is
            treated as an empty lesson. The parameter keeps its original name
            for backward compatibility; because that name hides the yaml
            module inside this function, it is rebound to `entries` at once.

    Returns:
        None. Progress and feedback are written to stdout.
    """
    # Use the argument that was passed in rather than a notebook-level global,
    # so the function works regardless of what the caller named its variable.
    entries = yaml if yaml is not None else []
    total = len(entries)
    idx = 0
    repeat_q_flag = False
    while idx < total:
        entry = entries[idx]
        entry_class = entry.get('Class') if isinstance(entry, dict) else None
        if entry_class == 'meta':
            proc_meta(entry)
            idx += 1
        elif entry_class == 'text':
            proc_text(entry)
            idx += 1
        elif entry_class in ('mult_question', 'cmd_question', 'script'):
            ask = proc_mult_choice if entry_class == 'mult_question' else proc_script
            corr = ask(entry, repeat_q_flag)
            # Stay on the same entry (and switch to hint mode) until answered correctly.
            repeat_q_flag = not corr
            idx += int(bool(corr))
        else:
            print("UNK!!")
            print(entry)
            idx += 1

    print("Congrats on completing the lesson!")

In [5]:
def proc_meta(d: dict):
    """
    Prints the metadata fields of a lesson-bank entry, one "key: value" line
    per field, leaving out the 'Class' marker. Always returns 1.
    """
    fields = ((name, value) for name, value in d.items() if name != 'Class')
    for name, value in fields:
        print(f'{name}: {value}')
    return 1

def proc_text(d: dict):
    """
    Shows the 'Output' text of a text entry and waits for the user to press
    Enter. Always returns 1.
    """
    for field in d:
        if field == 'Output':
            print(f'{d[field]}')
            input("Press Enter to continue...")
        # Spacer emitted once per key of the entry, whatever the key is.
        print("\n")

    return 1
        
def proc_mult_choice(d: dict, repeat_q: bool):
    """
    Prompts user for multiple choice question

    On the first attempt (repeat_q False) the 'Output' text and the
    ';'-separated 'AnswerChoices' are printed, in the order they appear in the
    entry. On a repeated attempt (repeat_q True) only the 'Hint' is printed.
    The typed answer is then compared with 'CorrectAnswer'.

    Parameters:
        d: the question entry.
        repeat_q: True when the question is being asked again after a miss.

    Returns:
        True when the guess matches the expected answer, False otherwise.

    Raises:
        KeyError: if the entry has no (non-null) 'CorrectAnswer'.
    """
    ans = None
    for k, v in d.items():
        if k == 'CorrectAnswer':
            ans = v
        elif repeat_q:
            if k == 'Hint':
                print(f'Hint: {v}')
        elif k == 'AnswerChoices':
            choices = '\n'.join(str(v).split(';'))
            print(f"Choices: \n\n{choices}")
        elif k == 'Output':
            print(f'{v}')

    if ans is None:
        # Fail loudly before prompting; otherwise the question could never be passed.
        raise KeyError("multiple choice entry is missing 'CorrectAnswer'")

    # YAML turns unquoted answers such as 3 or TRUE into int/bool; compare as text.
    ans = str(ans).strip()
    guess = input("> ").strip()

    # perhaps need to consider numerical precision here; not sure if symbolic evaluation would be nice
    if guess.isnumeric():
        match = guess == ans
    else:
        # Case-insensitive comparison on ASCII letters and digits only, so
        # punctuation, quotes and spacing differences are ignored.
        match = (re.sub('[^A-Za-z0-9]+', '', guess.upper())
                 == re.sub('[^A-Za-z0-9]+', '', ans.upper()))

    if match:
        print(random.choice(rand_corr))
    else:
        print(random.choice(rand_incorr))

    return match

def proc_script(d: dict, repeat_q: bool):
    """
    Prompts user for question requiring response

    On the first attempt (repeat_q False) the 'Output' text and, if present,
    the ';'-separated 'AnswerChoices' are printed in entry order. On a
    repeated attempt (repeat_q True) only the 'Hint' is printed. The typed
    response must equal 'CorrectAnswer' exactly, except that single and double
    quotes are interchangeable and surrounding whitespace is ignored.

    Parameters:
        d: the question entry.
        repeat_q: True when the question is being asked again after a miss.

    Returns:
        True when the response matches the expected answer, False otherwise.

    Raises:
        KeyError: if the entry has no (non-null) 'CorrectAnswer'.
    """
    ans = None
    for k, v in d.items():
        if k == 'CorrectAnswer':
            ans = v
        elif repeat_q:
            if k == 'Hint':
                print(f'Hint: {v}')
        elif k == 'AnswerChoices':
            # Same key spelling as the outer filter, so choices are listed one per line.
            choices = '\n'.join(str(v).split(';'))
            print(f"Choices: \n\n{choices}")
        elif k == 'Output':
            print(f'{v}')

    if ans is None:
        # Fail loudly before prompting; otherwise the question could never be passed.
        raise KeyError("script entry is missing 'CorrectAnswer'")

    guess = input("> ")

    # perhaps need to consider numerical precision here; not sure if symbolic evaluation would be nice
    # Only quote characters are unified; '|' is left alone because it is a real
    # operator in the commands being typed. str() covers answers that YAML
    # parsed as numbers or booleans.
    match = (re.sub('["\']', '\'', guess.strip())
             == re.sub('["\']', '\'', str(ans).strip()))

    if match:
        print(random.choice(rand_corr))
    else:
        print(random.choice(rand_incorr))

    return match

## Run a lesson

In [6]:
# List available courses
# (alternative course tree: '../courses-python/scRNA-seq/')
course_dir = '../courses/rna_seq/'

print("The following courses are available .. \n")
# Show every entry of the course directory except the 'MANIFEST' file.
print('\n'.join(entry for entry in os.listdir(course_dir) if entry != 'MANIFEST'))

The following courses are available .. 

Session-contents.md
fastq_trimming
Examine_DESEq2_results
Pathway_Analysis
k-Means
Heatmap
Introduction
DESeq2
fastq_download
PCA
Gene_Count_Pre-Processing
Read_alignment_quantification


In [11]:
# Pick lesson to load
# Known limitation: does not work with the "import data" or
# "high_level_analyses_intro_clustering" lessons; clustering probably needs to
# be demoed instead, since it is not conventional to do it in a single line of Python.
lesson_to_load = 'Examine_DESEq2_results'

# Read the lesson inside a context manager so the file handle is closed as soon
# as parsing finishes.
# NOTE(review): FullLoader is fine for trusted course files; switch to
# yaml.safe_load if lessons can ever come from an untrusted source.
with open(os.path.join(course_dir, lesson_to_load, 'lesson.yaml')) as lesson_file:
    lesson = yaml.load(lesson_file, Loader=yaml.FullLoader)

In [12]:
# Run the lesson held in `lesson`; this is interactive and waits for typed input.
process_lesson(lesson)

Course: RNA-seq analysis
Lesson: Examine DESEq2 results
Author: your name goes here
Type: Standard
Organization: your organization's name goes here
Version: 2.4.5
Histograms of pvalues are really useful diagnostic tools for RNA-seq data analyses. It is best to exclude genes with small counts (baseMean). Draw a histogram of res$pvalue where res$baseMean > 1
> kfajl
Not exactly. Give it another go.
Hint: Try typing hist(res$pvalue[res$baseMean > 1])
> hist(res$pvalue[res$baseMean > 1])
You got it!
UNK!!
{'Class': 'figure', 'Output': 'Take a look at the histogram you made!', 'Figure': 'hist.R', 'FigureType': 'new'}


Notice how the first bin (with the smallest p-values) contains the most genes. This is the typical distribution we would expect from our data. If the histogram doesn't look similar to this one, we might have to use different statistical tests for different hypotheses.
Press Enter to continue...


UNK!!
{'Class': 'video', 'Output': 'For more information, check out this RNA-seq