# ipynb_snippets
> This notebook & webpage (depending on how you're accessing it) contains a quick reference of Python code for doing various things. See the legend on the right to jump to a section.
- skip_exec: true

# Some brief notes:

- [Hamel's blog for inspiration](https://hamel.dev/notes/linux/bash_scripting.html)
- some person's homepage https://www.mm218.dev/

OK, this is a bunch of test text.

# Todo dump:

- pd.set_option('display.float_format', lambda x: '%.3f' % x)

In [None]:
# Update the req. file for docker: freeze the current environment into
# requirements.txt, set the global git identity, then stage and commit the file.
# NOTE(review): `pip freeze` writes into .../jupyter-base-ds/ while `git add` and
# `git commit` run inside .../jupyter-compbio-ds/ -- confirm which repository is intended.
!pip freeze > /home/jovyan/proj/docker/jupyter-base-ds/requirements.txt
# NOTE(review): the next line sets the *global* git e-mail to an empty string -- confirm.
!git config --global user.email "" 
!git config --global user.name  "Menno Witteveen"
!git -C /home/jovyan/proj/docker/jupyter-compbio-ds/ add requirements.txt
# !git -C /home/jovyan/proj/docker/jupyter-base-ds/ status
!git -C /home/jovyan/proj/docker/jupyter-compbio-ds/ commit -m "update req.txt with line_profiler"

In [None]:
# 'ff'

# Base Imports

In [None]:
%config Completer.use_jedi = False
########################################################
## Base Imports:

# Sys Imports:
import time, sys, os

# Standard Imports:
import numpy as np
import scipy as sp
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats, linalg

#########################################################
## Experiment Specific Imports

# Logistics Imports:
import inspect, glob, re, contextlib, pickle, functools #,submitit #pyreadr
from tqdm.auto import tqdm
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from mjwt.utils import jobinfo, corr, implot, sizegb, psrc, beep, Timer, Struct as mStruct

# ML Imports:
from sklearn.metrics import r2_score, roc_auc_score
from sklearn.model_selection import train_test_split, ParameterGrid
from scipy.stats import pearsonr, spearmanr

# Genomics Imports:
import pysnptools as pst
from pysnptools.snpreader import Bed, Pheno, SnpHdf5, SnpData
from pysnptools.pstreader import PstData, PstHdf5, PstReader
from lambdapred.utils import load_bimfam

########################################################
## Configuration & Initialisation

# Display Configuration:
from IPython.display import set_matplotlib_formats, display #, HTML, Audio, Javascript
plt.rcParams['figure.figsize'] = [10, 5]    # default figure size: [width, height]
pd.set_option('display.max_colwidth', 200)  # full option key rather than the 'max_colwidth' shorthand
# pd.set_option('display.max_colwidth', None) # No pd truncation (radical)
# display(HTML("<style>.container { width:75% !important; }</style>"))
# pd.reset_option('all')

# Initializations & Extensions:
# One shared Timer whose bound tic/toc methods are exposed as plain functions.
timer = Timer()
tic, toc = timer.tic, timer.toc
tic('')
log = np.log10  # NOTE: `log` is the base-10 logarithm here, not the natural log.

# True when `__file__` is not defined in this namespace.
notebook = '__file__' not in locals()

# Non-essential extension for development: ignore any failure to load it
# (e.g. `get_ipython` undefined or line_profiler missing), but let
# KeyboardInterrupt / SystemExit propagate instead of swallowing them.
with contextlib.suppress(Exception):
    get_ipython().run_line_magic('load_ext', 'line_profiler')

In [None]:
# Run this line if you want to get some help info on `implot`
# (`??name` is IPython's introspection syntax; it also shows the source when available):
??implot

In [None]:
# !pip install pysnptools

# ML Recipes

## Pytorch 101

In [None]:
#  -*- coding: utf-8 -*-
import torch
import math

# Fit y = sin(x) with a cubic polynomial, written as one linear layer acting on
# the feature vector (x, x^2, x^3) and trained with hand-rolled gradient descent.

# Inputs and targets.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Feature matrix: x.unsqueeze(-1) has shape (2000, 1) and the exponent tensor
# `p` has shape (3,), so broadcasting produces a (2000, 3) tensor (x, x^2, x^3).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

# nn.Sequential applies its child Modules in order. Linear holds the weight and
# bias tensors of the linear map; Flatten collapses the (2000, 1) output of the
# linear layer into a 1D tensor so that it matches the shape of `y`.
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1),
)

# Sum-reduced Mean Squared Error from the nn package.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: Modules are callable, mapping an input Tensor to an output Tensor.
    y_pred = model(xx)

    # The loss function returns a Tensor holding the loss; report every 100th step.
    loss = loss_fn(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear the old gradients, then backpropagate to fill `.grad` on every
    # learnable parameter (those are stored with requires_grad=True).
    model.zero_grad()
    loss.backward()

    # Plain gradient-descent step, done outside of autograd tracking.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

# Sequential supports list-style indexing: item 0 is the Linear layer, whose
# parameters live in `weight` (shape (1, 3)) and `bias`.
linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[0, 0].item()} x + {linear_layer.weight[0, 1].item()} x^2 + {linear_layer.weight[0, 2].item()} x^3')

# Schedulers
    


## Slurm & Submitit Tricks

In [None]:
# Cancel every job held in `job_dt`; only the job objects are needed, not the keys.
for job in job_dt.values():
    job.cancel()
    

In [None]:
# Cancel ALL submitit jobs!:
from io import StringIO
string =  !que
stringio = StringIO('\n'.join(string))
que_df = pd.read_table(stringio, delim_whitespace=True); que_df

for _, jobid in que_df.JOBID[que_df.NAME == 'submitit'].iteritems():
    jerg
    !scancel {jobid}

In [None]:
# Rebuild a {index: SlurmJob} mapping from a pasted, comma-separated list of job IDs.
# NOTE: needs `submitit` in scope; it is commented out in the base-imports cell.
string = """60080867, 60080868, 60080869, 60080870, 60080871, 60080872, """
log_folder = "/home/mennow/dsmwpred/mennow/log_test/%j"
# The trailing ', ' leaves an empty last element, which is filtered out; the keys
# are the positions in the split list.
eval_job_dt = {
    i: submitit.SlurmJob(job_id=job_id, folder=log_folder)
    for i, job_id in enumerate(string.split(', '))
    if job_id != ''
}

In [None]:
# Rebuild `job_dt` ({index: SlurmJob}) from the pasted repr of an earlier job dict.
# NOTE: needs `submitit` in scope; it is commented out in the base-imports cell.
import re

string='''{0: SlurmJob<job_id=55037710, task_id=0, state="COMPLETED">,
 1: SlurmJob<job_id=55037711, task_id=0, state="COMPLETED">,
 2: SlurmJob<job_id=55037712, task_id=0, state="RUNNING">,
 3: SlurmJob<job_id=55037713, task_id=0, state="RUNNING">,
 4: SlurmJob<job_id=55037714, task_id=0, state="COMPLETED">,
 5: SlurmJob<job_id=55037715, task_id=0, state="COMPLETED">,
 6: SlurmJob<job_id=55037716, task_id=0, state="RUNNING">,
 7: SlurmJob<job_id=55037717, task_id=0, state="RUNNING">,
 8: SlurmJob<job_id=55037718, task_id=0, state="COMPLETED">,
 9: SlurmJob<job_id=55037719, task_id=0, state="COMPLETED">,
 10: SlurmJob<job_id=55037720, task_id=0, state="COMPLETED">,
 11: SlurmJob<job_id=55037721, task_id=0, state="RUNNING">,
 12: SlurmJob<job_id=55037722, task_id=0, state="RUNNING">,
 13: SlurmJob<job_id=55037723, task_id=0, state="RUNNING">,
 14: SlurmJob<job_id=55037724, task_id=0, state="RUNNING">,
 15: SlurmJob<job_id=55037725, task_id=0, state="RUNNING">}'''

folder = "/home/mennow/dsmwpred/mennow/log_test/%j"

# Capture whatever sits between 'job_id=' and ', task_id=' on each entry.
# Unlike slicing with str.find (which returns -1 on a miss and then produces a
# garbage slice), text without both markers is simply skipped.
job_dt = {
    i: submitit.SlurmJob(job_id=job_id, folder=folder)
    for i, job_id in enumerate(re.findall(r'job_id=(.+?), task_id=', string))
}

In [None]:
# Clean out every directory matched by ./log_test/* that passes the `-ctime +9` test,
# deleting it recursively with `rm -rf`. Careful with this!
# NOTE(review): `-ctime` tests the status-change time, not the modification time;
# use `-mtime` if "last modified more than 9 days ago" is what is meant -- confirm.
!find ./log_test/* -type d -ctime +9 -exec rm -rf {} \;

In [None]:
string = !squeue -t pd
# string = !squeue -t r # For RUNNING ones.
string = '\n'.join(string)
from io import StringIO
df = pd.read_csv(StringIO(string), delim_whitespace=True)
df.groupby('USER').count().sort_values(by='JOBID').iloc[::-1]

# If you really want all: this cell is a little buggy...
string = !squeue -u mennow
string = '\n'.join(string)--format="%all"
from io import StringIO
df = pd.read_csv(StringIO(string), delim_whitespace=True)

# Seeing some prints: last 100 characters of stdout for 4 randomly drawn jobs
# (indices are drawn with replacement, so a job can show up more than once).
for i in np.sort(np.random.randint(0, len(job_dt), 4)):
    stdout = job_dt[i].stdout()  # fetch once instead of once for the test and once for the print
    if stdout is not None:
        print(f'---> i={i} <---\n', stdout[-100:])

# See a unique count of all the Slurm states:
# column 0 holds each job's state, column 1 is a constant that gets counted per state.
slurm_states = [job.state for job in eval_job_dt.values()]
df = pd.DataFrame(slurm_states)
df[1] = 1.0
df.groupby(0).count()

# Decoration

In [1]:
# Methods that overwrite the getting and setting syntax
class C:
    """Demo of a managed attribute: `x` is a property backed by `_x`.

    Reading, assigning and deleting `x` each go through a dedicated method
    that prints a trace line before touching `_x`.
    """

    def __init__(self):
        self.data = 42   # ordinary attribute, no property involved
        self._x = None   # backing storage for the `x` property

    @property
    def x(self):
        """I'm the 'x' property."""
        print("getter of x called")
        return self._x

    @x.setter
    def x(self, value):
        # Runs on `obj.x = value`.
        print("setter of x called")
        self._x = value

    @x.deleter
    def x(self):
        # Runs on `del obj.x`; removes the backing attribute.
        print("deleter of x called")
        del self._x


# Exercise all three accessors in order.
c = C()
c.x = 'foo'  # -> setter
foo = c.x    # -> getter
del c.x      # -> deleter

setter of x called
getter of x called
deleter of x called
