# This is a reference sheet for Python and Unix commands covered in weeks 1-4 of HUMBIO51 

 ## UNIX 
<ol>
 <li><a href=#13>Unix Commands </a></li>
 <li><a href=#14>Unix Operators </a></li>
</ol>

 ## PYTHON
<ol>
 <li><a href=#0>Miscellaneous essential concepts </a></li>
 <li><a href=#1>Moving around the file system </a></li>
 <li><a href=#2>Reading and writing files</a></li>
 <li><a href=#3>Strings</a></li>
 <li><a href=#4>Lists</a></li>
 <li><a href=#5>Tuples</a></li>
 <li><a href=#6>Dictionaries</a></li>
 <li><a href=#7>For loops</a></li>
 <li><a href=#8>If statements</a></li>
 <li><a href=#9>Functions</a></li>
 <li><a href=#10>Importing Packages</a></li>   
 <li><a href=#11>Working with Pandas Dataframes</a></li>
 <li><a href=#12>Making plots</a></li>     
</ol>

# UNIX

### UNIX COMMANDS <a name='13' />

In [None]:
!pwd  #lists the name of the current directory 
!ls #displays the contents of the current directory 
!cat filename #prints the contents of a file (quotes around the filename are not required)
!head -n10 # prints the first 10 lines of a file. Note: if your file is large, use the head or tail command rather than the cat
           # command to examine its contents 

### UNIX OPERATORS <a name='14' />

In [5]:
> write the output of a command to a file (for example: ls > files.txt)
| send the output of one command to the input of the next (for example: cat filename | head -n10)

To get help with a UNIX command, type the command followed by the --help flag (for example, ls --help); some commands also accept -h. 

# PYTHON 

### Miscellaneous essential concepts <a name='0' />

In [None]:
# This is a comment. Comments are lines that begin with a hash(#) symbol. They are not executed and are used as notes for the programmer. 

In [None]:
# print() displays text on the screen.
# The text to display must be wrapped in quotes.
print('Hello World')

In [None]:
# Use the help function when confused about a variable / function: 
# it displays the documentation of the object passed to it (here, the print function). 
help(print)

## Moving around the file system  <a name='1' />

In [None]:
#the import statement loads a python library (module) and makes its functions available under the library's name 
#os library has not been imported: 

os.getcwd() #will fail with a NameError if the os library has not been imported yet. 

In [None]:
import os  #to fix the error, first import the os library. 
os.getcwd() #We execute the getcwd function to get the current working directory (the notebook displays the returned path) 

In [None]:
os.chdir("/home/jovyan/humbio51_instructor") #changes your working directory. There are shortcuts we can use: 

In [None]:
os.chdir("/home/jovyan/humbio51_instructor/helpers") #starting the directory name with "/" indicates an absolute path. 
os.chdir("..") #moves one directory up (to the parent directory)
os.chdir("helpers") #omitting the leading "/" indicates a relative path, which is resolved from the current working directory. 

In [None]:
#lists files in the current directory, a single period (.)  stands for the current directory 
os.listdir('.')

In [None]:
#create a new directory inside the current working directory 
#(os.mkdir raises FileExistsError if 'mydir' already exists, e.g. when this cell is run twice) 
os.mkdir('mydir')

## Reading and writing files <a name='2' />

In [None]:
#open a file for writing ('w' mode creates the file, or empties it if it already exists) 
f=open("myfile.txt",'w') 

In [None]:
#write text to a file 
f.write("hello world! \n goodbye world\n") #use \n to indicate newline characters. 
f.close() #close the file so the text is flushed from memory to disk before the file is opened again for reading 

In [None]:
#open a file for reading ('r' is the default mode, so open('myfile.txt') is equivalent) 
f=open('myfile.txt','r')


In [None]:
#read the entire contents of an open file into a single string 
contents=f.read()
print(contents)

In [None]:
#read the lines contained in a file into a list, one element per line (each line keeps its trailing newline) 
f.seek(0) #rewind to the start of the file: an earlier f.read() leaves the position at the end, and readlines() would then return an empty list 
contents=f.readlines()
print(contents) #note! an open file can only be read through once; to read it again, rewind with f.seek(0) or reopen the file. 

In [None]:
#close a file once you are done with it 
f.close()

In [None]:
#alternative to open a file without a separate close command: 
#the "with" statement closes the file automatically when the indented block ends 
with open('myfile.txt') as f: 
    data = f.read()

## Working with strings <a name='3' />

In [None]:
#you can concatenate strings with the "+" sign (no space is inserted between them) 
a="hello"
b="world"
c=a+b
print(c) #prints helloworld

In [None]:
#We can print variables by changing them to strings. For example 
a=1 
print(str(a))

In [None]:
#We can use the "+" sign to join strings for printing 
#(a number must first be converted with str(); "+" cannot join a string and an integer) 
print("a is:"+str(a))

In [None]:
#you can find and replace string characters 
c=c.replace("hello","goodbye") #replace returns a new string and leaves the original unchanged, so assign the result to a variable (here we reuse c) for your edits to stick. 
print(c) #prints goodbyeworld

In [None]:
#string indexing 
sequence="ACGTACGT"
print(sequence[1]) #we can select a single character at a numeric index (0-based); prints C
print(sequence[1:3])#we can slice the string to select several contiguous characters (start included, end excluded); prints CG
print(sequence[::-1]) #string reversal; prints TGCATGCA


## Working with lists <a name='4' />

In [None]:
#both of the below are valid ways to create an empty list 
a=[] 
b=list()
print("a"+str(a)) #prints a[]
print("b"+str(b)) #prints b[]

In [None]:
#creating lists with some values in them 
a=[1,2,3,4]
b=['a','b','c','d']
print(a)
print(b)

In [None]:
#use the "append" command to add values to the end of a list 
a.append(5)
print(a) #prints [1, 2, 3, 4, 5]

In [None]:
#we can update list values with new values (indices are 0-based, so a[0] is the first element) 
a[0]=7
print(a) #prints [7, 2, 3, 4, 5]

In [None]:
#use the "join" command to join the elements in a list into one string; 
#the string before .join is placed between the elements, and the elements must all be strings 
c='.'.join(b)
print(c) #prints a.b.c.d


In [None]:
#use the "split" command to split a string into a list using a specified delimiter 
d=c.split('.')
print(d) #prints ['a', 'b', 'c', 'd']

In [None]:
#use the "len" command to get the length of a list 
print(len(d)) #prints 4

## Tuples <a name='5' />

In [None]:
#Unlike lists, tuples are immutable (they cannot be changed after they are created). 
#Tuples can be defined as follows: 
a=(1,2)
print(a)


In [None]:
#you can index into a tuple, just like a list 
print(a[0]) #prints 1
print(a[1]) #prints 2

In [None]:
#However, you cannot reassign values to a tuple 
a[0]=5 #should give an error (TypeError: 'tuple' object does not support item assignment) 

## Dictionaries <a name='6' />

In [None]:
#Dictionaries are key value pairs that are efficient to use 
#Dictionaries can be defined in two ways (both lines below create an empty dictionary): 

my_dict={} 
my_dict=dict() 

In [None]:
#We can populate a dictionary by assigning keys and values 
#(assigning to a key that already exists overwrites its value) 
my_dict['a']=1
my_dict['b']=2
my_dict['c']=3
print(my_dict)

In [None]:
#We can query a dictionary by looking up specific keys 
#(looking up a key that is not in the dictionary raises a KeyError) 
my_dict['a']

In [None]:
#we can also print all keys or all values in a dictionary 
my_dict.keys()

In [None]:
#all values stored in the dictionary 
my_dict.values()

## For loops <a name='7' />

In [None]:
#iterate by values: i takes the value of each character of the string in turn 
sequence='AGCCCTCCA'
for i in sequence:
    print (i)


In [None]:
#iterate by index: i takes each value in the list and is used to index into the string 
#(indices are 0-based, so this prints the 2nd through 5th characters: G, C, C, C) 
for i in [1,2,3,4]: 
    print(sequence[i])

In [None]:
#use the range command to get all integers in a given range 
list(range(0,3)) #first number is included, second number is excluded; gives [0, 1, 2]

In [None]:
#if only one integer is provided to range, the first integer is assumed to be 0 
list(range(3))

In [None]:
#A common pattern is to combine range with len to iterate through all indices in a list 
#(here a string; len(sequence) is the number of characters, so i runs from 0 to len(sequence)-1) 
for i in range(len(sequence)):
    print(sequence[i])

## If statement logic <a name='8' />

Equals: a == b

Not Equals: a != b

Less than: a < b

Less than or equal to: a <= b

Greater than: a > b

Greater than or equal to: a >= b

In [None]:
#An "if statement" is written by using the if keyword.
#The indented code under "if" runs when the condition is True; otherwise the code under "else" runs. 
#Note: this else branch also runs when a equals b; the elif example in the next cell handles that case. 
a=5
b=10
if a>b: 
    print(str(a) + " is greater than " + str(b))
else: 
    print(str(b) + " is greater than "+ str(a))

In [None]:
#an elif can be used to perform multiple comparisons 
#conditions are checked from top to bottom and only the first branch whose condition is True runs 
if a>b: 
    print(str(a) + " is greater than " + str(b))
elif a==b: 
    print(str(a) + " is equal to " + str(b))
else: 
    print(str(b) + " is greater than "+ str(a))

## Using functions <a name='9' />

In [None]:
#Functions are re-usable chunks of code that can be called with a single command 
#functions are defined as follows 

#def my_function(arg1,arg2):
#    #do something with arg1, arg2
#    return output  #return an output value 



In [None]:
#For example: 
def multiply(a,b): 
    """Return the product of a and b."""
    product = a * b
    return product


In [None]:
#Functions can be executed after they are defined by passing arguments: 
#the returned value can be stored in a variable 
product=multiply(2,3)
print(product) #prints 6

## Importing Packages <a name='10' />

In [None]:
#import packages using the import command
import Bio 
#printing the package's __version__ attribute can be used to verify the version that is installed
print(Bio.__version__) 

## Working with Pandas Dataframes <a name='11' />

In [None]:
#importing .tsv file into Python using pandas
import pandas as pd  #the "pd" alias is needed: every call below refers to pandas as pd
df = pd.read_table(
     filepath_or_buffer='filename.tsv', 
     header=0,      #line 0 (the first line) of the file holds the column names
     index_col=0    #column 0 (the first column) of the file holds the row names
)


#importing .csv file into Python using pandas
df = pd.read_csv(
     filepath_or_buffer='filename.csv', 
     header=0,
     index_col=0
)

#specifying rows or columns in a dataframe
x=df['columnname']   #select a column by its name
x=df.loc['rowname']  #select a row by its name (index label)
x=df.iloc[0,1]       #select by 0-based position: [row number(s), column number(s)]; slices such as df.iloc[0:5,0:2] select several

#sorting pandas dataframe (returns a new, sorted dataframe; df itself is left unchanged)
df_sorted=df.sort_values(by="columnname",ascending=False)

# Make a scatter plot, barplot or histogram <a name='12' />

### Plotting package  

Plotnine: https://plotnine.readthedocs.io/en/stable/api.html <br> 


In [None]:
#load the necessary modules for plotnine
from plotnine import * 

In [None]:
#Make a Bar plot using plotnine
#x is a single dataframe column; 'columnname', "xlabel" and "ylabel" are placeholders to replace with your own values 
x=df['columnname']
qplot(x=x,
      geom="bar",
      xlab="xlabel",
      ylab="ylabel")  

In [None]:
#Make a Bar plot using plotnine with the x and y-axes flipped
#(adding coord_flip() to the plot with "+" swaps the two axes) 
x=df['columnname']
qplot(x=x,
      geom="bar",
      xlab="xlabel",
      ylab="ylabel")+coord_flip()

In [None]:
#Make a histogram using plotnine  
x=df['columnname']
qplot(x=x,
      geom="histogram",
      xlab="xlabel",
      ylab="ylabel")  

In [None]:
#Make a Scatter Plot using plotnine
#x and y are two dataframe columns; each row becomes one point 
x=df['columnname1']
y=df['columnname2']
qplot(x=x,
      y=y,
      geom="point",
      xlab="xlabel",
      ylab="ylabel")  

In [None]:
#Make a Scatter Plot using plotnine with colored points
#x and y are reused from the previous cell 
#NOTE: metadata_subset is not defined anywhere in this sheet; presumably a dataframe with a 'System' column 
#holding one value per plotted point -- replace it with your own data 
qplot(x=x,
      y=y,
      geom="point",
      xlab="PC1",
      ylab="PC2",
      color=list(metadata_subset['System']))+scale_color_discrete(name="System")

In [None]:
#Make a Scatter Plot using plotnine labeling points
#label is the dataframe column whose values are passed as the text label of each point 
x=df['columnname1']
y=df['columnname2']
label=df['columnname3']
print(qplot(x=x,
      y=y,
      label=label,
      geom=["point","text"],
      xlab="xlabel",
      ylab="ylabel"))

In [None]:
#Make a Scatter Plot using plotnine labeling a single point
#i, j, k and label_subset are placeholders: replace them with the row number of the point to label, 
#the column numbers holding its x and y values, and the text of the label 
x_subset=df.iloc[i,j]
y_subset=df.iloc[i,k]

#the whole expression is wrapped in parentheses so that it can continue on a new line that starts with "+" 
#axis labels are given as strings: after "from plotnine import *" the bare names xlab and ylab refer to plotnine functions 
(qplot(x=x,
       y=y,
       xlab="xlabel",
       ylab="ylabel")
 + annotate('text',x=x_subset, y=y_subset, label=label_subset, size=7, color='blue', va='bottom', ha='left', angle=0))

In [None]:
#Make a Scatter Plot using plotnine labeling a single point and setting x- and y- axis limits
#i, j, k and label_subset are placeholders to replace with your own values; 
#x and y are the columns selected in an earlier cell 

x_subset=df.iloc[i,j]
y_subset=df.iloc[i,k]

#the "+" continuation lines are valid here because they sit inside the parentheses of print(...) 
#xlim and ylim take the lower and upper limit of the x- and y-axis 
print(qplot(x=x,
      y=y,
      xlab="xlabel",
      ylab="ylabel") + annotate('text',x=x_subset, y=y_subset,label=label_subset, size=7, color='blue', va='bottom', ha='left') 
      + xlim(0,10)
      + ylim(0,70))