# GA Lab: iPython Notebook / Python

## iPython Notebook

- Shift + Enter executes the currently highlighted cell
- Shift + Tab shows help for the current command

In [None]:
# Embed the Nature interactive IPython demo page inside the notebook.
# IFrame comes from the public IPython.display API and builds the <iframe>
# element itself, so the attributes no longer live unquoted in a hand-written
# HTML string.
from IPython.display import IFrame
IFrame("http://www.nature.com/news/ipython-interactive-demo-7.21492", width=1000, height=800)

### import statements

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

%matplotlib inline

### execute shell commands

In [None]:
# The leading `!` hands the rest of the line to the system shell
!ls -lsh

### capture shell command output in a variable

In [None]:
# Assigning from a `!` command stores the command's output lines in the variable
home_drive = !ls ~
# A bare name on the last line of a cell displays its value
home_drive

#### Shell magic!

In [None]:
# List the line (%) and cell (%%) magics available in this session
%lsmagic

In [None]:
# %timeit runs the statement repeatedly and reports how long it takes
%timeit a = np.random.rand(100, 100)

In [None]:
a = 10
print a
# %reset clears the names defined in this session
# NOTE(review): when run interactively it asks for confirmation first — confirm
%reset
# Intentional: once `a` has been cleared this line raises NameError
print a

In [None]:
%%time
# %%time has to stay on the first line of the cell; it times the cell body
# Draw 1000 samples from a normal distribution with mean mu and std-dev sigma
mu, sigma = 0, 0.1
normal_dist = np.random.normal(mu, sigma, 1000)

In [None]:
# With a single array argument the values are plotted against their position
plt.plot(normal_dist)

In [None]:
# Download price data from Yahoo Finance for 1 Apr 2015 through 28 Apr 2015
# NOTE(review): pandas.io.data was removed from later pandas releases (the
# remote-data readers moved to the separate pandas-datareader package) —
# confirm the pandas version this notebook is expected to run on
import pandas.io.data
import datetime
# NOTE(review): the variable is named `aapl` but the ticker requested is 'FB';
# one of the two is presumably a leftover — confirm which was intended
aapl = pd.io.data.get_data_yahoo('FB', 
                                 start=datetime.datetime(2015, 4, 1), 
                                 end=datetime.datetime(2015, 4, 28))
# Show the first rows of the result
aapl.head()

## Plotting!

In [None]:
# One 2x2 figure showing four basic plot types side by side
fig = plt.figure(figsize=(20,16))

# Top-left: closing price drawn as a line against the DataFrame index
ax = fig.add_subplot(2,2,1)
ax.plot(aapl.index, aapl['Close'])
ax.set_title('Line plots', size=24)

# Top-right: the same closing prices drawn as unconnected 'o' markers
ax = fig.add_subplot(2,2,2)
ax.plot(aapl['Close'], 'o')
ax.set_title('Scatter plots', size=24)

# Bottom-left: distribution of the normal samples in 50 bins
ax = fig.add_subplot(2,2,3)
ax.hist(normal_dist, bins=50)
ax.set_title('Histograms', size=24)
# A histogram puts the bin counts on the vertical axis, so the 'count' label
# belongs on y (it was previously attached to the x axis)
ax.set_ylabel('count', size=16)

# Bottom-right: box plot of the same samples
ax = fig.add_subplot(2,2,4)
ax.boxplot(normal_dist)
ax.set_title('Boxplots', size=24)
# Bare print as the last statement: presumably here so the notebook does not
# echo the return value of the final set_title call
print

## 1. Python Data types

In [None]:
# A whole-number literal is an int
type(42)

In [None]:
from math import pi
type(pi)
print pi

In [None]:
# In Python 2 a plain quoted literal is a byte string (str)
type('matt')

In [None]:
# The u prefix makes the literal a unicode string in Python 2
type(u'matt')

## 2. Basic Python execution

In [None]:
# Sample values of three different types (int, float, str) for the print examples
test_integer, test_float, test_string = 42, pi, 'matt'

#### Basic print

In [None]:
print (test_integer, test_float, test_string)

#### C-style formatting

In [None]:
print ("%d %f %s") % (test_integer, test_float, test_string)
print ("%d %.2f %s") % (test_integer, test_float, test_string)

#### Format - the pythonic way

In [None]:
print ("{} {} {}").format(test_integer, test_float, test_string)
print ("{} {:.2f} {}").format(test_integer, test_float, test_string)

## Python operations

In [None]:
# Adding an int to a float gives a float
1 + 2.5

In [None]:
# Intentional error: a str and an int cannot be added, so this raises TypeError
'matt' + 1

### Python 2.7 division

In [None]:
# Two ints used by the division examples
numerator = 1
denominator = 2

#### Important to remember:

In [None]:
# With two ints, Python 2's `/` truncates, so this prints 0
# (unless `from __future__ import division` is already active in the session)
print (numerator / denominator)

In [None]:
# Converting one operand to float forces true division: prints 0.5
print (numerator / float(denominator))

In [None]:
# Before the __future__ import, int / int truncates in Python 2
print 1/2
# After this import, `/` between ints performs true division
from __future__ import division
print 1/2

### 3. Python Data Structures

## Lists [ ]

- aka arrays
- ordered
- mutable
- mixed types 

In [None]:
# Sample prices reused by the list examples
price_per_sq_foot = [300,324,220,976,347,888,134,95,567,1400,120]
# Square brackets create a list
type(price_per_sq_foot)

#### list operations:

In [None]:
#Number of elements in the list
print "There are {} elements in the list".format(len(price_per_sq_foot))
print

#Add an item to the end of the list
price_per_sq_foot.append(1000)
print "Updated price per square foot"
print (price_per_sq_foot)

#Remove and return most recent element
last_element = price_per_sq_foot.pop()
print "Last in, updated price per sq foot"
print (last_element, price_per_sq_foot)

#Sort the array
print "Sorted list"
print sorted(price_per_sq_foot)

#Reverse order
print "Reverse order"
print sorted(price_per_sq_foot)[::-1]

#### List slicing

In [None]:
#Work on a sorted copy so index 0 is the smallest price and the last index the largest
sorted_ppsf = sorted(price_per_sq_foot)
#First element: index 0
print "First element"
print 'Cheapest: {}'.format(sorted_ppsf[0])
print 
#Last element: index len - 1 ...
print "Last element"
print 'Most expensive: {}'.format(sorted_ppsf[len(sorted_ppsf) - 1])
#... or, equivalently, the negative index -1 counts from the end
print 'Most expensive: {}'.format(sorted_ppsf[-1])

## Tuples ( )

- immutable
- can't change / sort
- functional programming

In [None]:
test = (1, 2)
# Reading by index works the same way as for lists
print test[0]
print test[1]
# Intentional error: tuples are immutable, so item assignment raises TypeError
test[0] = test[1]

## Dictionary { }

- aka associative arrays / key-value pairs
- mutable
- unordered

In [None]:
#Can contain multiple types
employee_info = {'id': 123, 'name': 'Matthew', 'manages': [45,67,90], 'managed_by': 2}
print "Name {}: Headcount: {}".format(employee_info['name'], len(employee_info['manages']))

In [None]:
offensive_skill_dict = {'RB': 'Running Back', 'WR': 'Wide Receiver', 'QB': 'Quarterback', 'FB': 'Full back', 'TE': 'Tight End'}
print offensive_skill_dict

In [None]:
# keys() gives just the dictionary's keys (the abbreviations)
print offensive_skill_dict.keys()

### Sets

- elements are unique
- support set operations (union, intersection, difference)

In [None]:
#remove redundant entries
list_list = [1,1,2,4,6,8,3,1,2,4,5]
#Building a set from the list keeps a single copy of each value
set_list = set(list_list)
print set_list

In [None]:
#Set operations
panel_a, panel_b = set(('BRCA1', 'BRCA2', 'TP53', 'CHEK2', 'MYC')), set(('CHEK2', 'SPHK1', 'MYCN', 'MYC'))

print "Genes in panel 1 but not in panel b: {}\n".format((panel_a - panel_b))
print "Genes in both panels (Intersection): {}\n".format(panel_a & panel_b)
print "All the genes: {}\n".format(panel_a | panel_b)
print "Genes in A that aren't in B, genes in B that aren't in A (symmetric difference): {}\n".format(panel_a ^ panel_b)


### 4. flow control

### iterating through a list

In [None]:
# US presidents in order of office, Carter through Obama.
# Fixes the 'Regan' typo and the Bush entries: George HW Bush preceded
# Bill Clinton, and George W Bush followed him.
my_list = ['Jimmy Carter','Ronald Reagan','George HW Bush','Bill Clinton', 'George W Bush', 'Barack Obama']

In [None]:
#Not the best way
#Looping over range(len(...)) and indexing works, but the index is only used
#to fetch the item
for index in range(0, len(my_list)):
    print my_list[index]

In [None]:
#Better
#Iterate over the items themselves; no index bookkeeping needed
for president in my_list:
    print (president)

In [None]:
#what if I want the index too
#enumerate yields (index, item) pairs, unpacked here into two loop variables
for index, president in enumerate(my_list):
    print index, president

In [None]:
#list comprehension - this is pythonic - and faster!
#This one builds a new list holding the same items as my_list
[x for x in my_list]

#### Faster?

In [None]:
%%timeit
# Nested for-loops appending one item at a time.
# NOTE(review): 9999 * 9999 iterations build a list of roughly 100 million
# items each time the body is timed — expect this cell to be slow and
# memory-hungry; a smaller range would show the same comparison
result = []
for x in range(1,10000):
    for y in range(1,10000):
        result.append(x)
len(result)

In [None]:
%%timeit
# The same nested iteration written as a single list comprehension.
# NOTE(review): this also builds roughly 100 million items (9999 * 9999) each
# time the body is timed — expect it to be slow and memory-hungry
field = [x for x in range(1,10000) for y in range(1,10000)]
len(field)

In [None]:
# NOTE(review): looks like a placeholder — a bare builtin name only echoes the
# function object; TODO add a zip example
zip

In [None]:
# NOTE(review): looks like a placeholder — a bare builtin name only echoes the
# function object; TODO add a map example
map

In [None]:
# A bare `lambda` keyword is a SyntaxError, so the placeholder is replaced by a
# minimal runnable example.
# lambda creates a small anonymous function in a single expression; a common
# use is as the key function for sorting.
by_length = sorted(['pandas', 'np', 'scipy'], key=lambda word: len(word))
by_length

In [None]:
# The bare name `comprehensions` is undefined and raises NameError, so the
# placeholder is replaced by a minimal runnable example.
# A comprehension builds a collection from an iterable in one expression.
squares = [n * n for n in range(5)]
# The same idea builds a dict, here keeping only the even numbers
even_squares = {n: n * n for n in range(5) if n % 2 == 0}
squares, even_squares

### Additional Resources

Web based viewer for ipython notebooks
http://nbviewer.ipython.org/