# Python Introduction

## Load Libraries

In [1]:
# We can load libraries just like R
import numpy as np    # Numpy is python's numerical library, and it is standard to reference it as 'np'
import pandas as pd   # Pandas is python's dataframe package (just like R, everything is in memory)
import re             # Regular expression library
import os             # Operating System commands (change directory, set working directory, list files, ...)
import logging        # Python's Logging library

## Variables

In [2]:
x = 'math is fun'
# Python 2 also allowed the statement form `print x`; that spelling is a
# SyntaxError in Python 3, so the function-call form (valid in both) is used.
print(x)

math is fun


In [3]:
print(x)  # Preferred function-call form; Python 2 accepts it as well as the statement form

math is fun


In [4]:
# Lists: ordered sequences written with square brackets.
a = [1, 2, 3]
b = [4, 1.1, -5]
# One list may hold values of several different types.
mixed_list = [True, 'beer', 3.14]

In [5]:
# Tuples: fixed-length sequences written with parentheses.
c = (1, 4, 7)
d1 = (True, 'foo', 4.4)
d2 = (3, -3, 0)

In [6]:
# Pair up the elements of `a` and `b` by position, giving a list of tuples.
# list() materialises the pairs so they are also displayed on Python 3,
# where zip() returns a lazy iterator instead of a list.
list(zip(a, b))

[(1, 4), (2, 1.1), (3, -5)]

In [7]:
# zip() works on tuples as well; list() forces the pairs to be built and
# displayed on Python 3, where zip() is lazy.
list(zip(d1, d2))

[(True, 3), ('foo', -3), (4.4, 0)]

In [8]:
a+b  # Not like R!!!  `+` on two lists concatenates them; it does not add elementwise

[1, 2, 3, 4, 1.1, -5]

In [9]:
# To add two lists elementwise, walk them in lockstep with zip() and add each
# pair inside a list comprehension.
[left + right for left, right in zip(a, b)]

[5, 3.1, -2]

In [10]:
a.extend(b)  # THIS IS IN PLACE!!!!  extend() adds b's items to `a` itself; nothing is displayed

In [11]:
# Display `a`: the items of `b` are now at the end of the same list.
a

[1, 2, 3, 4, 1.1, -5]

In [12]:
a.append(b)  # THIS IS IN PLACE!!!!  append() adds `b` itself as ONE new (nested) element

In [13]:
# Appending b's elements one at a time has the same effect on `a` as a.extend(b).
# The comprehension is run only for that side effect: the list it builds just
# collects the None returned by each append() call.
[a.append(b_el) for b_el in b]

[None, None, None]

In [14]:
# Display `a`: it now ends with the nested list added by append(b), followed by
# the three items appended one by one.
a

[1, 2, 3, 4, 1.1, -5, [4, 1.1, -5], 4, 1.1, -5]

In [15]:
# Important!  Indexing starts at ZERO in Python, not at 1 as in R.
a[0]

1

In [16]:
# A slice returns a list; the stop index (1) is excluded, so this is a one-element list.
a[0:1]

[1]

In [17]:
# Introducing NumPy: arrays are great to use, and are the closest match to R's vectors.
a = np.array([1, 2, 3])
b = np.array([4, 2, 0])

In [18]:
# With NumPy arrays, `+` adds elementwise (unlike lists, where it concatenates).
a+b

array([5, 4, 3])

In [19]:
# Dictionaries map keys to values -- super important!
home_dict = {
    'address': '123 Main St',
    'city': 'Seattle',
    'state': 'WA',
    'zip': 98101,
    'bedrooms': 5,
    'agents': ['Sally', 'John', 'Bill', 'Cindy'],
}

In [20]:
# Display the dictionary's key/value pairs.
home_dict

{'address': '123 Main St',
 'agents': ['Sally', 'John', 'Bill', 'Cindy'],
 'bedrooms': 5,
 'city': 'Seattle',
 'state': 'WA',
 'zip': 98101}

In [21]:
# .items() yields (key, value) pairs, so both can be unpacked in the loop header.
for field, content in home_dict.items():
    print('Key: ' + str(field))
    print('Value: ' + str(content))
    print('-----------')

Key: city
Value: Seattle
-----------
Key: zip
Value: 98101
-----------
Key: bedrooms
Value: 5
-----------
Key: state
Value: WA
-----------
Key: agents
Value: ['Sally', 'John', 'Bill', 'Cindy']
-----------
Key: address
Value: 123 Main St
-----------


In [22]:
# Lots of objects in Python are *iterable* (strings, lists, dicts, ...).
# A for-loop asks the object for an iterator; it is that iterator which
# remembers its position and hands back the next element until none are left.
# The loop variable is deliberately not named `c`: loop variables stay defined
# after the loop, and `c` would overwrite the tuple `c` created earlier.
for char in x:
    print(char)

m
a
t
h
 
i
s
 
f
u
n


In [23]:
# Looping over a dictionary directly visits its keys.
for key in home_dict:
    print(key)

city
zip
bedrooms
state
agents
address


In [24]:
# Looping over a list visits its elements in order.
a = [1, 3, 5, 7, 9, 11, 13]
for number in a:
    print(number)

1
3
5
7
9
11
13


In [25]:
# enumerate() pairs every element with its position, so no manual counter is needed.
for position, item in enumerate(a):
    print('Index: ' + str(position))
    print('Value: ' + str(item))
    print('------')

Index: 0
Value: 1
------
Index: 1
Value: 3
------
Index: 2
Value: 5
------
Index: 3
Value: 7
------
Index: 4
Value: 9
------
Index: 5
Value: 11
------
Index: 6
Value: 13
------


In [26]:
# Sets!  A set is a collection in which every item is unique.
set(a)

{1, 3, 5, 7, 9, 11, 13}

In [27]:
# Why doesn't set(a.extend(a)) work?  list.extend() changes `a` in place and
# returns None, so set() is handed None instead of a list.
# The TypeError is the point of this cell, so it is caught and printed rather
# than left to abort a "Run All".  Note that `a` has already been doubled by
# extend() by the time set() fails.
try:
    set(a.extend(a))
except TypeError as err:
    print('TypeError: ' + str(err))

TypeError: 'NoneType' object is not iterable

In [28]:
# extend() ran before set() failed, so `a` now contains every value twice.
a

[1, 3, 5, 7, 9, 11, 13, 1, 3, 5, 7, 9, 11, 13]

In [29]:
# Building a set collapses the duplicates: each value appears once.
set(a)

{1, 3, 5, 7, 9, 11, 13}

In [30]:
# Quick unique trick for a: a set drops the duplicates.  Set iteration order is
# not guaranteed, so sorted() is used to turn the set back into a list with a
# well-defined (ascending) order.
a = sorted(set(a))

In [31]:
# Display the de-duplicated list.
a

[1, 3, 5, 7, 9, 11, 13]

## Pandas

In [32]:
# Pandas introduces two basic objects:
#  Series: one-dimensional and indexed, like R's vectors (its values can be a numpy array)
#  DataFrames: two-dimensional tables, just like R's data frames
a = pd.Series(range(10))

In [33]:
# Display the Series: the index is printed on the left, the values on the right.
a

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [34]:
# Six consecutive timestamps starting at 2015-01-01 (the displayed index reports Freq: D, i.e. daily).
dates = pd.date_range('2015-01-01', periods=6)

In [35]:
# Display the DatetimeIndex that date_range() produced.
dates

<class 'pandas.tseries.index.DatetimeIndex'>
[2015-01-01, ..., 2015-01-06]
Length: 6, Freq: D, Timezone: None

In [36]:
# Build a small demo frame with one row per 5-day step between the two dates.
# The row count is taken from the date index instead of being hard-coded:
# stepping 5 days from 2015-01-01 up to 2015-03-31 gives 18 dates, and every
# column of a DataFrame must have the same length.
sample_dates = pd.date_range('2015-01-01', '2015-03-31', freq='5D')
n_rows = len(sample_dates)
# A seeded generator makes the random columns identical on every run.
rng = np.random.RandomState(42)
df = pd.DataFrame({'dates': sample_dates,
                   'values': rng.rand(n_rows),            # uniform floats in [0, 1)
                   'group': rng.choice([1, 2, 3], n_rows)})  # random group label per row

In [37]:
# Preview the first ten rows rather than dumping the whole frame.
df.head(10)

Unnamed: 0,dates,group,values
0,2015-01-01,1,0.612435
1,2015-01-06,1,0.587268
2,2015-01-11,1,0.881705
3,2015-01-16,2,0.049966
4,2015-01-21,3,0.360619
5,2015-01-26,3,0.923075
6,2015-01-31,2,0.677156
7,2015-02-05,1,0.951413
8,2015-02-10,1,0.774526
9,2015-02-15,1,0.303596


In [38]:
# Mean of `values` within each group.  The numeric column is selected
# explicitly: averaging every column would also involve the `dates` column,
# and how mean() treats non-numeric columns differs between pandas versions.
# The double brackets keep the result a DataFrame with a single `values` column.
df.groupby('group')[['values']].mean()

Unnamed: 0_level_0,values
group,Unnamed: 1_level_1
1,0.536089
2,0.42847
3,0.658427
