# Python Introduction

## Load Libraries

In [1]:
# We can load libraries just like R
import numpy as np    # Numpy is python's numerical library, and it is standard to reference it as 'np'
import pandas as pd   # Pandas is python's dataframe package (just like R, everything is in memory)
import re             # Regular expression library
import os             # Operating System commands (change directory, set working directory, list files, ...)
import logging        # Python's Logging library

## Variables

In [3]:
x = 'math is fun'
print(x)

math is fun


In [5]:
# Lists: ordered sequences written with square brackets; element types may be mixed
a, b = [1, 2, 3], [4, 1.1, -5]
mixed_list = [
    True,
    'beer',
    3.14,
]

In [6]:
# Tuples: comma-separated values; the parentheses are optional here
c = 1, 4, 7
d1 = True, 'foo', 4.4
d2 = 3, -3, 0

In [10]:
# zip pairs up elements by position.  In Python 3 it returns a lazy iterator of
# tuples (not a list), so loop over it, or wrap it in list() to see all pairs.
# Note: the loop variable rebinds `x`, which previously held a string.
for x in zip(a,b):
    print(x)

(1, 4)
(2, 1.1)
(3, -5)


In [11]:
# zip works on tuples too; unpack each pair straight into print's arguments
for pair in zip(d1, d2):
    print(*pair)

True 3
foo -3
4.4 0


In [12]:
a+b # Not like R!!!  For lists, + concatenates (see the output) instead of adding elementwise

[1, 2, 3, 4, 1.1, -5]

In [13]:
# Elementwise addition of two lists: pair the elements up with zip,
# then add each pair inside a list comprehension
[left + right for (left, right) in zip(a, b)]

[5, 3.1, -2]

In [14]:
a.extend(b) # THIS IS IN PLACE!!!!  Each element of b is added to the end of a (see a below)

In [15]:
a

[1, 2, 3, 4, 1.1, -5]

In [16]:
a.append(b) # THIS IS IN PLACE!!!!  Unlike extend, append adds b itself as ONE nested element (see a below)

In [17]:
[a.append(b_el) for b_el in b] # Same effect on a as extend.  The list of None shown below is just append's return values; prefer extend

[None, None, None]

In [18]:
a

[1, 2, 3, 4, 1.1, -5, [4, 1.1, -5], 4, 1.1, -5]

In [19]:
# Important!  Indexing starts at ZERO, instead of 1 (R)
a[0]

1

In [20]:
# A slice excludes its end index and returns a list, even when it holds one element
a[0:1]

[1]

In [21]:
# Introducing numpy: arrays are the closest match to R's vectors,
# and arithmetic on them is elementwise (see a+b below)
a, b = np.array([1, 2, 3]), np.array([4, 2, 0])

In [22]:
a+b

array([5, 4, 3])

In [23]:
# Dictionaries map keys to values -- super important!
# Values can be of any type: strings, numbers, even lists.
home_dict = {
    'address': '123 Main St',
    'city': 'Seattle',
    'state': 'WA',
    'zip': 98101,
    'bedrooms': 5,
    'agents': ['Sally', 'John', 'Bill', 'Cindy'],
}

In [24]:
home_dict # Note the order of the keys!  The display below lists them alphabetically, not in the order they were written

{'address': '123 Main St',
 'agents': ['Sally', 'John', 'Bill', 'Cindy'],
 'bedrooms': 5,
 'city': 'Seattle',
 'state': 'WA',
 'zip': 98101}

In [25]:
# .items() yields (key, value) pairs; print each pair followed by a separator line
for name, entry in home_dict.items():
    print('Key: ', name, sep='')
    print('Value: ', entry, sep='')
    print('-----------')

Key: address
Value: 123 Main St
-----------
Key: city
Value: Seattle
-----------
Key: state
Value: WA
-----------
Key: zip
Value: 98101
-----------
Key: bedrooms
Value: 5
-----------
Key: agents
Value: ['Sally', 'John', 'Bill', 'Cindy']
-----------


In [26]:
# Many python objects (tuples, lists, strings, dicts, ...) are 'iterable'.
# A for loop asks the iterable for an iterator; the iterator remembers its
# position and hands back the next element each time until it is exhausted.
# The tuple is spelled out here so the cell does not depend on a variable
# left over from an earlier loop.
pair = (3, -5)
for element in pair:
    print(element)

3
-5


In [27]:
# Looping directly over a dict walks its keys
for key in home_dict:
    print(key)

address
city
state
zip
bedrooms
agents


In [28]:
# The odd numbers 1, 3, ..., 13, printed one per line
a = list(range(1, 14, 2))
for number in a:
    print(number)

1
3
5
7
9
11
13


In [29]:
# enumerate() yields (index, element) pairs, so no manual counter is needed
for position, element in enumerate(a):
    print('Index: ', position, sep='')
    print('Value: ', element, sep='')
    print('------')

Index: 0
Value: 1
------
Index: 1
Value: 3
------
Index: 2
Value: 5
------
Index: 3
Value: 7
------
Index: 4
Value: 9
------
Index: 5
Value: 11
------
Index: 6
Value: 13
------


In [30]:
# Sets!  Sets are an object that MUST contain unique items
set(a)

{1, 3, 5, 7, 9, 11, 13}

In [31]:
# Why doesn't this work?  a.extend(a) changes `a` in place and returns None,
# so set() is handed None instead of a list and raises a TypeError.
# The error is caught and printed so that "Run All" keeps going.
# Note that `a` has still been extended by the time the error is raised.
try:
    set(a.extend(a))
except TypeError as err:
    print('TypeError: ' + str(err))

TypeError: 'NoneType' object is not iterable

In [32]:
a

[1, 3, 5, 7, 9, 11, 13, 1, 3, 5, 7, 9, 11, 13]

In [33]:
set(a)

{1, 3, 5, 7, 9, 11, 13}

In [34]:
# Quick unique trick for a: a set drops duplicates but has no guaranteed order,
# so sort the result to get a list in a predictable (ascending) order.
a = sorted(set(a))

In [35]:
a

[1, 3, 5, 7, 9, 11, 13]

## Pandas

In [36]:
# pandas is built around two basic objects:
#   Series     - closest to R's vectors (can wrap a numpy array)
#   DataFrame  - just like R's data frames
a = pd.Series(data=range(10))

In [37]:
a

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32

In [38]:
# Six consecutive calendar days starting on 1 Jan 2015 (daily frequency is the default)
dates = pd.date_range(start='2015-01-01', periods=6, freq='D')

In [39]:
dates

DatetimeIndex(['2015-01-01', '2015-01-02', '2015-01-03', '2015-01-04',
               '2015-01-05', '2015-01-06'],
              dtype='datetime64[ns]', freq='D')

In [44]:
# Build a small demo frame: one row every 5 days from 2015-01-01 to 2015-03-31,
# each with a random value in [0, 1) and a random group label (1, 2 or 3).
# The row count is taken from the date range instead of being hard-coded, and
# a seeded generator makes the random columns identical on every run.
sample_dates = pd.date_range('2015-01-01','2015-03-31', freq='5D')
rng = np.random.RandomState(0)
df = pd.DataFrame({'dates':sample_dates,
                   'values':rng.rand(len(sample_dates)),
                   'group':rng.choice([1,2,3],len(sample_dates))})

In [45]:
df

Unnamed: 0,dates,group,values
0,2015-01-01,1,0.579168
1,2015-01-06,2,0.735424
2,2015-01-11,1,0.440158
3,2015-01-16,1,0.468138
4,2015-01-21,1,0.57951
5,2015-01-26,3,0.99744
6,2015-01-31,2,0.072507
7,2015-02-05,2,0.075233
8,2015-02-10,2,0.712047
9,2015-02-15,1,0.001481


In [47]:
# Mean of `values` within each group.  The column is selected explicitly so the
# result does not depend on how pandas treats the non-numeric `dates` column.
df.groupby('group')[['values']].mean()

Unnamed: 0_level_0,values
group,Unnamed: 1_level_1
1,0.414862
2,0.438136
3,0.614862


## Also note that Jupyter can render Markdown (formatted text like this heading)!