In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#http://techbus.safaribooksonline.com/video/databases-and-reporting-tools/9781771375764/introduction/video240029

# Data Types

In [2]:
temps = pd.Series([10, 30, 40, 50, 90])

In [3]:
temps

0    10
1    30
2    40
3    50
4    90
dtype: int64

In [4]:
# A single float among the values makes pandas store the whole Series as float64.
temps = pd.Series([1.1, 30, 40, 50, 90])

In [5]:
temps

0     1.1
1    30.0
2    40.0
3    50.0
4    90.0
dtype: float64

In [6]:
# Mixing a dict with numbers leaves no common numeric type, so the dtype becomes object.
temps = pd.Series([{}, 30.1, 40, 50, 90])

In [7]:
temps

0      {}
1    30.1
2      40
3      50
4      90
dtype: object

In [8]:
temps = pd.Series(["2016-01-02", "2017-03-04"])

In [9]:
temps

0    2016-01-02
1    2017-03-04
dtype: object

In [10]:
pd.to_datetime(temps)

0   2016-01-02
1   2017-03-04
dtype: datetime64[ns]

# More Series

In [11]:
temps = pd.Series([10, 30, 40, 50, 90])
# Iterating a Series yields its values (not its index labels).
for x in temps:
    print(x)  # print() call form: the Python 2 print statement is a SyntaxError on Python 3

10
30
40
50
90


In [12]:
10 in temps  # `in` tests the index labels, not the values, so a value such as 10 is not found

False

In [13]:
4 in temps  #check the index

True

In [14]:
40 in set(temps)

True

In [15]:
40 in temps.values

True

In [16]:
set(temps)

{10, 30, 40, 50, 90}

In [17]:
dict(temps)

{0: 10, 1: 30, 2: 40, 3: 50, 4: 90}

In [18]:
# Series.items() yields (index label, value) pairs; it replaces Series.iteritems(),
# which was removed in pandas 2.0.
for i, val in temps.items():
    print(i, val)

0 10
1 30
2 40
3 50
4 90


In [19]:
# dict(temps) maps each index label to its value, so .items() gives the same pairs
# as iterating the Series' own items.
for i, val in dict(temps).items():
    print(i, val)  # print() call form: the Python 2 print statement is a SyntaxError on Python 3

0 10
1 30
2 40
3 50
4 90


# Broadcasting

In [20]:
temps

0    10
1    30
2    40
3    50
4    90
dtype: int64

In [21]:
temps + 2

0    12
1    32
2    42
3    52
4    92
dtype: int64

In [22]:
temps * 2

0     20
1     60
2     80
3    100
4    180
dtype: int64

In [23]:
t2 = pd.Series([11,22,33], index = [1,2,3])

In [24]:
temps + t2

0     NaN
1    41.0
2    62.0
3    83.0
4     NaN
dtype: float64

In [25]:
temps * t2

0       NaN
1     330.0
2     880.0
3    1650.0
4       NaN
dtype: float64

In [26]:
def add_2(val):
    """Return ``val`` increased by two."""
    increment = 2
    return val + increment

temps.apply(add_2)

0    12
1    32
2    42
3    52
4    92
dtype: int64

In [27]:
temps.apply(float)

0    10.0
1    30.0
2    40.0
3    50.0
4    90.0
dtype: float64

In [28]:
temps.astype(str)

0    10
1    30
2    40
3    50
4    90
dtype: object

In [29]:
temps.astype(float)

0    10.0
1    30.0
2    40.0
3    50.0
4    90.0
dtype: float64

# CRUD Operations Reading

In [30]:
temps = pd.Series([10, 30, 40, 50, 90], index = list("ABCDE"))
temps

A    10
B    30
C    40
D    50
E    90
dtype: int64

In [31]:
temps.loc["A"] #based on label

10

In [32]:
# temps[0] on a string-labelled Series relies on an implicit fallback from label lookup
# to positional lookup, which is deprecated in recent pandas; .iloc states the intent.
temps.iloc[0] #based on position

10

In [33]:
temps["A"]

10

In [34]:
temps.B

30

In [35]:
temps.iloc[-1] #based on position

90

In [36]:
temps = pd.Series([10, 30, 40, 50, 90], index = ["A","B","C",0,1]) # mixed index
temps

A    10
B    30
C    40
0    50
1    90
dtype: int64

In [37]:
# 4 is not a label in this mixed index, so temps[4] only worked through the deprecated
# positional fallback; .iloc selects the fifth element explicitly.
temps.iloc[4] #based on position

90

In [38]:
temps.loc[0] #label

50

In [39]:
temps[0] #label

50

# CRUD updating

In [40]:
temps = pd.Series([10, 30, 40, 50, 90], index = list("ABCDE"))
temps

A    10
B    30
C    40
D    50
E    90
dtype: int64

In [41]:
temps["A"] = 11
temps.loc["B"] = 31
temps.iloc[-1] = 91
temps

A    11
B    31
C    40
D    50
E    91
dtype: int64

In [42]:
# Series.append was removed in pandas 2.0. pd.concat likewise builds a new Series,
# so temps itself is not modified.
t2 = pd.concat([temps, pd.Series([100], index=["F"])])
t2

A     11
B     31
C     40
D     50
E     91
F    100
dtype: int64

In [43]:
temps

A    11
B    31
C    40
D    50
E    91
dtype: int64

In [44]:
# Series.set_value was removed in pandas 1.0; .at is the label-based scalar setter
# and updates temps in place. The trailing expression displays the updated Series.
temps.at["D"] = 51
temps

A    11
B    31
C    40
D    51
E    91
dtype: int64

In [45]:
temps

A    11
B    31
C    40
D    51
E    91
dtype: int64

# Delete

In [46]:
temps

A    11
B    31
C    40
D    51
E    91
dtype: int64

In [47]:
del temps["A"]

In [48]:
temps

B    31
C    40
D    51
E    91
dtype: int64

In [49]:
temps[temps< 50]

B    31
C    40
dtype: int64

In [52]:
# Comparing the index to a scalar is element-wise: one boolean per label, True where it equals "D".
mask = temps.index == "D"

In [53]:
mask

array([False, False,  True, False], dtype=bool)

In [54]:
temps[mask]

D    51
dtype: int64

# Summary Statistics

In [55]:
temps.min()

31

In [56]:
temps.describe()

count     4.000000
mean     53.250000
std      26.462237
min      31.000000
25%      37.750000
50%      45.500000
75%      61.000000
max      91.000000
dtype: float64

In [57]:
temps.describe(percentiles=[.05, .1, .3])

count     4.000000
mean     53.250000
std      26.462237
min      31.000000
5%       32.350000
10%      33.700000
30%      39.100000
50%      45.500000
max      91.000000
dtype: float64

In [58]:
ser8 = pd.Series(["price", "accord", "price", "camry"], dtype='category')
ser8

0     price
1    accord
2     price
3     camry
dtype: category
Categories (3, object): [accord, camry, price]

In [59]:
ser8.describe()

count         4
unique        3
top       price
freq          2
dtype: object

In [60]:
ser8.value_counts()

price     2
camry     1
accord    1
dtype: int64