In [1]:
import pandas as pd

# Working with Series

In [2]:
lst = [2, 67,43]
pd.Series(lst)

0     2
1    67
2    43
dtype: int64

In [3]:
lst_bool = [True, False, True, True]

In [6]:
# This is not advised as its good to keep data types homogenous
lst_mixed = ["Stringy", True, 23,67 ,22.0]

In [5]:
lst_mixed

['Stringy', True, 23, 67, 22.0]

In [10]:
# set an index in a series
pd.Series(5,index=["A","B","C", "D"])

A    5
B    5
C    5
D    5
dtype: int64

In [9]:
# create a series from a list comprehension
pd.Series([num*10 for num in range(10)])

0     0
1    10
2    20
3    30
4    40
5    50
6    60
7    70
8    80
9    90
dtype: int64

In [11]:
my_phonebook = {'michael':'555-555-5552',
               'shawn':'555-555-5532',
               'larry':'555-555-4433'}

In [12]:
# create a series from a dictionary
pd.Series(my_phonebook)

michael    555-555-5552
shawn      555-555-5532
larry      555-555-4433
dtype: object

In [22]:
# create a series from a csv file using squeeze and column specification
stock = pd.read_csv('./data/Stock.csv', squeeze=True, usecols=['Open'])

In [21]:
stock

0     16.770000
1     17.100000
2     17.059999
3     16.900000
4     16.969999
5     16.240000
6     16.389999
7     16.549999
8     15.990000
9     15.770000
10    16.389999
11    16.330000
12    16.340000
13    16.240000
14    16.469999
15    16.790001
16    17.100000
17    17.299999
18    16.299999
19    16.459999
Name: Open, dtype: float64

In [24]:
# understand the indexing of a series
stock.index

RangeIndex(start=0, stop=20, step=1)

In [25]:
# return an array of series values
stock.values

array([16.77    , 17.1     , 17.059999, 16.9     , 16.969999, 16.24    ,
       16.389999, 16.549999, 15.99    , 15.77    , 16.389999, 16.33    ,
       16.34    , 16.24    , 16.469999, 16.790001, 17.1     , 17.299999,
       16.299999, 16.459999])

In [26]:
# review the data types of a series
stock.dtype

dtype('float64')

In [27]:
# return the dimensions of a series, should be 1
stock.ndim

1

In [33]:
# use an aggregate function on a series
stock.aggregate(['max','min'])

max    17.299999
min    15.770000
Name: Open, dtype: float64

In [34]:
# pass a series to a function
max(stock)

17.299999

In [35]:
# return the first five value
stock.head()

0    16.770000
1    17.100000
2    17.059999
3    16.900000
4    16.969999
Name: Open, dtype: float64

In [37]:
# I can call statistical functions on a series
mean_price = stock.mean()

std_price = stock.std()

In [38]:
std_price

0.4076130110008758

In [40]:
# determine the shape of a series
stock.shape

(20,)

In [41]:
stock.idxmax

<bound method Series.idxmax of 0     16.770000
1     17.100000
2     17.059999
3     16.900000
4     16.969999
5     16.240000
6     16.389999
7     16.549999
8     15.990000
9     15.770000
10    16.389999
11    16.330000
12    16.340000
13    16.240000
14    16.469999
15    16.790001
16    17.100000
17    17.299999
18    16.299999
19    16.459999
Name: Open, dtype: float64>

In [44]:
# I can create label indexes from lists
avg_temps = [33,23,45,64,78,89,94,101,88,77,65,55,32]
months = ["Jsn", "Feb", "Mar", "Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]

In [51]:
# create a series from a lists of data and custom indexes 
pd.Series(avg_temps, index=months)

Jsn     33.23
Feb     45.00
Mar     64.00
Apr     78.00
May     89.00
Jun     94.00
Jul    101.00
Aug     88.00
Sep     77.00
Oct     65.00
Nov     55.00
Dec     32.00
dtype: float64

In [53]:
yearly_weather = pd.Series(data=avg_temps, index=months)

In [54]:
yearly_weather.median

<bound method Series.median of Jsn     33.23
Feb     45.00
Mar     64.00
Apr     78.00
May     89.00
Jun     94.00
Jul    101.00
Aug     88.00
Sep     77.00
Oct     65.00
Nov     55.00
Dec     32.00
dtype: float64>

In [55]:
# sort series values in descending order
yearly_weather.sort_values()

Dec     32.00
Jsn     33.23
Feb     45.00
Nov     55.00
Mar     64.00
Oct     65.00
Sep     77.00
Apr     78.00
Aug     88.00
May     89.00
Jun     94.00
Jul    101.00
dtype: float64

In [59]:
# sort series values in ascending order, does not mutate underlying data
yearly_weather.sort_values(ascending=False)

Jul    101.00
Jun     94.00
May     89.00
Aug     88.00
Apr     78.00
Sep     77.00
Oct     65.00
Mar     64.00
Nov     55.00
Feb     45.00
Jsn     33.23
Dec     32.00
dtype: float64

In [60]:
# sort the series by the index values
yearly_weather.sort_index()

Apr     78.00
Aug     88.00
Dec     32.00
Feb     45.00
Jsn     33.23
Jul    101.00
Jun     94.00
Mar     64.00
May     89.00
Nov     55.00
Oct     65.00
Sep     77.00
dtype: float64

In [61]:
# use built in python types with series
len(yearly_weather)
type(yearly_weather)

pandas.core.series.Series

In [62]:
list(yearly_weather)

[33.23, 45.0, 64.0, 78.0, 89.0, 94.0, 101.0, 88.0, 77.0, 65.0, 55.0, 32.0]

In [63]:
dict(yearly_weather)

{'Jsn': 33.23,
 'Feb': 45.0,
 'Mar': 64.0,
 'Apr': 78.0,
 'May': 89.0,
 'Jun': 94.0,
 'Jul': 101.0,
 'Aug': 88.0,
 'Sep': 77.0,
 'Oct': 65.0,
 'Nov': 55.0,
 'Dec': 32.0}

In [64]:
sorted(yearly_weather)

[32.0, 33.23, 45.0, 55.0, 64.0, 65.0, 77.0, 78.0, 88.0, 89.0, 94.0, 101.0]

In [65]:
max(yearly_weather)

101.0

In [66]:
min(yearly_weather)

32.0

In [68]:
# extract values from series as I extract by slicing from a list
yearly_weather[0]

33.23

In [71]:
spring_temp_mean = yearly_weather[2:6].mean()

In [72]:
spring_temp_mean

81.25

In [75]:
# extract by index value
yearly_weather[['Apr','Aug']]

Apr    78.0
Aug    88.0
dtype: float64

In [77]:
# applying methods to a series
def celcius(c):
    return ((c * 1.9) +32)

In [79]:
celsius_converted = yearly_weather.apply(celcius)

In [81]:
celsius_converted.head()

Jsn     95.137
Feb    117.500
Mar    153.600
Apr    180.200
May    201.100
dtype: float64

In [82]:
# mapping objects
mapobj = {32:320, 77:770, 88:880}

In [83]:
yearly_weather.map(mapobj)

Jsn      NaN
Feb      NaN
Mar      NaN
Apr      NaN
May      NaN
Jun      NaN
Jul      NaN
Aug    880.0
Sep    770.0
Oct      NaN
Nov      NaN
Dec    320.0
dtype: float64