# pandas Series

## Setup

In [1]:
import numpy as np
import pandas as pd

pd.set_option('display.max_columns', 10)
pd.set_option('display.max_rows', 10)

### Create Series from ndarray

In [2]:
s1 = pd.Series(np.arange(0,5))
s1

0    0
1    1
2    2
3    3
4    4
dtype: int32

### Create Series with index

In [3]:
s2 = pd.Series(np.arange(0,5), index=['a','b','c','d','e'])
s2

a    0
b    1
c    2
d    3
e    4
dtype: int32

### Assign index to Existing Series

In [4]:
s2.index = ['A','B','C','D','E']
s2 

A    0
B    1
C    2
D    3
E    4
dtype: int32

### The index and values Properties

In [5]:
s2.index

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [6]:
s2.values

array([0, 1, 2, 3, 4])

In [5]:
type(s2.values)

numpy.ndarray

### Create One-Item Series from Scalar

In [6]:
s3 = pd.Series(5)
s3

0    5
dtype: int64

### Create Series from list

In [9]:
s4 = pd.Series([1,2,3,4,5])
s4

0    1
1    2
2    3
3    4
4    5
dtype: int64

### Create Series from dict

In [7]:
from datetime import date
bdays = {
    'John': date(1940, 10, 9),
    'Paul': date(1942, 6, 18),
    'George': date(1943, 2, 25),
    'Ringo': date(1940, 7, 7),
}
s5 = pd.Series(bdays)
s5

John      1940-10-09
Paul      1942-06-18
George    1943-02-25
Ringo     1940-07-07
dtype: object

In [9]:
ar = np.array([1,2,3,np.nan,5,6,7,np.nan,9,10])
ar

array([ 1.,  2.,  3., nan,  5.,  6.,  7., nan,  9., 10.])

In [20]:
ar.mean()
np.nanmean(ar)

5.375

In [21]:
s6 = pd.Series(ar)
s6

# Replace Nans with mean of vector
s6.fillna(s6.mean())


0     1.000
1     2.000
2     3.000
3     5.375
4     5.000
5     6.000
6     7.000
7     5.375
8     9.000
9    10.000
dtype: float64

In [17]:
s6.mean(), sum([1,2,3,5,6,7,9,10])/8

(5.375, 5.375)

In [16]:
s6.mean(skipna=True)

5.375

### loc[] and iloc[]

In [23]:
s7 = pd.Series(np.random.sample(5), index=['a','b','c','d','e'])
s7

a    0.425040
b    0.439718
c    0.711583
d    0.486793
e    0.035760
dtype: float64

In [17]:
s7.loc['a'], s7.iloc[0]

(0.4617874536166332, 0.4617874536166332)

In [24]:
s7.loc['b':'e']

b    0.439718
c    0.711583
d    0.486793
e    0.035760
dtype: float64

In [19]:
s7.iloc[1:4]

b    0.750611
c    0.904561
d    0.534732
dtype: float64

In [20]:
s7.loc[['a','c','d']]

a    0.461787
c    0.904561
d    0.534732
dtype: float64

In [21]:
s7.iloc[[0,2,4]]

a    0.461787
c    0.904561
e    0.865766
dtype: float64

### Alignment

In [25]:
grades1 = pd.Series([17, 44, 28, 8, 3], index=['A','B','C','D','F'])
grades2 = pd.Series([76, 122, 151, 21, 0], index=['D','C','B','A','F'])

In [23]:
grades1

A    17
B    44
C    28
D     8
F     3
dtype: int64

In [24]:
grades2

D     76
C    122
B    151
A     21
F      0
dtype: int64

In [26]:
grades_all = grades1 + grades2
grades_all

A     38
B    195
C    150
D     84
F      3
dtype: int64

In [26]:
grades1 = pd.Series([17, 44, 28, 8, 3], index=['A','B','C','D','F'])
grades2 = pd.Series([76, 122, 151, 21], index=['D','C','B','A'])
grades_all = grades1 + grades2
grades_all

A     38.0
B    195.0
C    150.0
D     84.0
F      NaN
dtype: float64

In [27]:
grades_all = grades1.add(grades2, fill_value=0)
grades_all

A     38.0
B    195.0
C    150.0
D     84.0
F      3.0
dtype: float64

### Comparing Series

In [28]:
mantle1968 = pd.Series([14,1,18,54,6], index=['2B','3B','HR','RBI','SB'])
mantle1951 = pd.Series([11,5,13,65,8], index=['2B','3B','HR','RBI','SB'])
mantle1968 > mantle1951

2B      True
3B     False
HR      True
RBI    False
SB     False
dtype: bool

In [29]:
mantle1968[mantle1968 > mantle1951]

2B    14
HR    18
dtype: int64

### Element-wise Operations

In [31]:
np.random.seed(1)
exam_grades = pd.Series(np.random.randint(60,101,100))
exam_grades

0     97
1     72
2     68
3     69
4     71
      ..
95    87
96    81
97    71
98    67
99    73
Length: 100, dtype: int32

In [32]:
curved_grades = exam_grades.multiply(1.05)
curved_grades

0     101.85
1      75.60
2      71.40
3      72.45
4      74.55
       ...  
95     91.35
96     85.05
97     74.55
98     70.35
99     76.65
Length: 100, dtype: float64

In [28]:
def convert_to_letter(grade):
    if grade >= 90:
        return 'A'
    elif grade >= 80:
        return 'B'
    elif grade >= 70:
        return 'C'
    elif grade >= 65:
        return 'D'
    else:
        return 'F'

In [34]:
letter_grades = curved_grades.apply(convert_to_letter)

In [35]:
letter_grades = curved_grades.apply(convert_to_letter)
letter_grades

0     A
1     C
2     C
3     C
4     C
     ..
95    A
96    B
97    C
98    C
99    C
Length: 100, dtype: object