# Pandas Series

In [1]:
# A pandas Series is similar to a NumPy array, but it can also have axis labels, so its values can be indexed by label

In [2]:
import numpy as np

In [3]:
import pandas as pd

In [5]:
# Sample inputs reused by the cells below: the same three values held as a
# plain list, as a NumPy array, and as a dict keyed by the labels.
labels = ['a', 'b', 'c']
my_data = [10, 20, 30]
arr = np.array(my_data)
d = dict(zip(labels, my_data))

In [7]:
labels

['a', 'b', 'c']

In [8]:
my_data

[10, 20, 30]

In [9]:
arr

array([10, 20, 30])

In [10]:
d

{'a': 10, 'b': 20, 'c': 30}

In [11]:
# no index is passed, so the Series gets default integer index labels (0, 1, 2 in the left column of the output)
# dtype (last line of the output) is the type of the stored values, not of the index
pd.Series(data=my_data)

0    10
1    20
2    30
dtype: int64

In [12]:
# pass index= to choose the index yourself (the items of the labels list are now the index labels)
pd.Series(data=my_data, index=labels)

a    10
b    20
c    30
dtype: int64

In [13]:
# same Series as above, built with positional arguments: data first, then index
pd.Series(my_data, labels)

a    10
b    20
c    30
dtype: int64

In [15]:
# the data can also be a NumPy array instead of a Python list
pd.Series(arr, labels)

a    10
b    20
c    30
dtype: int64

In [16]:
# a dictionary can be passed directly: its keys become the index labels
# and its values become the corresponding data
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [17]:
d

{'a': 10, 'b': 20, 'c': 30}

In [18]:
# the data in a Series doesn't have to be numbers
# here the values are strings, reported as dtype: object in the output below
pd.Series(labels)

0    a
1    b
2    c
dtype: object

In [21]:
# a Series can hold arbitrary Python objects, even built-in functions (not particularly practical)
pd.Series(data=[sum, print, len])

0      <built-in function sum>
1    <built-in function print>
2      <built-in function len>
dtype: object

In [22]:
# four integer values labelled by country name
ser1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['USA', 'Germany', 'USSR', 'Japan'],
)

In [23]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [24]:
# four integer values labelled by country name
ser2 = pd.Series(
    data=[1, 2, 5, 4],
    index=['USA', 'Germany', 'Italy', 'Japan'],
)

In [25]:
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [26]:
# grabbing data from a Series
# the index labels of ser1 are strings, so a value is looked up with a string label
ser1['USA']

1

In [35]:
# no index is given, so the strings in labels are the values of ser3
ser3 = pd.Series(labels)

In [36]:
ser3

0    a
1    b
2    c
dtype: object

In [38]:
# ser3 was built without an explicit index, so its index labels are integers: look up with an integer
ser3[0]

'a'

In [40]:
ser1

USA        1
Germany    2
USSR       3
Japan      4
dtype: int64

In [41]:
ser2

USA        1
Germany    2
Italy      5
Japan      4
dtype: int64

In [39]:
# two Series can be added together
# values are matched and summed by index label, not by position
# a label that does not exist in both Series gets NaN (a missing value) in the result
# Note: the result is float64 because NaN is a float and cannot be stored in an int64 Series
ser1 + ser2

Germany    4.0
Italy      NaN
Japan      8.0
USA        2.0
USSR       NaN
dtype: float64