![](NotebookHeader.jpg)

## Course : Python for Data Science
## Module 3 Lesson 2 : Pandas - Data manipulation using Series

**Example 1 : Getting Started with Series** 

In [1]:
import numpy as np 
import pandas as pd           # Importing Pandas library
from pandas import Series  

### NumPy array from the previous example, and its limitation

In [2]:
# Raw marks as a plain Python list, then wrapped in a NumPy array.
marks = [
    20, 30, 40,
    550, 600,
]
np_marks = np.array(marks)
np_marks  # bare last expression -> rendered as the cell output

array([ 20,  30,  40, 550, 600])

In [3]:
# An ndarray has no `.index` attribute: its elements can only be reached by position.
# The AttributeError is the point of this cell, so it is caught and its message
# printed rather than being left to abort a "Restart & Run All".
try:
    np_marks.index
except AttributeError as exc:
    print("AttributeError:", exc)

AttributeError: 'numpy.ndarray' object has no attribute 'index'

### NumPy supports vectorized operations, but elements can only be addressed by numeric position (0 to n-1).
### In real business use cases, we need to be able to access data by a meaningful key, such as a customer number.

In [5]:
# Series built from a bare list: pandas supplies a default 0..n-1 index,
# and the mix of ints and floats is stored as float64.
s1 = Series(data=[10.0, 20.1, 50, 100])
s1

0     10.0
1     20.1
2     50.0
3    100.0
dtype: float64

In [6]:
# No index was supplied when s1 was created, so pandas generated a default positional one.
s1.index

RangeIndex(start=0, stop=4, step=1)

In [10]:
# Confirm the class of the object.
type(s1)

pandas.core.series.Series

In [7]:
# The underlying data, exposed as a NumPy array.
s1.values

array([ 10. ,  20.1,  50. , 100. ])

In [8]:
# Element type of the underlying NumPy array.
s1.values.dtype

dtype('float64')

### Series object with meaningful index

In [12]:
# Same constructor, but with caller-chosen integer labels instead of positions 0..n-1.
s2 = Series(
    data=[10, 20, 50, 100],
    index=[1000, 1005, 1010, 1015],
)
s2

1000     10
1005     20
1010     50
1015    100
dtype: int64

In [13]:
# A Series is a one-dimensional array; here its labels are strings.
# NOTE: this rebinds `s1`, replacing the default-indexed Series created earlier.
s1 = Series(
    data=[10, 20, 50, 100],
    index=['low', 'medium', 'high', 'very high'],
)
s1

low           10
medium        20
high          50
very high    100
dtype: int64

In [14]:
s1['medium']      # Look up a single value by its index label

20

In [15]:
# The index is an object in its own right and can be kept in a variable.
indexnames = s1.index
indexnames

Index(['low', 'medium', 'high', 'very high'], dtype='object')

In [16]:
# In a Series, you can use the assigned index labels to access a single value or a set of values.
s1[['high','medium']]       # Note the syntax when retrieving more than one value: a list of labels inside the brackets

high      50
medium    20
dtype: int64

In [17]:
# Assigning through a label updates that element of the Series in place.
s1['high']  = 70
s1

low           10
medium        20
high          70
very high    100
dtype: int64

### Conditions and Slicing

In [18]:
# Rebuild s1 with its original values (the earlier cell changed 'high' to 70).
s1 = Series(data=[10, 20, 50, 100], index=['low', 'medium', 'high', 'very high'])
s1.iloc[0:2]   # positional slice: the first two elements, stop position excluded

low       10
medium    20
dtype: int64

In [19]:
# Keep only the elements whose value equals 100 (selection with a boolean mask).
s1[s1 == 100] 

very high    100
dtype: int64

In [20]:
#1 : Boolean array
cond = s1 < 50      # Element-wise comparison: yields one True/False per element
s1[cond]            # Retrieve the data values for which the condition holds

low       10
medium    20
dtype: int64

In [21]:
s1[cond].index      # Index labels of the elements that satisfy the condition

Index(['low', 'medium'], dtype='object')

### Vectorized operations like in NumPy are possible

In [22]:
#2 : Scalar Operations 
s2 = s1 * 3            # Multiplies every element by 3; the index labels are carried over
s2

low           30
medium        60
high         150
very high    300
dtype: int64

In [23]:
#3 : Applying math functions  
s3 = np.power(s1, 2)            # Squares each element; the result is again a Series with the same labels
s3
# For reference: s1 holds [10, 20, 50, 100]

low            100
medium         400
high          2500
very high    10000
dtype: int64

In [None]:
# The same function applied to plain scalars: 2 raised to the power 3.
np.power(2,3)

In [25]:
# A dict maps naturally onto a Series: keys become the index, values the data.
d = dict(a=1, b=2)
s4 = Series(d)
s4

a    1
b    2
dtype: int64

In [26]:
s4.index             # Observe that the index is made up of the dictionary's keys

Index(['a', 'b'], dtype='object')

### Reassigning index names
#### For example, we may load data with the index that came with the input and then retrieve IDs from another source, such as a CRM, for processing

In [27]:
# Dictionary of customer segments, built by pairing each name with its value.
segments = dict(zip(
    ['low', 'medium', 'high', 'very high'],
    [10, 20, 700, 1000],
))

In [28]:
s1_segments = Series(segments)    # The Series uses the dictionary's keys as its index.
s1_segments

low            10
medium         20
high          700
very high    1000
dtype: int64

In [29]:
# Replacement labels, one per existing element, listed in positional order.
segment_names = [
    'Mass Volume',
    'Average',
    'premium',
    'Luxury',
]

In [30]:
s1_segments.index = segment_names        # Replace the dict-derived labels with the custom names (matched by position).

In [31]:
# Values are now reachable through the new labels.
s1_segments['Average']

20

### More operations / methods

In [32]:
s1_segments.isnull()     # Element-wise null check: returns a boolean Series with the same labels

Mass Volume    False
Average        False
premium        False
Luxury         False
dtype: bool

In [34]:
# Adding / concatenating another Series.
# Series.append() was removed in pandas 2.0; pd.concat() is its replacement.
# Like append() before it, concat() returns a NEW Series. The result is
# deliberately not assigned here, to show that s1_segments itself is unchanged.
pd.concat([s1_segments, Series({'Ultra Luxury': 500})])
s1_segments

Mass Volume      10
Average          20
premium         700
Luxury         1000
dtype: int64

In [35]:
# s2 is just another name bound to the same Series object (no copy is made).
s2 = s1_segments
# pd.concat() replaces Series.append(), which was removed in pandas 2.0.
# It builds a new Series and leaves s2 / s1_segments untouched, as the
# displayed output confirms.
pd.concat([s2, Series({'New Segment': 100})])
s1_segments

Mass Volume      10
Average          20
premium         700
Luxury         1000
dtype: int64

In [41]:
# Keep the result this time by rebinding the name.
# pd.concat() replaces Series.append(), which was removed in pandas 2.0.
# ignore_index=True drops the existing labels and renumbers the elements 0..n-1;
# meaningful labels are assigned again in the following cell.
s1_segments = pd.concat([s1_segments, Series({'Ultra Luxury': 500})], ignore_index=True)

In [43]:
# Restore meaningful labels, including one for the newly added element.
segment_names = ['Mass Volume', 'Average', 'premium', 'Luxury','Ultra Luxury']
s1_segments.index = segment_names
s1_segments

Mass Volume       10
Average           20
premium          700
Luxury          1000
Ultra Luxury     500
dtype: int64

### Operations will be aligned per Index values

In [44]:
# Assume these are the standard prices for the key segments.
# NOTE: this rebinds `segments`, which earlier held a plain dict.
segments = pd.Series(
    data=[10, 20, 70, 100, 200],
    index=['low', 'medium', 'high', 'very high', 'Super High'],
)

In [46]:
# Assume this is a Series of discounts for the key segments. The labels are
# listed in a different order from the prices, and 'Super High' is absent.
discounts = pd.Series(
    data=[5, 0, 15, 25],
    index=['medium', 'low', 'high', 'very high'],
)

In [50]:
# Subtraction pairs values by index label, not by position. A label missing
# from either operand ('Super High' has no discount) yields NaN in the result.
net_price = segments - discounts
net_price 

Super High     NaN
high          55.0
low           10.0
medium        15.0
very high     75.0
dtype: float64

In [None]:
net_price    # Observe that the subtraction is aligned on index values

In [51]:
net_price[net_price.notnull()]       # Selecting only the non-null entries (the NaN row for 'Super High' is left out)

high         55.0
low          10.0
medium       15.0
very high    75.0
dtype: float64

In [None]:
# The Series' own `name` attribute. Nothing visible above sets it, so no value is expected here.
segments.name

In [52]:
# Give each index a name; it is shown as a header above the labels when the Series is displayed.
segments.index.name = "Marketing Segment"
discounts.index.name = "Disc Tier"

In [53]:
# Display the Series: the index name appears as a header above the labels.
discounts

Disc Tier
medium        5
low           0
high         15
very high    25
dtype: int64