In [1]:
import pandas as pd
import numpy as np

In [27]:
s = pd.Series([56,67,88,99,33])

In [28]:
s

0    56
1    67
2    88
3    99
4    33
dtype: int64

In [4]:
type(s)

pandas.core.series.Series

In [7]:
s[0]  # index

56

In [8]:
s[0:3]

0    56
1    67
2    88
dtype: int64

In [29]:
s.index

RangeIndex(start=0, stop=5, step=1)

In [7]:
marks = pd.Series([56,67,88,99,33], index=[101,102,103,103,106])  # custom index

In [10]:
marks

101    56
102    67
103    88
103    99
106    33
dtype: int64

In [11]:
marks[103]

103    88
103    99
dtype: int64

In [14]:
s = pd.Series( np.random.randint(1,100,10), np.arange(10,0,-1))

In [15]:
s

10    10
9     84
8     26
7     35
6     80
5     14
4     90
3     78
2     11
1     86
dtype: int32

In [2]:
s = pd.Series( np.random.randint(1,100,10), list('abcdefgijk'))
s

a    32
b    21
c    26
d     7
e    89
f    70
g    26
i    20
j    63
k    89
dtype: int32

### Series attributes

In [31]:
s.index

Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k'], dtype='object')

In [20]:
s.values

array([17, 84, 44,  7, 89, 47, 19, 27, 86, 42])

In [21]:
s.axes

[Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k'], dtype='object')]

In [22]:
s.is_unique

True

In [3]:
s.iloc[0], s.loc['e'] , s['e']

(32, 89, 89)

In [36]:
players = pd.Series( {'Dhoni':30,'Virat':32, 'Rohit':20})

In [37]:
players

Dhoni    30
Virat    32
Rohit    20
dtype: int64

In [40]:
players['Dhoni':'Rohit']

Dhoni    30
Virat    32
Rohit    20
dtype: int64

In [41]:
players.loc['Dhoni':'Virat']   # from Dhoni to Virat 

Dhoni    30
Virat    32
dtype: int64

In [44]:
players['Dhoni'] = 38
players

Dhoni    38
Virat    32
Rohit    20
dtype: int64

In [43]:
players.iloc[0:2]   # from 0 to 1 

Dhoni    38
Virat    32
dtype: int64

In [8]:
aggr = marks.describe()
print(type(aggr))
print(aggr)

<class 'pandas.core.series.Series'>
count     5.000000
mean     68.600000
std      26.120873
min      33.000000
25%      56.000000
50%      67.000000
75%      88.000000
max      99.000000
dtype: float64


In [9]:
aggr.loc[['count','mean']]

count     5.0
mean     68.6
dtype: float64

In [53]:
aggr.iloc[:3]

count     5.000000
mean     68.600000
std      26.120873
dtype: float64

In [10]:
for i,v in marks.items():
    print(i,v)

101 56
102 67
103 88
103 99
106 33


In [11]:
marks.agg([np.mean,sum, min, max])

mean     68.6
sum     343.0
min      33.0
max      99.0
dtype: float64

In [12]:
marks[106] = 67
marks.mode()[0]   # first mode - mode() returns Series 

67

In [41]:
s.tail()

6      2
7     35
8     98
9     27
10    17
dtype: int32

In [42]:
s.head(3)

1    99
2    52
3    95
dtype: int32

#### apply()

In [45]:
def ispass(v):
    return True if v >= 70 else False

In [46]:
marks.apply(ispass)

101    False
102    False
103     True
103     True
106    False
dtype: bool

In [13]:
marks.apply(lambda v: 70 if v < 70 else v)

101    70
102    70
103    88
103    99
106    70
dtype: int64

#### pct_change()

In [14]:
sales = pd.Series ( {'Jan': 2000,'Feb' : 3000, 'Mar' : 1000, 'Apr' : 4000,'May' : 5000})

In [15]:
sales.pct_change() * 100

Jan           NaN
Feb     50.000000
Mar    -66.666667
Apr    300.000000
May     25.000000
dtype: float64

In [17]:
print(marks)
print(marks.rank())

101    56
102    67
103    88
103    99
106    67
dtype: int64
101    1.0
102    2.5
103    4.0
103    5.0
106    2.5
dtype: float64


In [20]:
marks.rank(method='first',ascending=False)

101    5.0
102    3.0
103    2.0
103    1.0
106    4.0
dtype: float64

In [21]:
marks.quantile(0.75)

88.0

### Convert Series to others

In [22]:
marks.to_dict()

{101: 56, 102: 67, 103: 99, 106: 67}

In [25]:
sales.to_json()

'{"Jan":2000,"Feb":3000,"Mar":1000,"Apr":4000,"May":5000}'

### Handling NA/NAN values

In [29]:
ages = pd.Series([50,45,35,35,50,60,23,56,np.nan,35,40, np.nan])
#print(ages)

In [30]:
ages.value_counts()

35.0    3
50.0    2
40.0    1
56.0    1
23.0    1
60.0    1
45.0    1
dtype: int64

In [32]:
ages.isna().sum()    # no. of NaN values 

2

In [35]:
ages.sort_values(ascending=False,na_position='first')

8      NaN
11     NaN
5     60.0
7     56.0
4     50.0
0     50.0
1     45.0
10    40.0
9     35.0
3     35.0
2     35.0
6     23.0
dtype: float64

In [36]:
ages.isna().any() 

True

In [39]:
ages[ages.notna()]    # not operator is ~  :   ~ ages.isna()

0     50.0
1     45.0
2     35.0
3     35.0
4     50.0
5     60.0
6     23.0
7     56.0
9     35.0
10    40.0
dtype: float64

In [41]:
ages.dropna()
# ages.dropna(inplace=True)  # Drops na values from series 

0     50.0
1     45.0
2     35.0
3     35.0
4     50.0
5     60.0
6     23.0
7     56.0
8      NaN
9     35.0
10    40.0
11     NaN
dtype: float64


In [43]:
ages.fillna(ages.mean()) # replace NaN with mean 

0     50.0
1     45.0
2     35.0
3     35.0
4     50.0
5     60.0
6     23.0
7     56.0
8     42.9
9     35.0
10    40.0
11    42.9
dtype: float64

In [44]:
ages

0     50.0
1     45.0
2     35.0
3     35.0
4     50.0
5     60.0
6     23.0
7     56.0
8      NaN
9     35.0
10    40.0
11     NaN
dtype: float64

In [48]:
ages.isnull().sum()

2