In [1]:
import pandas as pd
import numpy as np

In [2]:
s = pd.Series([56,67,88,99,33])

In [3]:
s

0    56
1    67
2    88
3    99
4    33
dtype: int64

In [4]:
type(s)

pandas.core.series.Series

In [4]:
s[0]  # index

56

In [5]:
s[0:3]

0    56
1    67
2    88
dtype: int64

In [6]:
s.index

RangeIndex(start=0, stop=5, step=1)

In [2]:
marks = pd.Series([56,67,88,99,33], index=[101,102,103,103,106])  # custom index

In [8]:
marks

101    56
102    67
103    88
103    99
106    33
dtype: int64

In [9]:
marks[103]

103    88
103    99
dtype: int64

In [10]:
s = pd.Series(np.random.randint(1,100,10), np.arange(10,0,-1))

In [11]:
s

10    42
9     40
8     47
7     88
6     41
5     96
4     27
3     66
2      9
1     20
dtype: int32

In [15]:
# Ten random integers drawn from [1, 100) and labelled with single letters.
# No seed is set in the visible cells, so re-running this cell produces values
# that differ from the outputs recorded in this notebook.
s = pd.Series( np.random.randint(1,100,10), list('abcdefgijk'))
s

a    33
b    24
c    43
d     2
e    47
f    57
g    46
i     2
j    82
k    38
dtype: int32

### Series attributes

In [16]:
s.index

Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k'], dtype='object')

In [17]:
s.values

array([33, 24, 43,  2, 47, 57, 46,  2, 82, 38])

In [19]:
s.axes

[Index(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'i', 'j', 'k'], dtype='object')]

In [20]:
s.is_unique

False

In [3]:
s.iloc[0], s.loc['e'] , s['e']

(33, 47, 47)

In [24]:
# Player scores keyed by player name (values and labels given as parallel lists)
players = pd.Series([30, 32, 20, 25], index=['Dhoni', 'Virat', 'Rohit', 'Jageda'])

In [25]:
players

Dhoni     30
Virat     32
Rohit     20
Jageda    25
dtype: int64

In [23]:
players.index

Index(['Dhoni', 'Virat', 'Rohit', 'Jageda'], dtype='object')

In [26]:
players['Dhoni':'Rohit']

Dhoni    30
Virat    32
Rohit    20
dtype: int64

In [41]:
players.loc['Dhoni':'Virat']   # from Dhoni to Virat 

Dhoni    30
Virat    32
dtype: int64

In [44]:
# Assigning through a label overwrites the value stored under that label.
# The assignment is done on a copy so that `players` keeps its original values:
# the cells that follow still display Dhoni's original score, and re-running
# this cell always gives the same result.
players_updated = players.copy()
players_updated['Dhoni'] = 38
players_updated

Dhoni     38
Virat     32
Rohit     20
Jageda    25
dtype: int64

In [33]:
players[ ['Dhoni',"Jageda"]]

Dhoni     30
Jageda    25
dtype: int64

In [27]:
players.iloc[0:2]   # from 0 to 1 

Dhoni    30
Virat    32
dtype: int64

In [29]:
aggr = marks.describe()
print(type(aggr))
print(aggr)

<class 'pandas.core.series.Series'>
count     5.000000
mean     68.600000
std      26.120873
min      33.000000
25%      56.000000
50%      67.000000
75%      88.000000
max      99.000000
dtype: float64


In [31]:
aggr['25%':'75%']

25%    56.0
50%    67.0
75%    88.0
dtype: float64

In [32]:
aggr.loc[['count','mean']]

count     5.0
mean     68.6
dtype: float64

In [34]:
aggr.iloc[:3]

count     5.000000
mean     68.600000
std      26.120873
dtype: float64

In [35]:
# Walk the Series as (label, value) pairs and print one pair per line
for label, mark in marks.items():
    print(label, mark)

101 56
102 67
103 88
103 99
106 33


In [43]:
# Request the built-in reductions by name. Recent pandas versions emit a
# FutureWarning when np.mean / sum / min / max are passed as callables, because
# they are currently swapped for the matching Series methods and that will change.
# `len` is kept as a callable so the result still carries a "len" row.
marks.agg(['mean', 'sum', 'min', 'max', len])

mean     68.6
sum     343.0
min      33.0
max      99.0
len       5.0
dtype: float64

In [45]:
# Overwrite the value stored under label 106 so that 67 appears twice and becomes the mode
marks[106] = 67
marks.mode()[0]   # mode() returns a Series of all modes; [0] picks the first one

67

In [46]:
s.tail()

f    57
g    46
i     2
j    82
k    38
dtype: int32

In [47]:
s.head(3)

a    33
b    24
c    43
dtype: int32

#### sorting

In [52]:
s.sort_values()   # returns a sorted series 

d     2
i     2
b    24
a    33
k    38
c    43
g    46
e    47
f    57
j    82
dtype: int32

#### apply()

In [53]:
def ispass(v, pass_mark=70):
    """Return True when the mark ``v`` reaches the pass mark.

    Parameters
    ----------
    v : number
        Mark to check.
    pass_mark : number, default 70
        Lowest mark that counts as a pass. The default keeps the original
        cutoff, so ``Series.apply(ispass)`` behaves as before.

    Returns
    -------
    bool
        True if ``v >= pass_mark``, otherwise False.
    """
    # The comparison already yields the answer; bool() keeps the result a plain
    # Python bool even when `v` is a NumPy scalar.
    return bool(v >= pass_mark)

In [54]:
marks.apply(ispass)

101    False
102    False
103     True
103     True
106    False
dtype: bool

In [55]:
# Raise every mark below 70 up to 70 and leave the other marks unchanged
# (marks.clip(lower=70) is the vectorized way to get the same result)
marks.apply(lambda v: 70 if v < 70 else v)

101    70
102    70
103    88
103    99
106    70
dtype: int64

#### pct_change()

In [14]:
# Monthly sales figures labelled by month abbreviation, in calendar order
sales = pd.Series([2000, 3000, 1000, 4000, 5000], index=['Jan', 'Feb', 'Mar', 'Apr', 'May'])

In [15]:
sales.pct_change() * 100

Jan           NaN
Feb     50.000000
Mar    -66.666667
Apr    300.000000
May     25.000000
dtype: float64

In [6]:
marks = pd.Series((70,90,30,50,60))
print(marks)
print(marks.rank())

0    70
1    90
2    30
3    50
4    60
dtype: int64
0    4.0
1    5.0
2    1.0
3    2.0
4    3.0
dtype: float64


In [7]:
marks.rank(method='first',ascending=False)

0    2.0
1    1.0
2    5.0
3    4.0
4    3.0
dtype: float64

In [9]:
marks = pd.Series((70,90,30,70,60))
marks.rank()

0    3.5
1    5.0
2    1.0
3    3.5
4    2.0
dtype: float64

In [12]:
marks.rank(method="max")

0    4.0
1    5.0
2    1.0
3    4.0
4    2.0
dtype: float64

In [13]:
marks.quantile(0.75)

70.0

### Convert Series to others

In [14]:
marks.to_dict()

{0: 70, 1: 90, 2: 30, 3: 70, 4: 60}

In [16]:
marks.to_json()

'{"0":70,"1":90,"2":30,"3":70,"4":60}'

In [17]:
# Pass `header` explicitly: the default for Series.to_csv changed from False to
# True across pandas releases, and the transitional versions emit a FutureWarning
# when it is omitted. True matches the default of current pandas, so the file
# starts with a header row.
marks.to_csv("marks.csv", header=True)

  """Entry point for launching an IPython kernel.


### Handling NA/NaN values

In [18]:
# Sample ages containing two missing entries (np.nan), used by the NaN-handling cells below
ages = pd.Series([50,45,35,35,50,60,23,56,np.nan,35,40, np.nan])

In [33]:
ages.value_counts(dropna=False)

35.0    3
NaN     2
50.0    2
40.0    1
56.0    1
23.0    1
60.0    1
45.0    1
dtype: int64

In [22]:
ages.isna()

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8      True
9     False
10    False
11     True
dtype: bool

In [23]:
ages.isna().sum()    # no. of NaN values 

2

In [25]:
# Descending sort; na_position='first' asks pandas to list the NaN entries ahead of the sorted values.
# NOTE(review): the output recorded below shows the NaN rows last, which does not match
# na_position='first' — re-run the cell or confirm which placement is intended.
ages.sort_values(ascending=False,na_position='first')

5     60.0
7     56.0
4     50.0
0     50.0
1     45.0
10    40.0
9     35.0
3     35.0
2     35.0
6     23.0
8      NaN
11     NaN
dtype: float64

In [26]:
ages.isna().any() 

True

In [28]:
ages.notna()

0      True
1      True
2      True
3      True
4      True
5      True
6      True
7      True
8     False
9      True
10     True
11    False
dtype: bool

In [27]:
ages[ages.notna()]    # keep only the non-NaN entries; same as ages[~ages.isna()], where ~ is the element-wise NOT

0     50.0
1     45.0
2     35.0
3     35.0
4     50.0
5     60.0
6     23.0
7     56.0
9     35.0
10    40.0
dtype: float64

In [29]:
# dropna() returns a new Series without the NaN entries and leaves `ages` itself unchanged;
# dropna(inplace=True) would remove them from `ages` directly instead.
ages.dropna()

0     50.0
1     45.0
2     35.0
3     35.0
4     50.0
5     60.0
6     23.0
7     56.0
9     35.0
10    40.0
dtype: float64

In [30]:
ages.fillna(ages.median()) # replace NaN with the median of the non-missing values

0     50.0
1     45.0
2     35.0
3     35.0
4     50.0
5     60.0
6     23.0
7     56.0
8     42.5
9     35.0
10    40.0
11    42.5
dtype: float64

In [32]:
ages.notna().sum()

10