# PANDAS SERIES

## Importing Library

In [1]:
import pandas as pd
import numpy as np

In [2]:
country = ['India','Srilanka','Australia','USA','China']

In [3]:
pd.Series(country)

0        India
1     Srilanka
2    Australia
3          USA
4        China
dtype: object

## Custom Index

In [4]:
marks = [67,68,64,33,27]
subjects = ['Maths','Science','Hindi','English','Social Science']

In [5]:
pd.Series(marks)

0    67
1    68
2    64
3    33
4    27
dtype: int64

In [6]:
pd.Series(marks,index=subjects)

Maths             67
Science           68
Hindi             64
English           33
Social Science    27
dtype: int64

In [7]:
# Setting a name: 
pd.Series(marks,index=subjects,name="Mayank ke marks")


Maths             67
Science           68
Hindi             64
English           33
Social Science    27
Name: Mayank ke marks, dtype: int64

## Series from Dictionary

In [8]:
marks = {
    'maths' : 67,
    'english' : 89,
    'science' : 76,
    'Hindi' : 76
}

In [9]:
marks_series = pd.Series(marks,name="mnku ke marks")

In [10]:
marks_series

maths      67
english    89
science    76
Hindi      76
Name: mnku ke marks, dtype: int64

## Series Attributes

In [11]:
# Size
marks_series.size

4

In [12]:
# dtype
marks_series.dtype

dtype('int64')

In [13]:
# name 
marks_series.name

'mnku ke marks'

In [14]:
# is_unique
marks_series.is_unique

False

In [15]:
# index 
marks_series.index

Index(['maths', 'english', 'science', 'Hindi'], dtype='object')

In [16]:
# values
marks_series.values

array([67, 89, 76, 76])

## Series using read_csv

In [17]:
# with one column
s = pd.read_csv('subs.csv').squeeze(True)

In [18]:
s

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [19]:
type(s)

pandas.core.series.Series

In [20]:
runs = pd.read_csv('kohli_ipl.csv',index_col='match_no').squeeze(True)

In [21]:
type(runs)

pandas.core.series.Series

In [22]:
movies = pd.read_csv('bollywood.csv',index_col='movie').squeeze(True)

In [23]:
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [24]:
type(movies)

pandas.core.series.Series

## Series Methods

In [25]:
# head and tail

s.head() # by default it will give us top five rows of our series or dataframe

0    48
1    57
2    40
3    43
4    44
Name: Subscribers gained, dtype: int64

In [26]:
runs.head(3) # we can specify the values we want to fetch from the head function by providing number.

match_no
1     1
2    23
3    13
Name: runs, dtype: int64

In [27]:
runs.tail() # by default it will give us last five rows of our series or dataframe.

match_no
211     0
212    20
213    73
214    25
215     7
Name: runs, dtype: int64

In [28]:
runs.tail(7) # same as head()

match_no
209    58
210    30
211     0
212    20
213    73
214    25
215     7
Name: runs, dtype: int64

In [29]:
# sample()

movies.sample() # it will show random rows

movie
Super Nani    Rekha
Name: lead, dtype: object

In [30]:
movies.sample(5) # we can specify how much random rows we want to see.

movie
Zameen (2003 film)           Ajay Devgn
Ugly (film)                  Rahul Bhat
Double Dhamaal              Sanjay Dutt
Rakhtbeej                    Julia Datt
Baar Baar Dekho       Sidharth Malhotra
Name: lead, dtype: object

In [31]:
# value_counts -> movies
movies.value_counts()

lead
Akshay Kumar            48
Amitabh Bachchan        45
Ajay Devgn              38
Salman Khan             31
Sanjay Dutt             26
                        ..
Tanishaa Mukerji         1
Tanuja                   1
Ankit                    1
Rakhee Gulzar            1
Geetika Vidya Ohlyan     1
Name: count, Length: 566, dtype: int64

In [38]:
# sort_values -> inplace
runs.sort_values(ascending=False).head(1).values[0]
runs

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [39]:
runs.sort_values(ascending=False)

match_no
128    113
126    109
123    108
120    100
164    100
      ... 
93       0
130      0
206      0
207      0
211      0
Name: runs, Length: 215, dtype: int64

In [49]:
# sort_index -> inplace -> movies
movies.sort_index(inplace=True)


In [50]:
movies

movie
1920 (film)                   Rajniesh Duggall
1920: London                     Sharman Joshi
1920: The Evil Returns             Vicky Ahuja
1971 (2007 film)                Manoj Bajpayee
2 States (2014 film)              Arjun Kapoor
                                   ...        
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, Length: 1500, dtype: object

In [45]:
runs = runs.copy()

In [46]:
runs.sort_values(inplace=True)

In [47]:
runs

match_no
8        0
87       0
93       0
91       0
206      0
      ... 
164    100
120    100
123    108
126    109
128    113
Name: runs, Length: 215, dtype: int64

## Serires Maths Methods

In [53]:
# count

movies.count()

np.int64(1500)

In [56]:
# sum -> product

s.sum()
s.product()

np.int64(0)

In [61]:
# mean -> median -> mode -> std -> var
s.mean()
print(runs.median())
print(movies.mode())
print(s.std())
print(runs.var())

24.0
0    Akshay Kumar
Name: lead, dtype: object
62.6750230372527
688.0024777222343


In [64]:
# min/max

s.min()
s.max()

np.int64(396)

In [65]:
# describe

runs.describe()

count    215.000000
mean      30.855814
std       26.229801
min        0.000000
25%        9.000000
50%       24.000000
75%       48.000000
max      113.000000
Name: runs, dtype: float64

## Series Indexing

In [68]:
# integer indexing

x = pd.Series([12,13,14,15,16,17,18,72,89,9])

x[2]

np.int64(14)

In [74]:
# Negative indexing 
x[-1]

KeyError: -1

In [75]:
movies[0]

  movies[0]


'Rajniesh Duggall'

In [78]:
# Slicing

runs[5:16]

match_no
207    0
135    0
130    0
211    0
106    1
204    1
113    1
77     1
1      1
5      1
75     1
Name: runs, dtype: int64

In [80]:
# negative slicing

runs[-5:]

match_no
164    100
120    100
123    108
126    109
128    113
Name: runs, dtype: int64

In [81]:
movies[-5:]

movie
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, dtype: object

In [82]:
movies[::2]

movie
1920 (film)                 Rajniesh Duggall
1920: The Evil Returns           Vicky Ahuja
2 States (2014 film)            Arjun Kapoor
3 A.M. (2014 film)             Salil Acharya
3 Idiots                          Aamir Khan
                                  ...       
Zero (2018 film)              Shah Rukh Khan
Zila Ghaziabad                  Vivek Oberoi
Zindaggi Rocks                  Sushmita Sen
Zindagi Na Milegi Dobara      Hrithik Roshan
Zokkomon                     Darsheel Safary
Name: lead, Length: 750, dtype: object

In [83]:
# Fancy indexing
runs[[1,3,4,5]]

match_no
1     1
3    13
4    12
5     1
Name: runs, dtype: int64

## Editing Serires

In [84]:
# editing by index

marks_series[1] = 100

marks_series

  marks_series[1] = 100


maths       67
english    100
science     76
Hindi       76
Name: mnku ke marks, dtype: int64

In [85]:
# what if an index does not exist

marks_series['sst'] = 67

In [86]:
marks_series

maths       67
english    100
science     76
Hindi       76
sst         67
Name: mnku ke marks, dtype: int64