# What is Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool, built on top of the Python programming language.

https://pandas.pydata.org/about/index.html

# Pandas Series
A Pandas Series is like a column in a table. It is a 1-D array holding data of any type.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Build a Series from a plain Python list; pandas supplies a default 0..n-1 integer index.
country = [
    'India',
    'Pakistan',
    'Bangladesh',
    'Sri Lanka',
    'Nepal',
]

pd.Series(data=country)

0         India
1      Pakistan
2    Bangladesh
3     Sri Lanka
4         Nepal
dtype: object

In [3]:
# A list of integers: pandas infers an integer dtype for the resulting Series.
runs = [13, 24, 45, 56, 100]

pd.Series(data=runs)

0     13
1     24
2     45
3     56
4    100
dtype: int64

In [4]:
# Custom index: label each mark with its subject instead of the default 0..n-1 positions.
marks = [23, 45, 67, 89, 90]
subjects = [
    'maths',
    'physics',
    'chemistry',
    'english',
    'hindi',
]

pd.Series(data=marks, index=subjects)

maths        23
physics      45
chemistry    67
english      89
hindi        90
dtype: int64

In [6]:
# A Series can also carry a name, passed via the `name` argument.
marks_of_students = pd.Series(
    data=marks,
    index=subjects,
    name='Marks of students',
)

In [8]:
# Series from a dict: the keys become the index labels and the values become the data.
marks_dict = {
    'maths': 23,
    'physics': 45,
    'chemistry': 67,
    'english': 89,
    'hindi': 90,
}

marks_series = pd.Series(data=marks_dict, name='Marks of students in dict')
marks_series


maths        23
physics      45
chemistry    67
english      89
hindi        90
Name: Marks of students in dict, dtype: int64

## Series Attributes

In [11]:
# size: number of elements in the Series

marks_series.size

5

In [12]:
# dtype: data type of the values stored in the Series

marks_series.dtype

dtype('int64')

In [13]:
# name: the label that was given to the Series when it was created

marks_series.name

'Marks of students in dict'

In [14]:
# is_unique: True when no value in the Series is repeated

marks_series.is_unique

True

In [15]:
# index: the labels of the Series (here, the subject names)

marks_series.index

Index(['maths', 'physics', 'chemistry', 'english', 'hindi'], dtype='object')

In [18]:
# values: the underlying data of the Series as a NumPy array
# NOTE: a cell displays only its last expression, so the .values result below is
# discarded and the output shows just type(marks_series).

marks_series.values

type(marks_series)

pandas.core.series.Series

## Series using read_csv

In [32]:
# Load subs.csv; read_csv returns a DataFrame even when the file has a single data column.
subs = pd.read_csv('subs.csv')

In [24]:
# Confirm what read_csv hands back: a DataFrame, not a Series.
type(pd.read_csv('subs.csv'))

pandas.core.frame.DataFrame

In [26]:
# Convert the single-column DataFrame into a Series.
# squeeze('columns') collapses only the column axis, so the result is always a Series;
# a bare squeeze() would collapse a one-row file all the way down to a scalar.
series = pd.read_csv('subs.csv').squeeze('columns')
series

0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64

In [29]:
# Two-column CSV: use match_no as the index so that a single data column remains,
# then collapse that column into a Series.
# squeeze('columns') collapses only the column axis, so the result stays a Series
# even if the file had just one row (a bare squeeze() would return a scalar then).

vk = pd.read_csv('kohli_ipl.csv', index_col='match_no').squeeze('columns')
vk

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64

In [37]:
# Same pattern: movie titles become the index and the remaining column becomes the values.
# squeeze('columns') guarantees a Series; a bare squeeze() would return a scalar for a one-row file.
movies = pd.read_csv('bollywood.csv', index_col='movie').squeeze('columns')

In [33]:
# head() shows the first 5 rows by default (subs is the DataFrame returned by read_csv)
subs.head()

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44


In [34]:
# tail() shows the last 5 rows by default
subs.tail()

Unnamed: 0,Subscribers gained
360,231
361,226
362,155
363,144
364,172


In [35]:
# Pass n to head() to change how many rows are shown
subs.head(10)

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44
5,46
6,33
7,40
8,44
9,74


In [36]:
# tail() works the same way on a Series: the last 10 entries of vk
vk.tail(10)

match_no
206     0
207     0
208     9
209    58
210    30
211     0
212    20
213    73
214    25
215     7
Name: runs, dtype: int64

In [38]:
# sample(n): draw n entries at random, so the output changes on every run
# (pass random_state=<int> to make the draw reproducible)
movies.sample(10)

movie
Gangster (2006 film)         Kangana Ranaut
Aaja Nachle                   Madhuri Dixit
A Flat (film)               Jimmy Sheirgill
Fraud Saiyaan                  Arshad Warsi
Aarakshan                  Amitabh Bachchan
Gabbar Is Back                 Akshay Kumar
Dostana (2008 film)       Abhishek Bachchan
Veer (2010 film)                Salman Khan
Phillauri (film)             Anushka Sharma
Kis Kisko Pyaar Karoon         Kapil Sharma
Name: lead, dtype: object

In [39]:
# Display the movies Series (index: movie title, values: lead actor) before counting
# how often each lead appears; pandas elides the middle of long output.
movies

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object

In [40]:
# value_counts(): how many movies each lead actor/actress appears in, most frequent first

movies.value_counts() 

lead
Akshay Kumar            48
Amitabh Bachchan        45
Ajay Devgn              38
Salman Khan             31
Sanjay Dutt             26
                        ..
Tanishaa Mukerji         1
Tanuja                   1
Ankit                    1
Rakhee Gulzar            1
Geetika Vidya Ohlyan     1
Name: count, Length: 566, dtype: int64

In [42]:
# sort_values(): returns a new Series ordered by value; ascending=False puts the highest scores first

vk.sort_values(ascending=False)

match_no
128    113
126    109
123    108
120    100
164    100
      ... 
93       0
130      0
206      0
207      0
211      0
Name: runs, Length: 215, dtype: int64

In [None]:
# Highest score: sort descending, keep the first row, and pull the scalar out of the underlying array.
# sort_values() does not modify vk; to keep the sorted order either reassign
# (vk = vk.sort_values(...)) or pass inplace=True -- these are two separate options.
vk.sort_values(ascending=False).head(1).values[0]

np.int64(113)