### What is Pandas

Pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
built on top of the Python programming language.

https://pandas.pydata.org/about/index.html

### Pandas Series

A Pandas Series is like a column in a table. It is a 1-D array holding data of any type.

In [1]:
import numpy as np
import pandas as pd

### Series from lists

In [2]:
# string
country = ['India','Pakistan','USA','Nepal','Srilanka']

pd.Series(country)

Unnamed: 0,0
0,India
1,Pakistan
2,USA
3,Nepal
4,Srilanka


In [4]:
# integers
runs = [13,24,56,78,100]

runs_ser = pd.Series(runs)
runs_ser

Unnamed: 0,0
0,13
1,24
2,56
3,78
4,100


In [5]:
# custom index
marks = [67,57,89,100]
subjects = ['maths','english','science','hindi']

pd.Series(marks,index=subjects)

Unnamed: 0,0
maths,67
english,57
science,89
hindi,100


In [6]:
# setting a name
marks = pd.Series(marks,index=subjects,name='Nitish ke marks')
marks

Unnamed: 0,Nitish ke marks
maths,67
english,57
science,89
hindi,100


### Series from dict

In [7]:
marks = {
    'maths':67,
    'english':57,
    'science':89,
    'hindi':100
}

marks_series = pd.Series(marks,name='nitish ke marks')
marks_series

Unnamed: 0,nitish ke marks
maths,67
english,57
science,89
hindi,100


### Series Attributes

In [8]:
# size
marks_series.size

4

In [9]:
# dtype
marks_series.dtype

dtype('int64')

In [10]:
# name
marks_series.name

'nitish ke marks'

In [11]:
# is_unique
marks_series.is_unique

True

In [12]:
pd.Series([1,1,2,3,4,5]).is_unique

False

In [13]:
# index
marks_series.index

Index(['maths', 'english', 'science', 'hindi'], dtype='object')

In [14]:
runs_ser.index

RangeIndex(start=0, stop=5, step=1)

In [15]:
type(runs_ser.index)

pandas.core.indexes.range.RangeIndex

In [16]:
# values
marks_series.values

array([ 67,  57,  89, 100])

### Series using read_csv

In [23]:
# with one col
subs = pd.read_csv('/content/subs.csv', header=None).squeeze()
subs

Unnamed: 0,0
0,Subscribers gained
1,48
2,57
3,40
4,43
5,44
6,46
7,33
8,40
9,44


In [35]:
# with 2 cols
vk = pd.read_csv('/content/kohli_ipl.csv', index_col=0, header=None).squeeze()
vk

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
match_no,runs
1,1
2,23
3,13
4,12
5,1
6,9
7,34
8,0
9,21


In [38]:
movies = pd.read_csv('/content/bollywood.csv',index_col=0,header=None).squeeze()
movies

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
movie,lead
Uri: The Surgical Strike,Vicky Kaushal
Battalion 609,Vicky Ahuja
The Accidental Prime Minister (film),Anupam Kher
Why Cheat India,Emraan Hashmi
Evening Shadows,Mona Ambegaonkar
Soni (film),Geetika Vidya Ohlyan
Fraud Saiyaan,Arshad Warsi
Bombairiya,Radhika Apte
Manikarnika: The Queen of Jhansi,Kangana Ranaut


### Series methods

In [39]:
# head and tail
subs.head()

Unnamed: 0,0
0,Subscribers gained
1,48
2,57
3,40
4,43


In [40]:
vk.head(3)

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
match_no,runs
1,1
2,23


In [41]:
vk.tail(10)

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
206,0
207,0
208,9
209,58
210,30
211,0
212,20
213,73
214,25
215,7


In [42]:
# sample
movies.sample(5)

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
Aetbaar,Amitabh Bachchan
Maine Pyaar Kyun Kiya?,Salman Khan
Gollu Aur Pappu,Dimple Kapadia
Darwaaza Bandh Rakho,Ishrat Ali
3 Idiots,Aamir Khan


In [43]:
# value_counts -> movies
movies.value_counts()

Unnamed: 0_level_0,count
1,Unnamed: 1_level_1
Akshay Kumar,48
Amitabh Bachchan,45
Ajay Devgn,38
Salman Khan,31
Sanjay Dutt,26
Shah Rukh Khan,22
Emraan Hashmi,21
Saif Ali Khan,18
John Abraham,18
Shahid Kapoor,17


In [59]:
# sort_values -> inplace
vk.sort_values()

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
207,0
93,0
91,0
87,0
130,0
8,0
135,0
206,0
211,0
113,1


In [60]:
# sort_values -> inplace
vk.sort_values(ascending=False)

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
match_no,runs
82,99
81,93
145,92
178,90
66,9
64,9
6,9
208,9
171,9


In [61]:
# sort_values -> inplace
vk.sort_values(ascending=False).head()

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
match_no,runs
82,99
81,93
145,92
178,90


In [62]:
# sort_values -> inplace
vk.sort_values(ascending=False).head().values[2]

'93'

In [65]:
# sort_index -> inplace -> movies
movies.sort_index(inplace=True)

In [66]:
movies

Unnamed: 0_level_0,1
0,Unnamed: 1_level_1
1920 (film),Rajniesh Duggall
1920: London,Sharman Joshi
1920: The Evil Returns,Vicky Ahuja
1971 (2007 film),Manoj Bajpayee
2 States (2014 film),Arjun Kapoor
22 Yards,Barun Sobti
3 A.M. (2014 film),Salil Acharya
3 Deewarein,Naseeruddin Shah
3 Idiots,Aamir Khan
3 Storeys,Aisha Ahmed


### Series Maths Methods

In [68]:
# count
vk.count()

216

In [70]:
# sum -> product
subs.sum()

'Subscribers gained4857404344463340447449564942394454514037404550563549468861425650504976123108110566265663366821261098883108101128111961019910572937610470809910413611512311214587849280707013212398968886929211012010572849078818989938695837287100727967719077100886595788996811058066736815990837677666485136135717975727810993847487128105105828610088104871191189711411710210196901141181131231291401802252492653062612221701601141751701151521051059311010213676105130146146105146150114113117108103108138124139117103108146131113108125919798119115123119127135137156160122111131192224254214236261247207254301233168179154152153202185158173180184259196213210179182216183185159180160154147138157134184134132119153185138150134160170175228237276173290295245241267269211186206202176236222203190190170156172174157178189174157173162151115152150186156230186150153167183244158155167157196243227259183166154396312268229197188176221225221202171185184276258219212192164192220196197204149156177210209231226155144172'

In [73]:
# mean -> median -> mode -> std -> var
# subs.mean()
# print(vk.median())
print(movies.mode())
print(subs.std())
print(vk.var())

0    Akshay Kumar
Name: 1, dtype: object


TypeError: could not convert string to float: 'Subscribers gained'

In [74]:
# min/max
subs.max()

'Subscribers gained'

In [75]:
# describe
subs.describe()

Unnamed: 0,0
count,366
unique,181
top,105
freq,9
