# PANDAS

### Importing Pandas

In [1]:
import numpy as np
import pandas as pd

## series from lists

In [3]:
# A list of strings becomes an object-dtype Series with a default integer index.
country = "India Nepal USA China".split()
pd.Series(data=country)

0    India
1    Nepal
2      USA
3    China
dtype: object

In [4]:
# Integer series: a list of ints is stored with the int64 dtype.
runs = [
    13, 54, 100,
    76, 0, 1,
]
pd.Series(data=runs)

0     13
1     54
2    100
3     76
4      0
5      1
dtype: int64

In [6]:
# Custom index: label every mark with its subject instead of the default positions.
subjects = ['maths', 'english', 'science', 'hindi']
marks = [67, 57, 89, 100]

pd.Series(data=marks, index=subjects)

maths       67
english     57
science     89
hindi      100
dtype: int64

In [7]:
# Setting a name on the Series: the name is shown in the repr next to the dtype.
pd.Series(data=marks, index=subjects, name='Shubham ke marks')

maths       67
english     57
science     89
hindi      100
Name: Shubham ke marks, dtype: int64

## creating series from dictionary

In [9]:
# Dictionary keys become the index labels and dictionary values become the data.
marks = dict(maths=67, english=57, science=89, hindi=100)

marks_series = pd.Series(data=marks, name='Shubham ke marks')
print(marks_series)

maths       67
english     57
science     89
hindi      100
Name: Shubham ke marks, dtype: int64


## Series attributes

In [12]:
# size: number of elements held by the Series (4 subjects here)

marks_series.size

4

In [14]:
# name: the label that was passed as `name=` when the Series was created

marks_series.name

'Shubham ke marks'

In [15]:
# dtype: data type of the stored values (int64 for these integer marks)

marks_series.dtype

dtype('int64')

In [18]:
# is_unique: True when no value occurs more than once, False otherwise.
print(
    marks_series.is_unique,
    pd.Series([1, 1, 1, 2, 3, 4, 2, 4, 5, 5]).is_unique,
    sep='\n',
)

True
False


In [19]:
# index: the row labels; here the subject names taken from the dictionary keys

marks_series.index

Index(['maths', 'english', 'science', 'hindi'], dtype='object')

In [24]:
# Without an explicit index pandas falls back to a RangeIndex (0..n-1).
runs = [1, 4, 2, 100]
# Build the Series from `runs`; the previous `run` was an undefined name that only
# worked through leftover kernel state and fails on Restart & Run All.
runs_series = pd.Series(runs)

runs_series.index

RangeIndex(start=0, stop=4, step=1)

In [25]:
# values: the underlying data as a NumPy array (index labels are not included)

marks_series.values

array([ 67,  57,  89, 100])

In [26]:
# Same attribute on the default-indexed Series: only the data comes back.
runs_series.values

array([  1,   4,   2, 100])

## Creating series using read_csv

In [None]:
# With a one-column dataset (subs.csv): read_csv returns a DataFrame.
# Forward slashes keep the path portable; in 'datasets\subs.csv' the "\s" is an
# invalid escape sequence and a backslash separator only resolves on Windows.

pd.read_csv('datasets/subs.csv')

Unnamed: 0,Subscribers gained
0,48
1,57
2,40
3,43
4,44
...,...
360,231
361,226
362,155
363,144


In [45]:
# In Series form: collapse the single-column DataFrame into a Series.
# squeeze('columns') only collapses the column axis, so the result stays a Series
# even for a one-row file, and `subs_series` is never bound to a DataFrame.
# Forward slashes avoid the invalid "\s" escape and work on every platform.

subs_series = pd.read_csv('datasets/subs.csv').squeeze('columns')

print(type(subs_series))
print(subs_series)


<class 'pandas.core.series.Series'>
0       48
1       57
2       40
3       43
4       44
      ... 
360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, Length: 365, dtype: int64


In [81]:
# With a two-column dataset: use `match_no` as the index so that a single data
# column remains, then collapse that column into a Series.
# Forward slashes avoid the invalid "\k" escape and work on every platform.

kohli_series = pd.read_csv('datasets/kohli_ipl.csv', index_col='match_no').squeeze('columns')
print(kohli_series)

match_no
1       1
2      23
3      13
4      12
5       1
       ..
211     0
212    20
213    73
214    25
215     7
Name: runs, Length: 215, dtype: int64


In [56]:
# Bollywood movies dataset (movies.csv): index by the `movie` column and collapse
# the remaining column into a Series.
# Forward slashes avoid the invalid "\m" escape and work on every platform.

bolly_series = pd.read_csv('datasets/movies.csv', index_col='movie').squeeze('columns')
print(bolly_series)

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object


## Series Methods:

#### head and tail

In [58]:
# head() with no argument shows the first 5 rows.
subs_series.head()

0    48
1    57
2    40
3    43
4    44
Name: Subscribers gained, dtype: int64

In [61]:
# head(n) shows the first n rows (3 here).
kohli_series.head(3)

match_no
1     1
2    23
3    13
Name: runs, dtype: int64

In [60]:
# tail() with no argument shows the last 5 rows.
subs_series.tail()

360    231
361    226
362    155
363    144
364    172
Name: Subscribers gained, dtype: int64

In [62]:
# tail(n) shows the last n rows (3 here).
kohli_series.tail(3)

match_no
213    73
214    25
215     7
Name: runs, dtype: int64

#### sample

In [65]:
# sample() returns one randomly chosen row, so the output differs between runs;
# pass random_state=<int> if a reproducible pick is needed.
bolly_series.sample()

movie
Raazi    Alia Bhatt
Name: lead, dtype: object

In [67]:
# sample(n) returns n randomly chosen rows (3 here); output differs between runs.
bolly_series.sample(3)

movie
Bhoot Unkle    Jackie Shroff
Kaalakaandi    Saif Ali Khan
B.A. Pass       Shadab Kamal
Name: lead, dtype: object

#### value_counts

In [68]:
# value_counts(): how often each lead actor appears, most frequent first.
bolly_series.value_counts()

lead
Akshay Kumar        48
Amitabh Bachchan    45
Ajay Devgn          38
Salman Khan         31
Sanjay Dutt         26
                    ..
Diganth              1
Parveen Kaur         1
Seema Azmi           1
Akanksha Puri        1
Edwin Fernandes      1
Name: count, Length: 566, dtype: int64

#### sort_values()

In [69]:
# sort_values(): returns the runs sorted in ascending order; index labels travel
# with their values and kohli_series itself is left unchanged.
kohli_series.sort_values()

match_no
87       0
211      0
207      0
206      0
91       0
      ... 
164    100
120    100
123    108
126    109
128    113
Name: runs, Length: 215, dtype: int64

In [86]:
# Sort from highest to lowest; the first element is then the best score.
runs_high_to_low = kohli_series.sort_values(ascending=False)
highest_score = runs_high_to_low.iloc[0]
print(highest_score)

113


In [76]:
# sort_values() returns a new Series; to keep the sorted order, reassign the result (preferred) or pass .sort_values(inplace=True)

#### sort_index

In [77]:
# Original (unsorted) order, shown for comparison with sort_index() below.
print(bolly_series)

movie
Uri: The Surgical Strike                   Vicky Kaushal
Battalion 609                                Vicky Ahuja
The Accidental Prime Minister (film)         Anupam Kher
Why Cheat India                            Emraan Hashmi
Evening Shadows                         Mona Ambegaonkar
                                              ...       
Hum Tumhare Hain Sanam                    Shah Rukh Khan
Aankhen (2002 film)                     Amitabh Bachchan
Saathiya (film)                             Vivek Oberoi
Company (film)                                Ajay Devgn
Awara Paagal Deewana                        Akshay Kumar
Name: lead, Length: 1500, dtype: object


In [78]:
# sort_index(): returns the Series ordered by its index labels (movie titles).
bolly_series.sort_index()

movie
1920 (film)                   Rajniesh Duggall
1920: London                     Sharman Joshi
1920: The Evil Returns             Vicky Ahuja
1971 (2007 film)                Manoj Bajpayee
2 States (2014 film)              Arjun Kapoor
                                   ...        
Zindagi 50-50                      Veena Malik
Zindagi Na Milegi Dobara        Hrithik Roshan
Zindagi Tere Naam           Mithun Chakraborty
Zokkomon                       Darsheel Safary
Zor Lagaa Ke...Haiya!            Meghan Jadhav
Name: lead, Length: 1500, dtype: object

### Series Maths Methods

In [88]:
# count: number of non-missing values in the Series

print(kohli_series.count())

215


In [90]:
# sum and product:
# sum(): total of all values

print(subs_series.sum())

49510


In [91]:
# NOTE: the values are int64, and an int64 product wraps around silently on
# overflow. The plain subs_series.product() displayed np.int64(0) here, which is
# presumably such an artifact rather than the true product (TODO confirm that no
# entry is actually 0). Casting to object makes pandas multiply arbitrary-precision
# Python ints, so the exact product is returned.
subs_series.astype(object).product()

In [92]:
# mean / median / mode / std / var
# mean(): arithmetic average of the values

print(subs_series.mean())

135.64383561643837


In [93]:
# median(): middle value of the sorted runs
print(kohli_series.median())

24.0


In [94]:
 print(bolly_series.mode())

0    Akshay Kumar
Name: lead, dtype: object


In [95]:
 print(subs_series.std())

62.67502303725269


In [96]:
# var(): variance of the runs
print(kohli_series.var())

688.0024777222344


In [97]:
# min / max
# max(): largest value in the Series

subs_series.max()


396

In [98]:
# min(): smallest value in the Series
kohli_series.min()

0

In [99]:
# describe: summary statistics of the Series in a single call.
# The variable is `kohli_series` (all lower case); the previous `kohli_Series`
# raised a NameError, so the stale traceback output has been dropped.

kohli_series.describe()