In [1]:
import pandas as pd

## Difference between the NumPy and pandas libraries
pandas is designed for working with tabular or heterogeneous data, while NumPy is best suited for working with homogeneous numerical array data.

## Two ways to import the library
1. Import Series and DataFrame directly into the local namespace:  
    from pandas import Series, DataFrame
2. Follow the general convention and import the package under the alias pd:  import pandas as pd

## Two data structures in Pandas
1. Data Series
2. Data Frame

A Series is a one-dimensional array-like object containing a sequence of values (of types similar to NumPy dtypes).
Unlike a NumPy array, which is addressed only by the positions 0...n-1, a Series also carries an index: an associated array of data labels, one per value.


#Example of series with default indexes 0..n-1

In [2]:
x= pd.Series([4, 7, -5, 3])
print(x)

0    4
1    7
2   -5
3    3
dtype: int64


In [4]:
print(x.values)
print(x.index)

[ 4  7 -5  3]
RangeIndex(start=0, stop=4, step=1)


## Setting user-defined index labels, similar to the keys of a dictionary (unlike dictionary keys, labels may repeat)

In [6]:
# Build a Series with explicit string labels; note that the label 'c' appears twice.
a = pd.Series(data=[41, 71, -5, 3, 38], index=list('dabcc'))
# Show the Series, then its raw values, then its index.
print(a, a.values, a.index, sep=" \n ")


d    41
a    71
b    -5
c     3
c    38
dtype: int64 
 [41 71 -5  3 38] 
 Index(['d', 'a', 'b', 'c', 'c'], dtype='object')


In [7]:
a.name

In [9]:
a.name='Deduction'
a.index.name='nameofstudents'
print(a)

nameofstudents
d    41
a    71
b    -5
c     3
c    38
Name: Deduction, dtype: int64


## setting user-defined indexes with duplicate values

In [10]:
a = pd.Series([41, 71, -5, 3,38], index=['d', 'a', 'a', 'c','e'])
print(a)

d    41
a    71
a    -5
c     3
e    38
dtype: int64


## using index labels to get values in series

In [11]:
print(a['a'],"\n",a['d'])

a    71
a    -5
dtype: int64 
 41


In [13]:
print(type(a['a']))

<class 'pandas.core.series.Series'>


## Using a list of index labels (fancy indexing) to retrieve values

In [16]:
print(a[['c','d']],"\n",a['a'])

c     3
d    41
dtype: int64 
 a    71
a    -5
dtype: int64


## perform numpy like operations on the series values

In [17]:
a

d    41
a    71
a    -5
c     3
e    38
dtype: int64

In [18]:
# Arithmetic on a Series is element-wise and keeps the index labels.
l = a.mul(2)
print(l, "\n", type(l))
# A boolean mask keeps only the entries whose value is positive.
l1 = a[a.gt(0)]
print(l1, type(l1))


d     82
a    142
a    -10
c      6
e     76
dtype: int64 
 <class 'pandas.core.series.Series'>
d    41
a    71
c     3
e    38
dtype: int64 <class 'pandas.core.series.Series'>


In [19]:
print(a>0)

d     True
a     True
a    False
c     True
e     True
dtype: bool


## in: membership operator

In [19]:
print(a, a.values, a.index, sep=" \n ")
# `in` applied to a Series checks the index labels, not the values.
for label in ('f', 'e'):
    x = label in a
    print(x)


d    41
a    71
a    -5
c     3
e    38
dtype: int64 
 [41 71 -5  3 38] 
 Index(['d', 'a', 'a', 'c', 'e'], dtype='object')
False
True


In [20]:
10 in a.values

False

## Constructing series from a data dictionary

In [21]:
city = {'Noida': 5000,'Delhi': 35000, 'madras': 71000, 'Mumbai': 16000}
cityseries = pd.Series(city)
print(cityseries)


Noida      5000
Delhi     35000
madras    71000
Mumbai    16000
dtype: int64


#creating data series thru dictionary

In [22]:
dictionary = {'A' : [50,70], 'B' : 10, 'C' : 80} 
series = pd.Series(dictionary) 
series


A    [50, 70]
B          10
C          80
dtype: object

## Resetting the row index order

In [24]:
dictionary = dict(A=50, B=10, C=80)
# Without an explicit index the Series keeps the dictionary's key order.
series1 = pd.Series(data=dictionary)
print(series1)
# Passing index= pulls the values out of the dictionary in the requested label order.
series = pd.Series(data=dictionary, index=list('BCA'))
print(series)

A    50
B    10
C    80
dtype: int64
B    10
C    80
A    50
dtype: int64


In [25]:
cityseries

Noida      5000
Delhi     35000
madras    71000
Mumbai    16000
dtype: int64

In [26]:
city

{'Noida': 5000, 'Delhi': 35000, 'madras': 71000, 'Mumbai': 16000}

## Creating a Series from a dictionary while passing a new index: labels missing from the dictionary get NaN, and dictionary keys that are not in the new index are dropped

In [27]:
newcity = ['Delhi', 'chd', 'madras', 'mumbai']
print(newcity)
print(cityseries)
# Labels are matched exactly (case-sensitive) against the keys of `city`.
countryseries = pd.Series(data=city, index=newcity)
print(countryseries)

['Delhi', 'chd', 'madras', 'mumbai']
Noida      5000
Delhi     35000
madras    71000
Mumbai    16000
dtype: int64
Delhi     35000.0
chd           NaN
madras    71000.0
mumbai        NaN
dtype: float64


In [28]:
x=pd.isnull(countryseries)
print(x)


Delhi     False
chd        True
madras    False
mumbai     True
dtype: bool


## automatically aligns by index label in arithmetic operations:


In [29]:
cityseries

Noida      5000
Delhi     35000
madras    71000
Mumbai    16000
dtype: int64

In [30]:
countryseries

Delhi     35000.0
chd           NaN
madras    71000.0
mumbai        NaN
dtype: float64

## Index labels that are not present in both Series get NaN in the result of an arithmetic operation

In [31]:
print(cityseries+countryseries)

Delhi      70000.0
Mumbai         NaN
Noida          NaN
chd            NaN
madras    142000.0
mumbai         NaN
dtype: float64


## converting data types of values/elements

In [32]:
b=pd.Series([12,13,14,'11'])
print(b)

0    12
1    13
2    14
3    11
dtype: object


In [33]:

d=b.astype(int)
c=pd.Series([12,13,14.0,12.5,13.5])
print(b,"\n",c,"\n",d)
print(c+d)


0    12
1    13
2    14
3    11
dtype: object 
 0    12.0
1    13.0
2    14.0
3    12.5
4    13.5
dtype: float64 
 0    12
1    13
2    14
3    11
dtype: int32
0    24.0
1    26.0
2    28.0
3    23.5
4     NaN
dtype: float64


In [34]:
import numpy as np


## creating a series with same element value

In [35]:
# A scalar is broadcast to every label of the given index (0, 1, 2, 3).
ex1 = pd.Series(data=6, index=np.arange(4))

In [36]:
ex1

0    6
1    6
2    6
3    6
dtype: int64

In [37]:
ex1[1:3]

1    6
2    6
dtype: int64

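## Cannot use -1 as an index when the default integer index is used
With the default integer index, `ex1[-1]` is looked up as the label -1, which does not exist, so it raises a KeyError. With a non-integer label index, `[-1]` has historically fallen back to positional access, but that fallback is deprecated in recent pandas versions; `.iloc[-1]` works in both cases.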

In [38]:
# With an integer index, [] treats -1 as a label, and no such label exists.
# Catch and show the error so the notebook still runs top to bottom.
try:
    ex1[-1]
except KeyError as err:
    print("KeyError:", err)

KeyError: -1

In [39]:
ex1.index=['A','C','B','D']
ex1.name='Marks'
ex1.index.name='SName'

In [40]:
ex1

SName
A    6
C    6
B    6
D    6
Name: Marks, dtype: int64

## display number of elements in a data series ex1

In [51]:
ex1.values.size

4

In [41]:
len(ex1.values)

4

In [42]:
ex1.size

4

In [43]:
len(ex1)

4

## Difference between index and reindex:
index: an attribute of the object to which new index labels can be assigned; the number of labels must be equal to the number of values.

Reindex: used to conform a Series to a new index with optional filling logic, placing NA/NaN at labels that had no value in the previous index. The change is not made in place; reindex returns a new Series, which has to be assigned to a name to be kept.

#What will be the output?

In [44]:
ex1

SName
A    6
C    6
B    6
D    6
Name: Marks, dtype: int64

In [None]:
# Assigning an index whose length differs from the number of values is rejected.
# Catch and show the error so the notebook still runs top to bottom.
try:
    ex1.index = ['xx', 'yy']
except ValueError as err:
    print("ValueError:", err)

In [46]:
ex1.reindex(['AA','BB','C','D','E'])

SName
AA    NaN
BB    NaN
C     6.0
D     6.0
E     NaN
Name: Marks, dtype: float64

In [47]:
ex1

SName
A    6
C    6
B    6
D    6
Name: Marks, dtype: int64

In [50]:
ex2=ex1.reindex(['AA','BB','C','D','E'])
ex2

SName
AA     NaN
BB     NaN
C     26.0
D      6.0
E      NaN
Name: Marks, dtype: float64

## Add 20 to the element labelled 'C' (the 2nd element) in series ex1

In [49]:
ex1['C']+=20
ex1

SName
A     6
C    26
B     6
D     6
Name: Marks, dtype: int64

## alternative way: using default index position

In [65]:
# Positional access on a label-indexed Series goes through .iloc;
# plain ex1[1] relies on the deprecated integer-key fallback of [].
ex1.iloc[1] *= 30
ex1

SName
A      6
C    780
B      6
D      6
Name: Marks, dtype: int64

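## **Slicing**: with integer positions, slicing works the same way as for arrays and lists (the stop position is excluded).
When index labels are used, the stop label is also included in the result. A label slice follows the order of the index, so `ex1['D':'A']` below is empty because 'D' comes after 'A'.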

In [68]:
ex1[0:2]

SName
A      6
C    780
Name: Marks, dtype: int64

In [53]:
ex1['D':'A']

Series([], Name: Marks, dtype: int64)

#fancy index for accessing elements

In [118]:
ex1[['A','D','C']]

SName
A      6
D      6
C    780
Name: Marks, dtype: int64

In [121]:
ex1[['A','D']]=0

In [119]:
# .iloc[-1] is the explicit positional way to read the last element;
# plain ex1[-1] relies on the deprecated integer-key fallback of [].
ex1.iloc[-1]

6

In [122]:
ex1

SName
A      0
C    780
B      6
D      0
Name: Marks, dtype: int64

In [123]:
ser=pd.Series(np.arange(3.))
ser

0    0.0
1    1.0
2    2.0
dtype: float64

In [None]:
# `ser` has the default integer index, so [] treats -1 as a label that does not exist.
# Catch and show the error so the notebook still runs top to bottom.
try:
    ser[-1]
except KeyError as err:
    print("KeyError:", err)

#Use iloc and loc explicitly to distinguish positional (integer) access from label-based access: plain [] treats integer index values, including those given by the user, as labels, so iloc is needed to access by position.

In [125]:
ser.iloc[-1]

2.0

In [126]:
ser.loc[1]

1.0

#Adding the missing index labels with reindex; the new labels get NaN unless a fill method is given

In [128]:
# Series whose index holds only every third label: 0, 3, 6.
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=np.arange(0, 9, 3))
print(obj3)
# Plain reindex (no fill method): the new labels 1, 2, 4, 5, 7, 8 get NaN.
obj1 = obj3.reindex(range(9))
print(obj1)


0      blue
3    purple
6    yellow
dtype: object
0      blue
1       NaN
2       NaN
3    purple
4       NaN
5       NaN
6    yellow
7       NaN
8       NaN
dtype: object


In [129]:
obj1=obj3.reindex(range(0,9,1), method='ffill')  #forward fill
print(obj1)


0      blue
1      blue
2      blue
3    purple
4    purple
5    purple
6    yellow
7    yellow
8    yellow
dtype: object


In [130]:
obj1=obj3.reindex(range(9), method='bfill')  #backward fill
print(obj1)

0      blue
1    purple
2    purple
3    purple
4    yellow
5    yellow
6    yellow
7       NaN
8       NaN
dtype: object


#Removing a label using drop; drop returns a new Series and does not change the original in place

In [131]:
obj1.drop(0)

1    purple
2    purple
3    purple
4    yellow
5    yellow
6    yellow
7       NaN
8       NaN
dtype: object

In [132]:
obj1

0      blue
1    purple
2    purple
3    purple
4    yellow
5    yellow
6    yellow
7       NaN
8       NaN
dtype: object

In [133]:
ex1

SName
A      0
C    780
B      6
D      0
Name: Marks, dtype: int64

In [134]:
# Assign by position with .iloc; plain ex1[3] = ... relies on the deprecated
# integer-key fallback of [] on a label-indexed Series.
ex1.iloc[3] = 105
ex1

SName
A      0
C    780
B      6
D    105
Name: Marks, dtype: int64

#what you get from the following?

In [135]:
ex1[ex1>100]

SName
C    780
D    105
Name: Marks, dtype: int64

#how to get indexes for specific elements

In [136]:
ex1[ex1>100].index

Index(['C', 'D'], dtype='object', name='SName')

In [137]:
ex1.drop(ex1[ex1>100].index)

SName
A    0
B    6
Name: Marks, dtype: int64