# Pandas Fundamentals


In [1]:
import pandas as pd

### Series

In [2]:
# A Series is a one-dimensional labelled array (an ndarray with axis labels).
# No index is given here, so the values are labelled with the default 0, 1, 2.
a = pd.Series(data=[4, 7, 3])

In [3]:
# Show the Series: index labels on the left, values on the right
a

0    4
1    7
2    3
dtype: int64

In [4]:
# .values returns the data of the Series as an ndarray
# (or an ndarray-like object, depending on the dtype)
a.values

array([4, 7, 3], dtype=int64)

In [6]:
# Return the index labels as a plain Python list
list(a.index)

[0, 1, 2]

In [7]:
# Replace the default integer index with custom labels
x = pd.Series(data=[4, 7, 3], index=['a', 'b', 'c'])

In [8]:
# Show the Series with its custom index labels
x

a    4
b    7
c    3
dtype: int64

In [9]:
# Look up a single value by its index label
x['a']

4

In [10]:
# A list of labels selects several values at once and returns a new Series
x[['a','c']]

a    4
c    3
dtype: int64

##### Convert a Python dict to a Series

In [11]:
# Plain Python dict mapping labels to values
sdata = {
    'a': 10,
    'b': 30,
    'c': 12,
}

In [12]:
# The dict keys become the index labels and the dict values become the data
s = pd.Series(sdata)

In [13]:
# Show the Series built from the dict
s

a    10
b    30
c    12
dtype: int64

### DataFrame

In [14]:
# A DataFrame is two-dimensional, size-mutable, potentially heterogeneous tabular data.
# Calling the constructor with no arguments gives an empty frame: no columns and no index.
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


In [15]:
# Flat list of five integers used as DataFrame input
data = [1, 2, 3, 4, 5]

In [16]:
# A flat list becomes a single column labelled 0, with one row per element
df = pd.DataFrame(data)

In [17]:
# Show the single-column frame
df

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5


In [18]:
# Nested list: each inner list is one record holding a name and an age
data = [
    ['Alex', 10],
    ['Bob', 12],
    ['Clarke', 23],
]

In [19]:
# Each inner list becomes a row; with no column names given, the columns are labelled 0 and 1
df = pd.DataFrame(data)

In [20]:
# Show the frame with its default column labels
df

Unnamed: 0,0,1
0,Alex,10
1,Bob,12
2,Clarke,23


In [21]:
# Name the columns explicitly instead of using the default labels 0 and 1
df = pd.DataFrame(data, columns=['Name', 'Age'])

In [22]:
# Show the frame with named columns
df

Unnamed: 0,Name,Age
0,Alex,10
1,Bob,12
2,Clarke,23


In [23]:
# Alternative input for a DataFrame: a dict mapping each column name
# to the list of values in that column
data = {
    'Name': ['Alex', 'Bob', 'Clarke'],
    'Age': [10, 12, 23],
}

In [24]:
# The dict keys become the column names; the rows get the default integer index
df2 = pd.DataFrame(data)

In [25]:
# Show the frame built from the dict of lists
df2

Unnamed: 0,Name,Age
0,Alex,10
1,Bob,12
2,Clarke,23


In [26]:
# Give the rows custom labels through the index argument
data = {
    'Name': ['Alex', 'Bob', 'Clarke'],
    'Age': [10, 12, 23],
}
df = pd.DataFrame(data=data, index=['a', 'b', 'c'])
print(df)

     Name  Age
a    Alex   10
b     Bob   12
c  Clarke   23


In [27]:
# A list of dicts: every dict is one row, and its keys name the columns
data = [
    {'a': 10, 'b': 20, 'c': 30},
    {'a': 12, 'b': 34, 'c': 23},
    {'a': 33, 'b': 334, 'c': 11},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,a,b,c
0,10,20,30
1,12,34,23
2,33,334,11


#### Creating a DataFrame from Series

In [28]:
# Two Series with different indexes: 'two' has no entry for label 'a'
d = dict(
    one=pd.Series(data=[1, 2, 3, 5], index=['a', 'b', 'c', 'd']),
    two=pd.Series(data=[2, 3, 4], index=['b', 'c', 'd']),
)

In [29]:
# Each dict key becomes a column. The Series are aligned on their index labels,
# and a label that is missing from one Series is filled with NaN in that column.
df = pd.DataFrame(d)

In [30]:
# Row 'a' has no value in column 'two', and that column is displayed as floats
df

Unnamed: 0,one,two
a,1,
b,2,2.0
c,3,3.0
d,5,4.0


##### Column selection

In [31]:
# Selecting a single column by name returns that column as a Series
df['one']

a    1
b    2
c    3
d    5
Name: one, dtype: int64

### Column addition and deletion

In [33]:
# Add a new column 'three' to df. The Series is aligned on the index labels,
# so any row label that the Series does not contain is left as NaN.
df['three'] = pd.Series(data=[111, 222, 333], index=['a', 'b', 'c'])

In [34]:
# Show the frame with the new 'three' column
df

Unnamed: 0,one,two,three
a,1,,111.0
b,2,2.0,222.0
c,3,3.0,333.0
d,5,4.0,


In [35]:
# Create column 'four' as the element-wise sum of columns 'one' and 'two'
# (a row with NaN in either input column gets NaN in 'four')
df['four'] = df['one'] + df['two']

In [36]:
# Show the frame with the computed 'four' column
df

Unnamed: 0,one,two,three,four
a,1,,111.0,
b,2,2.0,222.0,4.0
c,3,3.0,333.0,6.0
d,5,4.0,,9.0


In [37]:
# Delete a column in place with the del statement
del df['three']

In [38]:
# Show the frame after deleting 'three'
df

Unnamed: 0,one,two,four
a,1,,
b,2,2.0,4.0
c,3,3.0,6.0
d,5,4.0,9.0


In [39]:
# pop removes the column from df and returns the removed column as a Series
df.pop('four')

a    NaN
b    4.0
c    6.0
d    9.0
Name: four, dtype: float64

In [40]:
# Show the frame after popping 'four'
df

Unnamed: 0,one,two
a,1,
b,2,2.0
c,3,3.0
d,5,4.0


#### Addition of rows

In [41]:
# Two small frames that share the same column names 'a' and 'b'
df = pd.DataFrame(data=[[1, 2], [3, 4]], columns=['a', 'b'])
df2 = pd.DataFrame(data=[[5, 6], [7, 8]], columns=['a', 'b'])

In [42]:
# Show the first frame
df

Unnamed: 0,a,b
0,1,2
1,3,4


In [43]:
# Show the second frame
df2

Unnamed: 0,a,b
0,5,6
1,7,8


In [45]:
# Add the rows of df2 underneath the rows of df with pd.concat.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0,
# so pd.concat is the supported way to add rows.
# Each frame keeps its own index labels, so labels repeat in the result
# (pass ignore_index=True to pd.concat to renumber the rows instead).
# Note: df is reassigned, so running this cell again stacks df2 once more.
df = pd.concat([df, df2])

In [46]:
# Show the combined frame; the original index labels (0, 1) are kept, so they repeat
df

Unnamed: 0,a,b
0,1,2
1,3,4
0,5,6
1,7,8
0,5,6
1,7,8


In [47]:
# Small all-numeric frame used for the summary statistics in the cells that follow
data = [
    {'a': 10, 'b': 20, 'c': 30},
    {'a': 12, 'b': 34, 'c': 23},
    {'a': 33, 'b': 334, 'c': 11},
]
df = pd.DataFrame(data=data)

In [48]:
# Show the numeric frame
df

Unnamed: 0,a,b,c
0,10,20,30
1,12,34,23
2,33,334,11


In [51]:
# axis=0 adds the values down the rows, giving one total per column
df.sum(axis=0)

a     55
b    388
c     64
dtype: int64

In [52]:
# axis=1 adds the values across the columns, giving one total per row
df.sum(axis=1)

0     60
1     69
2    378
dtype: int64

In [53]:
# Mean of each column (axis=0)
df.mean(axis=0)

a     18.333333
b    129.333333
c     21.333333
dtype: float64

In [54]:
# Mean of each row (axis=1)
df.mean(axis=1)

0     20.0
1     23.0
2    126.0
dtype: float64

In [55]:
# Sample standard deviation of each column; axis=0 is the default and is
# written out here to match the other column-wise examples
df.std(axis=0)

a     12.741010
b    177.384704
c      9.609024
dtype: float64

In [56]:
# Sample standard deviation of each row (axis=1)
df.std(axis=1)

0     10.000000
1     11.000000
2    180.468834
dtype: float64

In [58]:
# describe() generates descriptive statistics for each column:
# count, mean, std, min, the 25%/50%/75% percentiles and max
df.describe()

Unnamed: 0,a,b,c
count,3.0,3.0,3.0
mean,18.333333,129.333333,21.333333
std,12.74101,177.384704,9.609024
min,10.0,20.0,11.0
25%,11.0,27.0,17.0
50%,12.0,34.0,23.0
75%,22.5,184.0,26.5
max,33.0,334.0,30.0
