#  PANDAS

In [1]:
import pandas as pd
import numpy as np

#  Series 1-D

In [2]:
#  Create Series 

series1=pd.Series([1,2],index=['a','b'])
pd.Series(series1)

a    1
b    2
dtype: int64

In [3]:
#  Get Series Values

series1=pd.Series([1,2],index=['a','b'])
print("series : ",pd.Series(series1))
print("get the series values : ", series1.values)

series :  a    1
b    2
dtype: int64
get the series values :  [1 2]


In [4]:
#  Get Values by Index

series1=pd.Series([1,2],index=['a','b'])
print("series : ",pd.Series(series1))
print("get the series values : ", series1['a'])
print("get the series values : ", series1[['b','a']])

series :  a    1
b    2
dtype: int64
get the series values :  1
get the series values :  b    2
a    1
dtype: int64


In [5]:
#  Get Series Index

series1=pd.Series([1,2],index=['a','b'])
print("series : ",pd.Series(series1))
print("get the series values : ", series1.index)

series :  a    1
b    2
dtype: int64
get the series values :  Index(['a', 'b'], dtype='object')


In [6]:
#  Get Name Attribute

series1=pd.Series([1,2],index=['a','b'])
print("series : ",pd.Series(series1))
print("get the name att : ", series1.name)

series :  a    1
b    2
dtype: int64
get the name att :  None


In [7]:
#  Unique But Unsorted

series1=pd.Series([1,4,0],index=['a','b','c'])
series2=pd.Series([3,2,5],index=['e','f','g'])
series2 = series1.unique()
series2

array([1, 4, 0], dtype=int64)

In [8]:
#  Common Index Values are Added

series1+series2

a    2
b    8
c    0
dtype: int64

#  DataFrame 2D

In [9]:
#  Create  DataFrame

dict1 = {'state': ['Ohio', 'CA'],
         'year': [2000, 2010]}
df = pd.DataFrame(dict1)
df

Unnamed: 0,state,year
0,Ohio,2000
1,CA,2010


In [10]:
# columns are placed in sorted order

df= pd.DataFrame(dict1,index = ['row1', 'row2'])
df


Unnamed: 0,state,year
row1,Ohio,2000
row2,CA,2010


In [11]:
# Specifying Index

df= pd.DataFrame(dict1,columns = ['year', 'state'])
df

Unnamed: 0,year,state
0,2000,Ohio
1,2010,CA


In [12]:
# DataFrame

dict1 = {'col1': {'row1': 1, 'row2': 2},
         'col2': {'row1': 3, 'row2': 4} }
df1 = pd.DataFrame(dict1)
df1

Unnamed: 0,col1,col2
row1,1,3
row2,2,4


In [13]:
#  Get Columns names 

df1.columns

Index(['col1', 'col2'], dtype='object')

In [14]:
#  Get Row Names

df1.index

Index(['row1', 'row2'], dtype='object')

In [15]:
#  Get Name Attribute

df1.columns.name
df1

Unnamed: 0,col1,col2
row1,1,3
row2,2,4


In [16]:
#  Get Values

df1.values

array([[1, 3],
       [2, 4]], dtype=int64)

In [17]:
#  Create  DataFrame

dict1 = {'state': ['Ohio', 'CA'],
         'year': [2000, 2010]}
df1 = pd.DataFrame(dict1)
df1

Unnamed: 0,state,year
0,Ohio,2000
1,CA,2010


In [18]:
#  Get Column as Series 

df1['state']                     #or df1.state

0    Ohio
1      CA
Name: state, dtype: object

In [19]:
#  Assign a column that doesn’t exist will create a new

df1['eastern'] = df1.state == 'Ohio'
df1

Unnamed: 0,state,year,eastern
0,Ohio,2000,True
1,CA,2010,False


In [20]:
#  Delete a column

del df1['eastern']

In [21]:
df1

Unnamed: 0,state,year
0,Ohio,2000
1,CA,2010


In [22]:
#  Switch Columns and Rows 

df1.T

Unnamed: 0,0,1
state,Ohio,CA
year,2000,2010


In [23]:
#  MultiIndex 

series1 = pd.Series(np.random.randn(6), index = [['a', 'a', 'a', 'b', 'b', 'b'], [1, 2, 3, 1, 2, 3]])
series1.index.names = ['key1', 'key2']
series1

key1  key2
a     1       0.474434
      2      -0.251957
      3       0.758179
b     1      -0.338424
      2      -1.228377
      3       0.235407
dtype: float64

In [24]:
#  Series Partial Indexing

series1['b']                      # Outer Level

key2
1   -0.338424
2   -1.228377
3    0.235407
dtype: float64

In [25]:
#  Series Partial Indexing

series1[:, 2]                    # Inner Level

key1
a   -0.251957
b   -1.228377
dtype: float64

In [26]:
#  Swap Level (level interchanged)

swapSeries1 = series1.swaplevel('key1', 'key2')
swapSeries1

key2  key1
1     a       0.474434
2     a      -0.251957
3     a       0.758179
1     b      -0.338424
2     b      -1.228377
3     b       0.235407
dtype: float64

In [28]:
# Sum rows (that have same ‘key2’ value)

series1.sum(level = 'key2')

  series1.sum(level = 'key2')


key2
1    0.136010
2   -1.480334
3    0.993586
dtype: float64

In [39]:
# Sum columns .. 

series1.sum(level ='key1')

  series1.sum(level ='key1')


key1
a    0.980656
b   -1.331394
dtype: float64