# Introduction to Pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
print(pd.__version__)

1.3.2


In [3]:
print(dir(pd))

['BooleanDtype', 'Categorical', 'CategoricalDtype', 'CategoricalIndex', 'DataFrame', 'DateOffset', 'DatetimeIndex', 'DatetimeTZDtype', 'ExcelFile', 'ExcelWriter', 'Flags', 'Float32Dtype', 'Float64Dtype', 'Float64Index', 'Grouper', 'HDFStore', 'Index', 'IndexSlice', 'Int16Dtype', 'Int32Dtype', 'Int64Dtype', 'Int64Index', 'Int8Dtype', 'Interval', 'IntervalDtype', 'IntervalIndex', 'MultiIndex', 'NA', 'NaT', 'NamedAgg', 'Period', 'PeriodDtype', 'PeriodIndex', 'RangeIndex', 'Series', 'SparseDtype', 'StringDtype', 'Timedelta', 'TimedeltaIndex', 'Timestamp', 'UInt16Dtype', 'UInt32Dtype', 'UInt64Dtype', 'UInt64Index', 'UInt8Dtype', '__builtins__', '__cached__', '__doc__', '__docformat__', '__file__', '__getattr__', '__git_version__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', '_config', '_hashtable', '_is_numpy_dev', '_lib', '_libs', '_np_version_under1p18', '_testing', '_tslib', '_typing', '_version', 'api', 'array', 'arrays', 'bdate_range', 'compat', 'con

# - Pandas Series

### empty series

In [4]:
s = pd.Series()
s

  s = pd.Series()


Series([], dtype: float64)

### series with list/tuple data

In [5]:
l = [1,2,3,4]
s = pd.Series(l)
s

0    1
1    2
2    3
3    4
dtype: int64

In [6]:
l = (1,2,3,4)
s = pd.Series(l)
s

0    1
1    2
2    3
3    4
dtype: int64

In [7]:
l = (1,2,3,"4")
s = pd.Series(l)
s

0    1
1    2
2    3
3    4
dtype: object

### series using numpy array

In [8]:
a = np.array([1,2,3,4])
print(a)

[1 2 3 4]


In [9]:
s = pd.Series(a)
s

0    1
1    2
2    3
3    4
dtype: int64

### series using dictionary

In [10]:
d = {'A':1,"B":2,"C":3}
print(d)

{'A': 1, 'B': 2, 'C': 3}


In [11]:
s = pd.Series(d)
s

A    1
B    2
C    3
dtype: int64

### series using index names

In [12]:
l = [1,2,3,4]
s = pd.Series(l,index=["A","B","C","D"])
s

A    1
B    2
C    3
D    4
dtype: int64

In [13]:
# l = [1,2,3,4]
# s = pd.Series(l,index=["A","B","C"])
# s

In [14]:
# l = [1,2,3,4]
# s = pd.Series(l,index=["A","B","C","D","E"])
# s

In [15]:
# l = [1]
# s = pd.Series(l,index=["A","B"])
# s

### series with scalar values

In [16]:
l = 40
s = pd.Series(l,index=["A","B","C"])
s

A    40
B    40
C    40
dtype: int64

In [17]:
l = 40
s = pd.Series(l,index=["A","B","C"],dtype="int8")
s

A    40
B    40
C    40
dtype: int8

## Indexing/Slicing for Series

In [18]:
a = [10,20,30,40,50]
s = pd.Series(a)
s

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [19]:
s[0]

10

In [20]:
s[3]

40

In [21]:
a = [10,20,30,40,50]
s = pd.Series(a,index=list("ABCDE"))
s

A    10
B    20
C    30
D    40
E    50
dtype: int64

In [22]:
s["A"]

10

In [23]:
s[0]

10

In [24]:
s[["A","C","D"]]

A    10
C    30
D    40
dtype: int64

In [25]:
s[2:]

C    30
D    40
E    50
dtype: int64

In [26]:
s[2:4]

C    30
D    40
dtype: int64

In [27]:
s["A":]

A    10
B    20
C    30
D    40
E    50
dtype: int64

In [28]:
s["A":'D']

A    10
B    20
C    30
D    40
dtype: int64

# Data Frames

## Empty DataFrame

In [29]:
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


## Using Default option to create DataFrame

In [30]:
df = pd.DataFrame([1,2,3])
df

Unnamed: 0,0
0,1
1,2
2,3


## DataFrame using List/Tuple

In [31]:
a = [1,2,3,4]
df = pd.DataFrame(a)
df

Unnamed: 0,0
0,1
1,2
2,3
3,4


## DataFrame using numpy array

In [32]:
a = np.random.randint(1,10,15).reshape(5,3)
a

array([[1, 9, 2],
       [7, 2, 1],
       [9, 5, 6],
       [1, 2, 7],
       [1, 1, 2]])

In [33]:
df = pd.DataFrame(a)
df

Unnamed: 0,0,1,2
0,1,9,2
1,7,2,1
2,9,5,6
3,1,2,7
4,1,1,2


In [34]:
df = pd.DataFrame(a,columns=list("ABC"))
df

Unnamed: 0,A,B,C
0,1,9,2
1,7,2,1
2,9,5,6
3,1,2,7
4,1,1,2


## DataFrame using Dictionary

In [35]:
d = {"A":1,"B":2,"C":3}
df = pd.DataFrame(d,index=[100,101,102])
df

Unnamed: 0,A,B,C
100,1,2,3
101,1,2,3
102,1,2,3


## Attributes and Methods of a DataFrame

In [36]:
a = np.random.randint(1,20,30).reshape(5,6)
a

array([[14,  5, 14, 13,  7, 16],
       [ 6,  1, 10, 10,  8,  7],
       [ 8,  3,  1, 18, 18,  2],
       [ 6, 16, 15,  2, 11,  6],
       [ 2, 10,  4, 18,  5, 12]])

In [37]:
df = pd.DataFrame(a,columns=list("ABCDEF"))
df

Unnamed: 0,A,B,C,D,E,F
0,14,5,14,13,7,16
1,6,1,10,10,8,7
2,8,3,1,18,18,2
3,6,16,15,2,11,6
4,2,10,4,18,5,12


In [38]:
print(df.dtypes)
print(df.size)
print(df.values)
print(df.shape)
print(df.items())
print(df.index)
print(df.columns)
print(df.keys())

A    int64
B    int64
C    int64
D    int64
E    int64
F    int64
dtype: object
30
[[14  5 14 13  7 16]
 [ 6  1 10 10  8  7]
 [ 8  3  1 18 18  2]
 [ 6 16 15  2 11  6]
 [ 2 10  4 18  5 12]]
(5, 6)
<generator object DataFrame.items at 0x7f9bae4f7120>
RangeIndex(start=0, stop=5, step=1)
Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')
Index(['A', 'B', 'C', 'D', 'E', 'F'], dtype='object')


In [39]:
df.describe() # gives summary of complete Table data

Unnamed: 0,A,B,C,D,E,F
count,5.0,5.0,5.0,5.0,5.0,5.0
mean,7.2,7.0,8.8,12.2,9.8,8.6
std,4.38178,6.041523,6.140033,6.648308,5.069517,5.458938
min,2.0,1.0,1.0,2.0,5.0,2.0
25%,6.0,3.0,4.0,10.0,7.0,6.0
50%,6.0,5.0,10.0,13.0,8.0,7.0
75%,8.0,10.0,14.0,18.0,11.0,12.0
max,14.0,16.0,15.0,18.0,18.0,16.0


## setting name for rows and columns

In [40]:
df

Unnamed: 0,A,B,C,D,E,F
0,14,5,14,13,7,16
1,6,1,10,10,8,7
2,8,3,1,18,18,2
3,6,16,15,2,11,6
4,2,10,4,18,5,12


In [41]:
print(df)
print(df.rename(index={1:100,4:"B"},columns={"A":"ABC","D":"DEF"}))
print(df)

    A   B   C   D   E   F
0  14   5  14  13   7  16
1   6   1  10  10   8   7
2   8   3   1  18  18   2
3   6  16  15   2  11   6
4   2  10   4  18   5  12
     ABC   B   C  DEF   E   F
0     14   5  14   13   7  16
100    6   1  10   10   8   7
2      8   3   1   18  18   2
3      6  16  15    2  11   6
B      2  10   4   18   5  12
    A   B   C   D   E   F
0  14   5  14  13   7  16
1   6   1  10  10   8   7
2   8   3   1  18  18   2
3   6  16  15   2  11   6
4   2  10   4  18   5  12


In [42]:
print(df)
print(df.rename(index={1:100,4:"B"},columns={"A":"ABC","D":"DEF"},inplace=True))
print(df)

    A   B   C   D   E   F
0  14   5  14  13   7  16
1   6   1  10  10   8   7
2   8   3   1  18  18   2
3   6  16  15   2  11   6
4   2  10   4  18   5  12
None
     ABC   B   C  DEF   E   F
0     14   5  14   13   7  16
100    6   1  10   10   8   7
2      8   3   1   18  18   2
3      6  16  15    2  11   6
B      2  10   4   18   5  12


## Indexing/Slicing for Series

In [43]:
a = np.random.randint(1,20,30).reshape(5,6)
df = pd.DataFrame(a,columns=list("ABCDEF"))
df

Unnamed: 0,A,B,C,D,E,F
0,7,5,10,14,1,5
1,18,9,11,7,12,4
2,2,2,14,17,5,13
3,8,7,10,5,1,19
4,7,14,8,4,17,14


## using `[]` notation

In [44]:
df["A"]

0     7
1    18
2     2
3     8
4     7
Name: A, dtype: int64

In [45]:
df[["A","C"]]

Unnamed: 0,A,C
0,7,10
1,18,11
2,2,14
3,8,10
4,7,8


In [46]:
df[["A","C"]]

Unnamed: 0,A,C
0,7,10
1,18,11
2,2,14
3,8,10
4,7,8


## using `.` notation

In [47]:
df.A

0     7
1    18
2     2
3     8
4     7
Name: A, dtype: int64

### using loc()

In [48]:
# <DataFrame>.loc[<row_index_name>,<col_header_name>]

In [49]:
df.loc[0,"A"]

7

In [50]:
df.loc[0,["A","D"]]

A     7
D    14
Name: 0, dtype: int64

In [51]:
df.loc[0:,["A","D"]]

Unnamed: 0,A,D
0,7,14
1,18,7
2,2,17
3,8,5
4,7,4


In [52]:
df.loc[[0,4,3],["A","D"]]

Unnamed: 0,A,D
0,7,14
4,7,4
3,8,5


In [53]:
df.loc[[0,4,3],"A":]

Unnamed: 0,A,B,C,D,E,F
0,7,5,10,14,1,5
4,7,14,8,4,17,14
3,8,7,10,5,1,19


### using iloc()

In [54]:
df

Unnamed: 0,A,B,C,D,E,F
0,7,5,10,14,1,5
1,18,9,11,7,12,4
2,2,2,14,17,5,13
3,8,7,10,5,1,19
4,7,14,8,4,17,14


In [55]:
df.iloc[0,0]

7

In [56]:
# df.iloc[0,"A"]

In [57]:
df.iloc[0,2:5]

C    10
D    14
E     1
Name: 0, dtype: int64

In [58]:
df.iloc[::-1,::-1]

Unnamed: 0,F,E,D,C,B,A
4,14,17,4,8,14,7
3,19,1,5,10,7,8
2,13,5,17,14,2,2
1,4,12,7,11,9,18
0,5,1,14,10,5,7


## To Transpose DataFrame

In [59]:
df.T

Unnamed: 0,0,1,2,3,4
A,7,18,2,8,7
B,5,9,2,7,14
C,10,11,14,10,8
D,14,7,17,5,4
E,1,12,5,1,17
F,5,4,13,19,14


# Slicing for Data Frames

In [60]:
df.iloc[::-1,::-1]

Unnamed: 0,F,E,D,C,B,A
4,14,17,4,8,14,7
3,19,1,5,10,7,8
2,13,5,17,14,2,2
1,4,12,7,11,9,18
0,5,1,14,10,5,7


In [61]:
df.iloc[1:5:2,:4:3]

Unnamed: 0,A,D
1,18,7
3,8,5


# Setting options for DataFrame visulaization

# use of apply()

# use of applymap()

# - Basic Operations With Data frame

## Renaming Columns

## filtering a data frame. 

# - sort and merging dataframes

# - Importing external data to DataFrame

## reading `CSV` file

## reading `XLSX` file

## reading xlsx file with specific sheet name/number

## reading `HTML` file

## WebScrapping and fetching Table data

# - Exporting DataFrame to external file .csv,.html etc