# Pandas - Panel Data

Agenda
- Fundamentals
    * purpose
    * features
- Data Structure
    * Series
    * DataFrame
- Series
    * How to create series
    * Basic Operations/Information
    * Query in Series
- DataFrame
    * Creating DataFrames
    * Accessing DataFrame
    * Understanding DataFrame
    * Statistical Ops in DataFrame
        - Descriptive Statistics
        - Correlation Analysis
- Date and TimeDelta

In [2]:
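# Installing pandas (uncomment and run once if it is not installed; %pip installs into the running kernel's environment)
# %pip install pandas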

In [5]:
# Import pandas  and numpy
import pandas as pd
import numpy as np

In [4]:
# Series - a one-dimensional, array-like object that holds the data together with its labels (the index)

In [6]:
# A plain NumPy array for comparison with the Series built below: it carries values only, no labels.
A = np.array([5, 2, 5, 3, 4])

In [7]:
# Show the array: only the values are printed, there is no index column.
print(A)

[5 2 5 3 4]


In [8]:
# Build a Series from a plain list. No index is supplied, so the labels are the integers 0..4.
S = pd.Series(data=[5, 2, 5, 3, 4])
print(S)

0    5
1    2
2    5
3    3
4    4
dtype: int64


In [9]:
# Same values as before, but each one now carries an explicit string label.
S2 = pd.Series(
    data=[5, 2, 5, 3, 4],
    index=['Shirts', 'Polos', 'Chinos', 'Tie', 'Shoes'],
)
print(S2)

Shirts    5
Polos     2
Chinos    5
Tie       3
Shoes     4
dtype: int64


Shirts    5
Polos     2
Chinos    5
Tie       3
Shoes     4
dtype: int64


In [12]:
# Syntax - pd.Series(<list of values>, index=<list of index labels>)
# Syntax - pd.Series(<dictionary>) --- the dictionary's keys become the index and its values become the data

In [13]:
# Creating Series from a dictionary

In [37]:
# Source dictionary with deliberately mixed value types (int, float and str).
d1 = {
    'Shirts': 20,
    'polos': 40.0,
    'Jeans': 24,
    'name': 'Jitendra',
}

In [38]:
# Dictionary keys become the index labels and the dictionary values become the data.
s3 = pd.Series(data=d1)

In [39]:
# The values are of mixed types (int, float, str), so the Series gets dtype object.
print(s3)

Shirts          20
polos         40.0
Jeans           24
name      Jitendra
dtype: object


In [40]:
# Build from the same dictionary but with an explicit index: only the listed labels
# are kept, in this order, and labels missing from d1 ('Wallet', 'Tie') become NaN.
s4 = pd.Series(
    data=d1,
    index=['Jeans', 'Wallet', 'Shirts', 'Tie'],
)

In [41]:
# 'Wallet' and 'Tie' are not keys of d1, so they show up as NaN.
print(s4)

Jeans      24
Wallet    NaN
Shirts     20
Tie       NaN
dtype: object


In [31]:
# The basic pandas data types (dtypes) are int, float, bool, datetime, category and object

In [29]:
# Accessing Series Data

In [42]:
# Reference view of s3 for the access examples that follow.
print(s3)

Shirts          20
polos         40.0
Jeans           24
name      Jitendra
dtype: object


In [43]:
# Access by position goes through .iloc. A bare integer key such as s3[2] on a
# string-labelled Series only works via a positional fallback that is deprecated
# (FutureWarning since pandas 2.1), so it is not used here.
print(s3.iloc[2])

24


In [44]:
# Access by index label with square brackets.
print(s3['Jeans'])

24


In [45]:
# Attribute-style access: returns the same element as s3['Jeans'].
# NOTE(review): presumably this only works for labels that are valid Python identifiers
# and do not clash with Series attributes/methods - confirm against the pandas docs.
print(s3.Jeans)

24


In [46]:
# Print the first four elements together with their Python types. In an object-dtype
# Series every element keeps its own type (int, float, str).
# .iloc is used for positional access: plain s3[0] on a string-labelled Series relies on
# a deprecated positional fallback (FutureWarning since pandas 2.1).
for position in range(4):
    value = s3.iloc[position]
    print(value, type(value), sep = '------')

20------<class 'int'>
40.0------<class 'float'>
24------<class 'int'>
Jitendra------<class 'str'>


In [47]:
# Update an existing element through its label. Bracket assignment is used instead of
# attribute assignment (s3.Jeans = 90): assigning through an attribute only changes the
# Series when the label already exists, otherwise it silently creates a plain Python
# attribute. Brackets work for existing and new labels alike.
s3['Jeans'] = 90

In [48]:
# 'Jeans' now holds 90 instead of 24.
print(s3)

Shirts          20
polos         40.0
Jeans           90
name      Jitendra
dtype: object


In [54]:
# Assigning to a label that does not exist yet appends a new element to the Series.
s3['wallet'] = 100

In [55]:
# 'wallet' has been added as the fifth element.
print(s3)

Shirts          20
polos         40.0
Jeans           90
name      Jitendra
wallet         100
dtype: object


In [56]:
# Attributes and functions of Series

In [57]:
# Basic attributes of a Series (the example values refer to s3 with its five elements).
print(s3.ndim) # number of dimensions: 1, a Series is one-dimensional
print(s3.shape) # shape tuple, (5,) for five elements
print(s3.size) # number of elements, 5 here
print(s3.dtype) # dtype of the values: object, because the values are of mixed types
print(s3.index) # Index object holding the labels (not a plain Python list)
print(s3.values) # the underlying data as an array

1
(5,)
5
object
Index(['Shirts', 'polos', 'Jeans', 'name', 'wallet'], dtype='object')
[20 40.0 90 'Jitendra' 100]


In [58]:
# First elements of the Series (head() defaults to n=5; s3 has exactly five, so all are shown).
s3.head()

Shirts          20
polos         40.0
Jeans           90
name      Jitendra
wallet         100
dtype: object

In [59]:
# Only the first three elements.
s3.head(3)

Shirts      20
polos     40.0
Jeans       90
dtype: object

In [60]:
# Last elements of the Series (tail() defaults to n=5; s3 has exactly five, so all are shown).
s3.tail()

Shirts          20
polos         40.0
Jeans           90
name      Jitendra
wallet         100
dtype: object

In [61]:
# Only the last two elements.
s3.tail(2)

name      Jitendra
wallet         100
dtype: object

In [62]:
# describe() generates descriptive statistics; which ones depends on the dtype:
#   - numeric dtype: count, mean, std, min, 25th/50th/75th percentiles, max
#   - object dtype (as for s3): count, unique, top, freq
s3.describe()

count      5
unique     5
top       20
freq       1
dtype: int64

In [69]:
# Append two more elements whose values (40.0 and 20) already occur in s3,
# so that the Series now contains duplicate values.
s3['age'] = 40.0
s3['sd'] = 20

In [70]:
# s3 now has seven elements; 20 and 40.0 each appear twice.
print(s3)

Shirts          20
polos         40.0
Jeans           90
name      Jitendra
wallet         100
age           40.0
sd              20
dtype: object


In [71]:
# With the duplicated values, count rises to 7 while unique stays at 5, and the top value 20 has freq 2.
s3.describe()

count      7
unique     5
top       20
freq       2
dtype: int64

In [72]:
# S2 is numeric (int64), so describe() reports count, mean, std, min, the quartiles and max.
S2.describe()

count    5.00000
mean     3.80000
std      1.30384
min      2.00000
25%      3.00000
50%      4.00000
75%      5.00000
max      5.00000
dtype: float64

In [76]:
# 10th percentile of the S2 values, computed with NumPy on the underlying array.
np.percentile(S2.values, q=10)

2.4

In [78]:
# Display S2 for reference (a bare last expression is rendered as the cell output).
S2

Shirts    5
Polos     2
Chinos    5
Tie       3
Shoes     4
dtype: int64

In [80]:
# Reference view of s3 for the uniqueness and filtering examples.
print(s3)

Shirts          20
polos         40.0
Jeans           90
name      Jitendra
wallet         100
age           40.0
sd              20
dtype: object


In [79]:
# Distinct values of the Series, returned as an array; duplicates (20, 40.0) appear once.
s3.unique()

array([20, 40.0, 90, 'Jitendra', 100], dtype=object)

In [81]:
# Number of distinct values (5 for the seven elements of s3).
s3.nunique()

5

In [82]:
# Boolean filtering: s3 == 40.0 yields a True/False mask, and .loc keeps the rows
# where the mask is True ('polos' and 'age').
s3.loc[s3 == 40.0]

polos    40.0
age      40.0
dtype: object

In [83]:
# How often each distinct value occurs; 20 and 40.0 both occur twice.
s3.value_counts()

20          2
40.0        2
90          1
Jitendra    1
100         1
dtype: int64

In [84]:
#Operations And Transformations