# Import Pandas

In [1]:
# using alias
import pandas as pd 

# Pandas Series

In [4]:
# Build a Series from a plain Python list.
# No index is supplied, so pandas labels the values 0..n-1.
data = [25, 65, 78, 90]
series = pd.Series(data=data)

series

0    25
1    65
2    78
3    90
dtype: int64

In [5]:
# Create a Series over the same values, but labelled with custom
# string labels instead of the default integer positions.

my_index = ["A", "B", "C", "D"]

series_by_custom_index = pd.Series(data=data, index=my_index)

series_by_custom_index

A    25
B    65
C    78
D    90
dtype: int64

In [7]:
# Look up a single value through its custom label (explicit label-based access).
print(series_by_custom_index.loc["C"])

78


# Pandas DataFrames

In [8]:
# Create a DataFrame from a dictionary.
# pandas is already imported as `pd` at the top of the notebook, so it is not
# re-imported here (keeping every import in the first cell).
# Each key becomes a column name and each list becomes that column's values;
# all lists must have the same length (one entry per row).
emps = {
    "Name": ["Rahma Khan Isha", "Raiyaz Rifat", "SSB Rishi", "SZM Munna"],  # column of names
    "City": ["Tangail", "Dhaka", "Chittagong", "Rangpur"],  # column of cities
    "Age": [19, 21, 22, 22],  # column of ages (integers)
}

df = pd.DataFrame(emps)

df

Unnamed: 0,Name,City,Age
0,Rahma Khan Isha,Tangail,19
1,Raiyaz Rifat,Dhaka,21
2,SSB Rishi,Chittagong,22
3,SZM Munna,Rangpur,22


In [10]:
# Build a DataFrame from the same dictionary, but label the rows with
# custom index values. Labels may repeat: "NSU" is used for three rows.

custom_label = ["NSU", "BUP", "NSU", "NSU"]

df_by_custom_label = pd.DataFrame(data=emps, index=custom_label)

df_by_custom_label

Unnamed: 0,Name,City,Age
NSU,Rahma Khan Isha,Tangail,19
BUP,Raiyaz Rifat,Dhaka,21
NSU,SSB Rishi,Chittagong,22
NSU,SZM Munna,Rangpur,22


# Data Selection & Indexing

## Column selection

In [12]:
# Passing a list of column names (note the double brackets) selects those
# columns and gives back a DataFrame, here with the single column "Name".
df[["Name"]]

Unnamed: 0,Name
0,Rahma Khan Isha
1,Raiyaz Rifat
2,SSB Rishi
3,SZM Munna


In [13]:
# Select several columns at once; the result keeps the order given in the list.
df[["Name", "City"]]

Unnamed: 0,Name,City
0,Rahma Khan Isha,Tangail
1,Raiyaz Rifat,Dhaka
2,SSB Rishi,Chittagong
3,SZM Munna,Rangpur


## Row selection

### iloc

In [14]:
# iloc selects by integer position (0-based), so position 2 is the third row.
row_2 = df.iloc[2] # position-based (integer) index location

In [15]:
# Display the selected row: a single row comes back as a Series whose
# index holds the column names.
row_2

Name     SSB Rishi
City    Chittagong
Age             22
Name: 2, dtype: object

In [16]:
# Position-based slice: the start is inclusive and the stop is exclusive,
# so this keeps the rows at positions 0 and 1.
selected_rows = df.iloc[:2]

selected_rows

Unnamed: 0,Name,City,Age
0,Rahma Khan Isha,Tangail,19
1,Raiyaz Rifat,Dhaka,21


### loc

In [17]:
# Label-based location: loc looks rows up by index label, not by position.
# "BUP" labels exactly one row, so the result is that row as a Series.

df_by_custom_label.loc["BUP"]

Name    Raiyaz Rifat
City           Dhaka
Age               21
Name: BUP, dtype: object

In [18]:
# "NSU" labels several rows, so loc returns every matching row as a DataFrame.
df_by_custom_label.loc["NSU"]

Unnamed: 0,Name,City,Age
NSU,Rahma Khan Isha,Tangail,19
NSU,SSB Rishi,Chittagong,22
NSU,SZM Munna,Rangpur,22


### query

In [19]:
# query() takes the filter condition as a string that refers to column names;
# here it keeps only the rows whose Age is greater than 19.
filtered = df.query("Age > 19")

In [20]:
# Display the rows that passed the filter; their original index labels are kept.
filtered

Unnamed: 0,Name,City,Age
1,Raiyaz Rifat,Dhaka,21
2,SSB Rishi,Chittagong,22
3,SZM Munna,Rangpur,22


# Data Exploration & Information

## info()

In [21]:
# Print a structural summary of the frame: index range, column names,
# non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   City    4 non-null      object
 2   Age     4 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes


## describe()

In [22]:
# Summary statistics (count, mean, std, min, quartiles, max).
# Only the numeric column (Age) is summarised here; Name and City are text.
df.describe()

Unnamed: 0,Age
count,4.0
mean,21.0
std,1.414214
min,19.0
25%,20.5
50%,21.5
75%,22.0
max,22.0


## head() & tail()

In [24]:
# Rebuild the sample data with seven people so that head() and tail()
# have enough rows to trim.
# NOTE: this rebinds both `emps` and `df`; cells below operate on the 7-row frame.
emps = {
    # one entry per person, in the same order across all three columns
    "Name": [
        "Rahma Khan Isha",
        "Raiyaz Rifat",
        "SSB Rishi",
        "SZM Munna",
        "Atkia Sadia Anika",
        "Ihsanul Haque",
        "Tamanna Rahman",
    ],
    "City": [
        "Tangail",
        "Dhaka",
        "Chittagong",
        "Rangpur",
        "Barishal",
        "Dhaka",
        "Dhaka",
    ],
    "Age": [19, 21, 22, 22, 23, 26, 25],
}

df = pd.DataFrame(data=emps)

In [25]:
# First five rows; 5 is the default that head() uses when no count is given.
df.head(5)

Unnamed: 0,Name,City,Age
0,Rahma Khan Isha,Tangail,19
1,Raiyaz Rifat,Dhaka,21
2,SSB Rishi,Chittagong,22
3,SZM Munna,Rangpur,22
4,Atkia Sadia Anika,Barishal,23


In [26]:
# Only the first three rows.
df.head(n=3)

Unnamed: 0,Name,City,Age
0,Rahma Khan Isha,Tangail,19
1,Raiyaz Rifat,Dhaka,21
2,SSB Rishi,Chittagong,22


In [27]:
# Last five rows; 5 is the default that tail() uses when no count is given.
df.tail(5)

Unnamed: 0,Name,City,Age
2,SSB Rishi,Chittagong,22
3,SZM Munna,Rangpur,22
4,Atkia Sadia Anika,Barishal,23
5,Ihsanul Haque,Dhaka,26
6,Tamanna Rahman,Dhaka,25


In [28]:
# Only the last three rows.
df.tail(n=3)

Unnamed: 0,Name,City,Age
4,Atkia Sadia Anika,Barishal,23
5,Ihsanul Haque,Dhaka,26
6,Tamanna Rahman,Dhaka,25


## value_counts()

In [29]:
# Count how many rows fall into each distinct city; the most frequent value is listed first.
df["City"].value_counts()

City
Dhaka         3
Tangail       1
Chittagong    1
Rangpur       1
Barishal      1
Name: count, dtype: int64