# Pandas

In [1]:
import pandas as pd

## DataFrame and Series

In [2]:
# Column-oriented toy dataset: each key becomes a column and each list holds
# that column's values in row order.
data = dict(
    first_name=['Asim', 'Junaid', 'Osama', 'Mohsin', 'Danish'],
    last_name=['Ali', 'Javed', 'Qazi', 'Aftab', 'Rafique'],
    buying_status=['Yes', 'Yes', 'No', 'Yes', 'Yes'],
    age=[21, 19, 25, 22, 20],
)

df = pd.DataFrame(data)

# Bracket lookup with a single label yields that column as a Series.
df['first_name']

0      Asim
1    Junaid
2     Osama
3    Mohsin
4    Danish
Name: first_name, dtype: object

In [3]:
# Attribute-style column access: returns the same Series as df['last_name'].
# Only usable when the column name is a valid Python identifier that does not
# clash with an existing DataFrame attribute or method.
df.last_name

0        Ali
1      Javed
2       Qazi
3      Aftab
4    Rafique
Name: last_name, dtype: object

In [4]:
# The whole table is a DataFrame.
type(df)

pandas.core.frame.DataFrame

In [5]:
# A single column pulled out of a DataFrame is a Series.
type(df.first_name)

pandas.core.series.Series

In [6]:
# Displays a copy of df that uses first_name as the row index. The result is
# not assigned, so df itself keeps its default integer index.
df.set_index('first_name')

Unnamed: 0_level_0,last_name,buying_status,age
first_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Asim,Ali,Yes,21
Junaid,Javed,Yes,19
Osama,Qazi,No,25
Mohsin,Aftab,Yes,22
Danish,Rafique,Yes,20


## Read, Write, set_index & lookup

In [7]:
# Write df to data.csv in the current working directory. With the default
# index=True the row labels are saved as an extra leading column with an empty
# header (pass index=False to omit it).
df.to_csv('data.csv')

In [8]:
# Re-load the CSV, keeping only the name and age columns; every other column
# in the file is skipped.
person = pd.read_csv(
    'data.csv',
    usecols=['first_name', 'last_name', 'age'],
)

# Shown re-indexed by first name. The result is discarded, so `person` itself
# keeps its default integer index.
person.set_index('first_name')

Unnamed: 0_level_0,last_name,age
first_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Asim,Ali,21
Junaid,Javed,19
Osama,Qazi,25
Mohsin,Aftab,22
Danish,Rafique,20


In [9]:
# Indexing with a list of labels selects several columns and yields a
# DataFrame (a single bare label would yield a Series instead).
person[['first_name', 'last_name']]

Unnamed: 0,first_name,last_name
0,Asim,Ali
1,Junaid,Javed
2,Osama,Qazi
3,Mohsin,Aftab
4,Danish,Rafique


## Basic Methods

In [10]:
# First two rows of the frame.
person.head(2)

Unnamed: 0,first_name,last_name,age
0,Asim,Ali,21
1,Junaid,Javed,19


In [11]:
# Last two rows of the frame.
person.tail(2)

Unnamed: 0,first_name,last_name,age
3,Mohsin,Aftab,22
4,Danish,Rafique,20


In [12]:
# Prints the index range, per-column non-null counts and dtypes, and the
# approximate memory usage. Note this inspects df (the original 4-column
# frame), not the 3-column `person` frame used by the neighbouring cells.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   first_name     5 non-null      object
 1   last_name      5 non-null      object
 2   buying_status  5 non-null      object
 3   age            5 non-null      int64 
dtypes: int64(1), object(3)
memory usage: 288.0+ bytes


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max). By default only
# numeric columns are summarised, which here means just `age`.
person.describe()

Unnamed: 0,age
count,5.0
mean,21.4
std,2.302173
min,19.0
25%,20.0
50%,21.0
75%,22.0
max,25.0


In [14]:
# (number of rows, number of columns)
person.shape

(5, 3)

In [15]:
# Count rows whose age repeats the age of an earlier row; the first occurrence
# of each value is not flagged. Only the `age` column is compared.
person.duplicated(subset=['age']).sum()

0

In [16]:
# Stack the frame on top of itself so every row appears twice, giving the
# duplicate-handling calls something to find.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. Like append, it keeps the original row labels, so
# labels 0-4 now appear twice.
# NOTE: rebinding `person` makes this cell non-idempotent; re-running it
# doubles the rows again.
person = pd.concat([person, person])
person.head(2)

Unnamed: 0,first_name,last_name,age
0,Asim,Ali,21
1,Junaid,Javed,19


In [17]:
# With no subset, a row is flagged only when every column matches an earlier
# row; the first occurrence of each row is not counted.
person.duplicated().sum()

5

In [18]:
# Displays a copy with repeated rows removed, keeping the first occurrence of
# each. The result is not assigned, so `person` still contains the duplicates.
person.drop_duplicates()

Unnamed: 0,first_name,last_name,age
0,Asim,Ali,21
1,Junaid,Javed,19
2,Osama,Qazi,25
3,Mohsin,Aftab,22
4,Danish,Rafique,20
