# pandas Basics

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Calling the DataFrame constructor with no arguments yields an empty frame
# (no columns, no index), as the printed representation below shows.
df = pd.DataFrame()
print(df)

Empty DataFrame
Columns: []
Index: []


In [3]:
# Build a 5x4 DataFrame from a list of rows. No labels are supplied, so
# pandas numbers both the rows and the columns starting at 0.
df1 = pd.DataFrame(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [2, 7, 3, 23],
        [3, 6, 8, 23],
        [23, 23, 63, 12],
    ]
)

# Display df1
df1

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,2,7,3,23
3,3,6,8,23
4,23,23,63,12


In [4]:
# head() with no argument returns the first 5 rows (the default n);
# df1 has exactly 5 rows, so the whole frame is shown.
df1.head()

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,2,7,3,23
3,3,6,8,23
4,23,23,63,12


In [5]:
# head(3) returns only the first 3 rows
df1.head(3)

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,2,7,3,23


In [6]:
# tail() with no argument returns the last 5 rows (the default n);
# df1 has exactly 5 rows, so the whole frame is shown.
df1.tail()

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8
2,2,7,3,23
3,3,6,8,23
4,23,23,63,12


In [7]:
# tail(3) returns only the last 3 rows, keeping their original index labels
df1.tail(3)

Unnamed: 0,0,1,2,3
2,2,7,3,23
3,3,6,8,23
4,23,23,63,12


# Naming Columns and Slicing

In [8]:
# `columns` supplies the column labels; it must list exactly one label
# per data column (four here).
df2 = pd.DataFrame(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [2, 7, 3, 23],
        [3, 6, 8, 23],
        [23, 23, 63, 12],
    ],
    columns=['A1', 'B2', 'C', 'D'],
)

In [9]:
# Display df2 to confirm the custom column labels
df2

Unnamed: 0,A1,B2,C,D
0,1,2,3,4
1,5,6,7,8
2,2,7,3,23
3,3,6,8,23
4,23,23,63,12


In [10]:
# shape is a (number of rows, number of columns) tuple
df2.shape

(5, 4)

In [11]:
# Positional indexing with iloc[row, column]: row 0, column 1 of df1 -> a single value
df1.iloc[0,1]

2

In [12]:
# Positional slicing: the first two rows and first two columns of df1
# (the end position of each slice is exclusive)
df1.iloc[:2, :2]

Unnamed: 0,0,1
0,1,2
1,5,6


# Reading a CSV File

In [13]:
# Load social.csv (path is relative to the notebook's working directory).
# Note: this rebinds `df`, replacing the empty frame created earlier.
df = pd.read_csv('social.csv')

In [14]:
# Preview the first 5 rows of the loaded data
df.head(5)

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [15]:
# Inspect the dtype pandas inferred for the Age column when parsing the CSV
print(df['Age'].dtype)

int64


# Writing a CSV File

In [16]:
# Rows may be shorter than the `columns` list: the first and last rows hold
# only three values, so pandas fills their missing 'D' entries with NaN
# (which is why column D prints as float).
# Note: this rebinds df2, replacing the df2 defined earlier.
df2 = pd.DataFrame(
    [
        [1, 3, 4],
        [5, 6, 7, 8],
        [2, 7, 3, 23],
        [3, 6, 8, 23],
        [23, 23, 63],
    ],
    columns=['A', 'B', 'C', 'D'],
)
print(df2)

    A   B   C     D
0   1   3   4   NaN
1   5   6   7   8.0
2   2   7   3  23.0
3   3   6   8  23.0
4  23  23  63   NaN


In [17]:
# to_csv writes the row index as an unnamed first column by default, so
# reading the file back produces an extra 'Unnamed: 0' column.
df2.to_csv('export.csv')
df3 = pd.read_csv('export.csv')
df3

Unnamed: 0.1,Unnamed: 0,A,B,C,D
0,0,1,3,4,
1,1,5,6,7,8.0
2,2,2,7,3,23.0
3,3,3,6,8,23.0
4,4,23,23,63,


In [18]:
# index=False leaves the row index out of the file, so the round trip
# returns only the original columns A-D.
df2.to_csv('export.csv',index=False)
df4 = pd.read_csv('export.csv')
df4

Unnamed: 0,A,B,C,D
0,1,3,4,
1,5,6,7,8.0
2,2,7,3,23.0
3,3,6,8,23.0
4,23,23,63,


# Merging DataFrames

In [19]:
# Left-hand frame for the merge example, specified column by column
df5 = pd.DataFrame({'A': [1, 5, 2], 'B': [2, 6, 7], 'C': [3, 7, 3]})
df5

Unnamed: 0,A,B,C
0,1,2,3
1,5,6,7
2,2,7,3


In [20]:
# Right-hand frame for the merge example, specified column by column
df6 = pd.DataFrame({'X': [11, 22, 63], 'Y': [2, 8, 7], 'Z': [31, 35, 92]})
df6

Unnamed: 0,X,Y,Z
0,11,2,31
1,22,8,35
2,63,7,92


In [21]:
# Join df5 and df6 on df5.B == df6.Y. merge defaults to an inner join, so
# only row pairs with a matching key value are kept; both key columns
# (B and Y) appear in the result.
df7 = df5.merge(df6, left_on='B', right_on='Y')
df7

Unnamed: 0,A,B,C,X,Y,Z
0,1,2,3,11,2,31
1,2,7,3,63,7,92


# Add Row

In [22]:
# Re-read social.csv so this section starts from the file's current contents
df = pd.read_csv('social.csv')
df.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [23]:
# The record to add, as a mapping of column name -> one-element list.
# Note that the values are strings, not numbers.
data = {
    'Age': ['90'],
    'EstimatedSalary': ['500000'],
    'Purchased': ['1'],
}
data

{'Age': ['90'], 'EstimatedSalary': ['500000'], 'Purchased': ['1']}

In [24]:
# Add the new record to the data set.
# The row is concatenated onto the existing table in memory and the whole
# file is rewritten, instead of being appended to the file with mode='a'.
# A raw header-less append only works if the file already ends with a
# newline and its columns are in the same order as `data`; concat aligns
# the new row by column name.
# NOTE: this rebinds df1 (previously the 5x4 demo frame), and re-running
# the cell adds the row again because social.csv itself is modified.
df1 = pd.DataFrame(data)
df = pd.concat([pd.read_csv('social.csv'), df1], ignore_index=True)
df.to_csv('social.csv', index=False)
# Re-read so column dtypes are inferred afresh (the new row's values are strings)
df = pd.read_csv('social.csv')
df

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0
5,90,500000,1


# Add Column

In [25]:
# Assigning a list to a new column label adds that column; the list
# supplies one value per row (6 values for the 6 rows here).
gender = ['F','M','M','F','M','F']
df["Gender"] = gender
# Overwrite social.csv so the new column is persisted for the following sections
df.to_csv('social.csv',mode='w', index=False, header=True)
df

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender
0,19,19000,0,F
1,35,20000,0,M
2,26,43000,0,M
3,27,57000,0,F
4,19,76000,0,M
5,90,500000,1,F


# Delete Row

In [26]:
# Reload the file, remove the row whose index label is 5 (the record added
# earlier), and save the result back to social.csv. drop returns a new
# frame, so no in-place mutation is needed.
df = pd.read_csv('social.csv').drop(index=5)
df.to_csv('social.csv', mode='w', index=False, header=True)
df

Unnamed: 0,Age,EstimatedSalary,Purchased,Gender
0,19,19000,0,F
1,35,20000,0,M
2,26,43000,0,M
3,27,57000,0,F
4,19,76000,0,M


# Delete Column

In [27]:
# Reload the file, remove the EstimatedSalary column, and save the result
# back to social.csv. drop returns a new frame, so no in-place mutation
# is needed.
df = pd.read_csv('social.csv').drop(columns='EstimatedSalary')
df.to_csv('social.csv', mode='w', index=False, header=True)
df

Unnamed: 0,Age,Purchased,Gender
0,19,0,F
1,35,0,M
2,26,0,M
3,27,0,F
4,19,0,M


# Sorting

In [28]:
# Reload the current contents of social.csv before sorting
df = pd.read_csv('social.csv')

# Ascending order by Age. sort_values returns a new, sorted DataFrame and
# leaves df itself unchanged; rows keep their original index labels.
df.sort_values(by=["Age"],ascending=True)

Unnamed: 0,Age,Purchased,Gender
0,19,0,F
4,19,0,M
2,26,0,M
3,27,0,F
1,35,0,M


In [29]:
# Descending order by Age (ascending=False); as above, a new frame is
# returned and the original index labels are kept.
df.sort_values(by=["Age"],ascending=False)

Unnamed: 0,Age,Purchased,Gender
1,35,0,M
3,27,0,F
2,26,0,M
0,19,0,F
4,19,0,M
