In [144]:
import numpy as np
import pandas as pd

## Creating a DataFrame


### by using dictionary

In [145]:
# Column-oriented input: each key becomes a column label and each list
# holds that column's values, one entry per row.
data = {
    "Name": ["John", "Anna", "Peter", "Linda"],
    "Age": [28, 34, 29, 42],
    "City": ["New York", "Paris", "Berlin", "London"],
    "Salary": [65000, 70000, 62000, 85000],
}
df = pd.DataFrame(data)

In [146]:
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


### by using list

In [147]:
# Row-oriented input: each inner list is one row, in column order.
data_list = [
    ['John', 28, 'New York', 65000],
    ['Anna', 34, 'Paris', 70000],
    ['Peter', 29, 'Berlin', 62000],
    ['Linda', 42, 'London', 85000]
]
# A list of lists carries no column labels, so they are supplied explicitly.
# (Without `columns=` pandas would label the columns 0, 1, 2, 3.)
columns = ["Name", "Age", "City", "Salary"]
df2 = pd.DataFrame(data_list, columns=columns)

In [148]:
df2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


## Selection and Indexing of Columns

In [149]:
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [150]:
df["Name"]

0     John
1     Anna
2    Peter
3    Linda
Name: Name, dtype: object

In [151]:
df["City"]

0    New York
1       Paris
2      Berlin
3      London
Name: City, dtype: object

In [152]:
df[["Name","Salary"]]

Unnamed: 0,Name,Salary
0,John,65000
1,Anna,70000
2,Peter,62000
3,Linda,85000


In [153]:
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


## Creating a new column
#### Number of values must match number of rows in DataFrame

In [154]:
# Add a column by assigning a list with one value per row.
# NOTE(review): "Designtion" looks like a typo for "Designation"; the later
# drop cell uses the same spelling, so both must be renamed together.
df["Designtion"] = ["Engineer","Doctor","CA","Lawyer"]
df

Unnamed: 0,Name,Age,City,Salary,Designtion
0,John,28,New York,65000,Engineer
1,Anna,34,Paris,70000,Doctor
2,Peter,29,Berlin,62000,CA
3,Linda,42,London,85000,Lawyer


In [155]:
# Add several columns at once: the left side lists the new column labels and
# each inner list on the right is one row (Education, Marital_Status).
df[["Education","Marital_Status"]] = [["grad","m"],["pg","u"],["grad","u"],["pg","m"]]
df

Unnamed: 0,Name,Age,City,Salary,Designtion,Education,Marital_Status
0,John,28,New York,65000,Engineer,grad,m
1,Anna,34,Paris,70000,Doctor,pg,u
2,Peter,29,Berlin,62000,CA,grad,u
3,Linda,42,London,85000,Lawyer,pg,m


## Removing Columns
#### By default, axis=0 drops rows (matched by index label); set axis=1 to drop columns

In [156]:
# drop() returns a new DataFrame without the column; the result is only
# displayed here, not assigned, so df itself still has Marital_Status.
df.drop("Marital_Status",axis=1)

Unnamed: 0,Name,Age,City,Salary,Designtion,Education
0,John,28,New York,65000,Engineer,grad
1,Anna,34,Paris,70000,Doctor,pg
2,Peter,29,Berlin,62000,CA,grad
3,Linda,42,London,85000,Lawyer,pg


In [157]:
# df is unchanged: the previous drop() result was not assigned back.
df

Unnamed: 0,Name,Age,City,Salary,Designtion,Education,Marital_Status
0,John,28,New York,65000,Engineer,grad,m
1,Anna,34,Paris,70000,Doctor,pg,u
2,Peter,29,Berlin,62000,CA,grad,u
3,Linda,42,London,85000,Lawyer,pg,m


In [158]:
# inplace=True removes the columns from df itself instead of returning a
# new DataFrame, so this cell displays nothing.
df.drop(["Education","Marital_Status","Designtion"],axis=1,inplace=True) 

In [159]:
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [160]:
# With the default axis=0, drop(0) removes the row whose index label is 0.
# The result is a new DataFrame; df keeps all rows because nothing is assigned.
df.drop(0)

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [161]:
# df still contains row 0: the drop above did not modify it.
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


## Selecting rows

In [162]:
# .loc selects by index label; a single label returns that row as a Series.
df.loc[0]

Name          John
Age             28
City      New York
Salary       65000
Name: 0, dtype: object

In [163]:
# Passing a list of labels to .loc returns a DataFrame with those rows.
df.loc[[0,1]]

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000


In [164]:
# .iloc selects by integer position (0-based), so 3 is the fourth row.
# Here position and label coincide because df uses the default 0..n-1 index.
df.iloc[3]

Name       Linda
Age           42
City      London
Salary     85000
Name: 3, dtype: object

## Selecting subsets of rows and columns

In [165]:
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [166]:
# Select rows and columns in one .loc call: row labels first, column labels
# second. This avoids chained indexing, which builds an intermediate frame.
df.loc[[0, 1], ["City", "Salary"]]

Unnamed: 0,City,Salary
0,New York,65000
1,Paris,70000


In [167]:
# Select rows and columns in one .loc call: row labels first, column labels
# second. This avoids chained indexing, which builds an intermediate frame.
df.loc[[2, 3], ["Name", "Age"]]

Unnamed: 0,Name,Age
2,Peter,29
3,Linda,42


## Conditional selection

In [168]:
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [169]:
# Select only the people whose age is greater than 30.
# df["Age"] > 30 yields a boolean Series; indexing with it keeps the True rows.
df[df["Age"] > 30]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000
3,Linda,42,London,85000


In [170]:
# People older than 30 who also live in Paris.
# & is the element-wise AND; each comparison is parenthesised because &
# binds more tightly than > and ==.
df[(df["Age"] > 30) & (df["City"] == "Paris")]

Unnamed: 0,Name,Age,City,Salary
1,Anna,34,Paris,70000


In [171]:
# People who live in New York OR earn at least 70000 (| is element-wise OR).
df[(df["City"]=="New York") | (df["Salary"]>=70000)]

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
3,Linda,42,London,85000


In [172]:
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000
