In [1]:
import numpy as np
import pandas as pd

**Creating a DataFrame**

In [3]:
# Column-oriented input: every key becomes a column label and every list
# supplies that column's values, one entry per row.
data = dict(
    Name=["John", "Anna", "Peter", "Linda"],
    Age=[28, 34, 29, 42],
    City=["New York", "Paris", "Berlin", "London"],
    Salary=[65000, 70000, 62000, 85000],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [6]:

# Row-oriented input: each inner list is one record. No column names are
# given here, so pandas labels the columns 0, 1, 2, 3.
data_list = [
    ["John", 28, "New York", 65000],
    ["Anna", 34, "Paris", 70000],
    ["Peter", 29, "Berlin", 62000],
    ["Linda", 42, "London", 85000],
]
df_2 = pd.DataFrame(data=data_list)
df_2

Unnamed: 0,0,1,2,3
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [9]:
# Rebuild df_2 from the same rows, this time passing explicit column labels
# so the frame shows names instead of the default integer labels.
header = ["Name", "Age", "City", "Salary"]
df_2 = pd.DataFrame(data=data_list, columns=header)
df_2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


**Selection and Indexing of Columns**

In [11]:
# Show the full frame as a reference point for the column selections below.
df_2

Unnamed: 0,Name,Age,City,Salary
0,John,28,New York,65000
1,Anna,34,Paris,70000
2,Peter,29,Berlin,62000
3,Linda,42,London,85000


In [14]:
# Indexing with a list of labels (note the double brackets) selects those
# columns and returns a DataFrame, here with the single column "Name".
df_2[["Name"]]

Unnamed: 0,Name
0,John
1,Anna
2,Peter
3,Linda


In [15]:
# A longer label list selects several columns at once, in the order listed.
df_2[["Name", "City"]]

Unnamed: 0,Name,City
0,John,New York
1,Anna,Paris
2,Peter,Berlin
3,Linda,London


**Creating a new column**

In [18]:
# Assigning to a label that does not exist yet adds a new column. The list
# supplies one value per existing row (4 values for 4 rows).
# This modifies df_2 in place, so every later cell sees the extra column.
df_2["Designation"] = ["Doctor","Eng.","Doctor","Eng."]
df_2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Eng.
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.


**Removing Rows and Columns**

In [21]:
# Remove the row labelled 0. drop() returns a new frame; df_2 itself is
# left unchanged because the result is not assigned back.
df_2.drop(index=0)

Unnamed: 0,Name,Age,City,Salary,Designation
1,Anna,34,Paris,70000,Eng.
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.


In [26]:
# Remove the "City" column. The result is a new frame; df_2 keeps the
# column because the result is not assigned back.
df_2.drop(columns=["City"])

Unnamed: 0,Name,Age,Salary,Designation
0,John,28,65000,Doctor
1,Anna,34,70000,Eng.
2,Peter,29,62000,Doctor
3,Linda,42,85000,Eng.


**Selecting Rows**

In [28]:
# Show the full frame as a reference point for the row selections below.
df_2

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Eng.
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.


In [33]:
# .loc selects by index label. Passing a one-element list keeps the result
# a DataFrame containing just the row labelled 0.
df_2.loc[[0]]

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor


In [37]:
# A list of several labels returns those rows, in the order listed.
df_2.loc[[2,3]]

Unnamed: 0,Name,Age,City,Salary,Designation
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.


In [43]:
# .iloc selects by integer position. A single position (not a list) returns
# that row as a Series whose index is the column labels.
df_2.iloc[2]

Name            Peter
Age                29
City           Berlin
Salary          62000
Designation    Doctor
Name: 2, dtype: object

**Selecting Subsets of Rows and Columns**

In [62]:
# Larger sample of 15 records, each ordered as
# [Name, Age, City, Salary, Designation].
data_list_2 = [
    ["John", 28, "New York", 65000, "Doctor"],
    ["Anna", 34, "Paris", 70000, "Eng."],
    ["Peter", 29, "Berlin", 62000, "Doctor"],
    ["Linda", 42, "London", 85000, "Eng."],
    ["Mark", 31, "Toronto", 68000, "Analyst"],
    ["Sophia", 26, "Madrid", 58000, "Designer"],
    ["Daniel", 38, "Amsterdam", 79000, "Manager"],
    ["Emily", 45, "Chicago", 92000, "Doctor"],
    ["Michael", 33, "San Francisco", 88000, "Eng."],
    ["Olivia", 27, "Rome", 60000, "Analyst"],
    ["Lucas", 40, "Sydney", 83000, "Manager"],
    ["Nina", 35, "Vienna", 72000, "Consultant"],
    ["Robert", 50, "Boston", 95000, "Director"],
    ["Isabella", 29, "Milan", 64000, "Designer"],
    ["Thomas", 36, "Zurich", 90000, "Consultant"],
]

# Attach column labels to the positional fields while building the frame.
df_3 = pd.DataFrame(
    data=data_list_2,
    columns=["Name", "Age", "City", "Salary", "Designation"],
)

In [63]:
# Select a row/column subset of df_3 in a single .loc call: the first list
# holds row labels, the second holds column labels. One call avoids building
# an intermediate frame the way two chained indexing steps would.
df_3.loc[[0, 1], ["City", "Salary"]]

Unnamed: 0,City,Salary
0,New York,65000
1,Paris,70000


In [64]:
# Pick six non-adjacent row labels and three columns in one .loc call
# (row labels first, column labels second).
df_3.loc[[1, 3, 7, 8, 11, 14], ["Name", "Salary", "Designation"]]

Unnamed: 0,Name,Salary,Designation
1,Anna,70000,Eng.
3,Linda,85000,Eng.
7,Emily,92000,Doctor
8,Michael,88000,Eng.
11,Nina,72000,Consultant
14,Thomas,90000,Consultant


**Conditional Selection**

In [65]:
# Show df_3 as the starting point for the conditional filters that follow.
df_3 

Unnamed: 0,Name,Age,City,Salary,Designation
0,John,28,New York,65000,Doctor
1,Anna,34,Paris,70000,Eng.
2,Peter,29,Berlin,62000,Doctor
3,Linda,42,London,85000,Eng.
4,Mark,31,Toronto,68000,Analyst
5,Sophia,26,Madrid,58000,Designer
6,Daniel,38,Amsterdam,79000,Manager
7,Emily,45,Chicago,92000,Doctor
8,Michael,33,San Francisco,88000,Eng.
9,Olivia,27,Rome,60000,Analyst


In [66]:
# df_3["Age"] > 30 produces a boolean Series (one True/False per row).
# Indexing df_3 with it keeps only the rows marked True; the original
# index labels are preserved, so gaps appear where rows were filtered out.
df_3[df_3["Age"] > 30]

Unnamed: 0,Name,Age,City,Salary,Designation
1,Anna,34,Paris,70000,Eng.
3,Linda,42,London,85000,Eng.
4,Mark,31,Toronto,68000,Analyst
6,Daniel,38,Amsterdam,79000,Manager
7,Emily,45,Chicago,92000,Doctor
8,Michael,33,San Francisco,88000,Eng.
10,Lucas,40,Sydney,83000,Manager
11,Nina,35,Vienna,72000,Consultant
12,Robert,50,Boston,95000,Director
14,Thomas,36,Zurich,90000,Consultant


In [None]:
# NOTE(review): this repeats the Age > 30 filter from the cell above and has
# no execution count. It looks like a leftover; remove it or replace it with
# the example that was intended here.
df_3[df_3["Age"] > 30 ]