# Getting Started with Pandas

## Run this file after completing "Getting Started with Python".

In [2]:
import pandas as pd

## Load a tabular data set into a pandas DataFrame

In [3]:
# Read the example CSV into a DataFrame; the path is relative, so it is
# resolved against the directory the notebook kernel is running in.
Table = pd.read_csv(filepath_or_buffer='../data/people-example.csv')

## Dataframe Basics

In [4]:
# Show the frame that was just loaded. A bare expression on the last line of
# a cell is rendered as that cell's output.
Table

Unnamed: 0,First Name,Last Name,Country,age
0,Bob,Smith,United States,24
1,Alice,Williams,Canada,23
2,Malcolm,Jone,England,22
3,Felix,Brown,USA,23
4,Alex,Cooper,Poland,23
5,Tod,Campbell,United States,22
6,Derek,Ward,Switzerland,25


## Inspect columns of the DataFrame

In [4]:
# Pick a single column by label (all rows); a single label yields a Series.
Table.loc[:, 'Country']

0    United States
1           Canada
2          England
3              USA
4           Poland
5    United States
6      Switzerland
Name: Country, dtype: object

In [5]:
# Same single-column selection, this time for the numeric 'age' column.
Table.loc[:, 'age']

0    24
1    23
2    22
3    23
4    23
5    22
6    25
Name: age, dtype: int64

In [6]:
# A list of labels selects several columns at once and yields a DataFrame.
Table.loc[:, ['Country', 'age']]

Unnamed: 0,Country,age
0,United States,24
1,Canada,23
2,England,22
3,USA,23
4,Poland,23
5,United States,22
6,Switzerland,25


## Some simple columnar operations

In [7]:
# Arithmetic mean of the 'age' column.
Table.loc[:, 'age'].mean()

23.142857142857142

In [8]:
# Largest value in the 'age' column.
Table.loc[:, 'age'].max()

25

## Create new columns in the DataFrame

In [9]:
# Show the frame again as a "before" snapshot; the next cells add columns to it.
Table

Unnamed: 0,First Name,Last Name,Country,age
0,Bob,Smith,United States,24
1,Alice,Williams,Canada,23
2,Malcolm,Jone,England,22
3,Felix,Brown,USA,23
4,Alex,Cooper,Poland,23
5,Tod,Campbell,United States,22
6,Derek,Ward,Switzerland,25


In [10]:
# Derive a new column from two existing ones: first name, one space, last name.
Table['Full Name'] = Table['First Name'].str.cat(Table['Last Name'], sep=' ')

In [11]:
# Add two brand-new columns from literal values, one value per row.
# A plain list is assigned to the column by position, so it must contain
# exactly as many items as the frame has rows (7 in the displayed data).
# There is no need to wrap the values in a pd.DataFrame first.
Table['Random Column 1'] = [2, 3, 1, 4, 2, 5, 4]
Table['Random Column 2'] = [2, 3, 1, 4, 2, 5, 4]

In [12]:
# Show the frame to confirm the 'Full Name' and 'Random Column' columns were added.
Table

Unnamed: 0,First Name,Last Name,Country,age,Full Name,Random Column 1,Random Column 2
0,Bob,Smith,United States,24,Bob Smith,2,2
1,Alice,Williams,Canada,23,Alice Williams,3,3
2,Malcolm,Jone,England,22,Malcolm Jone,1,1
3,Felix,Brown,USA,23,Felix Brown,4,4
4,Alex,Cooper,Poland,23,Alex Cooper,2,2
5,Tod,Campbell,United States,22,Tod Campbell,5,5
6,Derek,Ward,Switzerland,25,Derek Ward,4,4


## Removing columns from the DataFrame

In [13]:
# drop() returns a new frame without the listed columns; rebinding Table
# makes the removal visible to the cells that follow.
Table = Table.drop(columns=['Random Column 1', 'Random Column 2'])

In [14]:
# Show the frame to confirm both 'Random Column' columns are gone.
Table

Unnamed: 0,First Name,Last Name,Country,age,Full Name
0,Bob,Smith,United States,24,Bob Smith
1,Alice,Williams,Canada,23,Alice Williams
2,Malcolm,Jone,England,22,Malcolm Jone
3,Felix,Brown,USA,23,Felix Brown
4,Alex,Cooper,Poland,23,Alex Cooper
5,Tod,Campbell,United States,22,Tod Campbell
6,Derek,Ward,Switzerland,25,Derek Ward


In [15]:
# Element-wise square of 'age'. The result is only displayed, not assigned,
# so no column is added to Table.
Table['age'] ** 2

0    576
1    529
2    484
3    529
4    529
5    484
6    625
Name: age, dtype: int64

In [16]:
# Show the frame to confirm the previous expression left it unchanged.
Table

Unnamed: 0,First Name,Last Name,Country,age,Full Name
0,Bob,Smith,United States,24,Bob Smith
1,Alice,Williams,Canada,23,Alice Williams
2,Malcolm,Jone,England,22,Malcolm Jone
3,Felix,Brown,USA,23,Felix Brown
4,Alex,Cooper,Poland,23,Alex Cooper
5,Tod,Campbell,United States,22,Tod Campbell
6,Derek,Ward,Switzerland,25,Derek Ward


## Advanced DataFrame operations

In [17]:
# Boolean-mask row selection: keep only the rows whose age is below 23.
Table.loc[Table['age'] < 23]

Unnamed: 0,First Name,Last Name,Country,age,Full Name
2,Malcolm,Jone,England,22,Malcolm Jone
5,Tod,Campbell,United States,22,Tod Campbell


In [18]:
# Full names of the people younger than 23. Rows (boolean mask) and column
# (label) are selected in a single .loc call instead of chaining two []
# lookups, which would build an intermediate filtered frame first.
Table.loc[Table['age'] < 23, 'Full Name']

2    Malcolm Jone
5    Tod Campbell
Name: Full Name, dtype: object

In [19]:
# Normalise country names: within the 'Country' column only, every value
# equal to 'USA' becomes 'United States'. Nested-dict form: {column: {old: new}}.
Table = Table.replace({'Country': {'USA': 'United States'}})

In [20]:
# Show the frame to confirm the 'USA' entry now reads 'United States'.
Table

Unnamed: 0,First Name,Last Name,Country,age,Full Name
0,Bob,Smith,United States,24,Bob Smith
1,Alice,Williams,Canada,23,Alice Williams
2,Malcolm,Jone,England,22,Malcolm Jone
3,Felix,Brown,United States,23,Felix Brown
4,Alex,Cooper,Poland,23,Alex Cooper
5,Tod,Campbell,United States,22,Tod Campbell
6,Derek,Ward,Switzerland,25,Derek Ward
