# Intro to Pandas

In [2]:
import pandas as pd

## Load Data to DataFrame

### CSVs

In [5]:
# Location of the CSV file, relative to the notebook's working directory.
csv_path = "countries.csv"

# Parse the CSV into a DataFrame; rows get a default 0..n-1 integer index.
df_csv = pd.read_csv(filepath_or_buffer=csv_path)

# A bare last expression makes Jupyter render the frame as a table.
df_csv

Unnamed: 0,country,year,population
0,Afghanistan,1952,8425333
1,Afghanistan,1957,9240934
2,Afghanistan,1962,10267083
3,Afghanistan,1967,11537966
4,Afghanistan,1972,13079460
...,...,...,...
1699,Zimbabwe,1987,9216418
1700,Zimbabwe,1992,10704340
1701,Zimbabwe,1997,11404948
1702,Zimbabwe,2002,11926563


### Excel Spreadsheets

In [6]:
# Location of the Excel workbook, relative to the notebook's working directory.
xlsx_path = "products.xlsx"

# Read the workbook into a DataFrame.
df_xlsx = pd.read_excel(io=xlsx_path)

# A bare last expression makes Jupyter render the frame as a table.
df_xlsx

Unnamed: 0,product_id,price,merchant_id,brand,name
0,AVphzgbJLJeJML43fA0o,104.99,1001,Sanus,Sanus VLF410B1 10-Inch Super Slim Full-Motion ...
1,AVpgMuGwLJeJML43KY_c,69.00,1002,Boytone,Boytone - 2500W 2.1-Ch. Home Theater System - ...
2,AVpe9FXeLJeJML43zHrq,23.99,1001,DENAQ,DENAQ - AC Adapter for TOSHIBA SATELLITE
3,AVpfVJXu1cnluZ0-iwTT,290.99,1001,DreamWave,DreamWave - Tremor Portable Bluetooth Speaker ...
4,AVphUeKeilAPnD_x3-Be,244.01,1004,Yamaha,NS-SP1800BL 5.1-Channel Home Theater System (B...
...,...,...,...,...,...
1240,AVphFybdLJeJML43Wnza,64.95,1110,JBL,"JBL - 6 x 9"" 3-Way Car Speakers with Polypropy..."
1241,AVpe_qIa1cnluZ0-bjrN,871.06,1002,HP,HP - ProBook 14 Laptop - Intel Core i5 - 4GB M...
1242,AVphibxI1cnluZ0-DpxG,74.95,1238,Magellan,Magellan Roadmate 5322-LM 5 Touchscreen Portab...
1243,AVpgrtW3ilAPnD_xv67M,294.35,1239,Pyle Pro,PMX840BT Bluetooth 8-Channel 800-Watt Powered ...


### Dictionaries

In [8]:
# Column-oriented data: each key is a column name and each list holds that
# column's values, one entry per row (all lists have the same length).
nhl_players = dict(
    player_name=["Gabriel Landeskog", "Philipp Grubauer", "Tyler Seguin", "Mikko Rantanen"],
    player_team=["Colorado Avalanche", "Seattle Kraken", "Dallas Stars", "Colorado Avalanche"],
    player_position=["Left Wing", "Goaltender", "Center", "Right Wing"],
)

# Dict keys become the column labels; rows get a default 0..3 integer index.
df_nhl_players = pd.DataFrame(data=nhl_players)

df_nhl_players

Unnamed: 0,player_name,player_team,player_position
0,Gabriel Landeskog,Colorado Avalanche,Left Wing
1,Philipp Grubauer,Seattle Kraken,Goaltender
2,Tyler Seguin,Dallas Stars,Center
3,Mikko Rantanen,Colorado Avalanche,Right Wing


## See First 5 rows of DataFrame

In [7]:
# head() returns the first n rows; n defaults to 5 and is spelled out here.
df_csv.head(n=5)

Unnamed: 0,country,year,population
0,Afghanistan,1952,8425333
1,Afghanistan,1957,9240934
2,Afghanistan,1962,10267083
3,Afghanistan,1967,11537966
4,Afghanistan,1972,13079460


## Create New DataFrame from a Single Column

In [9]:
# Indexing with a *list* of column labels returns a DataFrame holding just
# those columns (here a one-column frame).
country_columns = ["country"]
df_country_names = df_csv[country_columns]

df_country_names

Unnamed: 0,country
0,Afghanistan
1,Afghanistan
2,Afghanistan
3,Afghanistan
4,Afghanistan
...,...
1699,Zimbabwe
1700,Zimbabwe
1701,Zimbabwe
1702,Zimbabwe


### As well as from multiple columns

In [10]:
# The list controls both which columns are kept and the order they appear in
# the result ("name" first, then "price").
product_columns = ["name", "price"]
df_product_info_simple = df_xlsx[product_columns]

df_product_info_simple

Unnamed: 0,name,price
0,Sanus VLF410B1 10-Inch Super Slim Full-Motion ...,104.99
1,Boytone - 2500W 2.1-Ch. Home Theater System - ...,69.00
2,DENAQ - AC Adapter for TOSHIBA SATELLITE,23.99
3,DreamWave - Tremor Portable Bluetooth Speaker ...,290.99
4,NS-SP1800BL 5.1-Channel Home Theater System (B...,244.01
...,...,...
1240,"JBL - 6 x 9"" 3-Way Car Speakers with Polypropy...",64.95
1241,HP - ProBook 14 Laptop - Intel Core i5 - 4GB M...,871.06
1242,Magellan Roadmate 5322-LM 5 Touchscreen Portab...,74.95
1243,PMX840BT Bluetooth 8-Channel 800-Watt Powered ...,294.35


## Accessing DataFrame Cells

### With `iloc`

*locate cell by __integer position__*

`df.iloc[x, y]` where:
* x - row position (0-based)
* y - column position (0-based)

In [11]:
# Row position 0, column position 0 (the "country" column).
df_csv.iloc[0, 0]

'Afghanistan'

In [12]:
# Second row (position 1), third column (position 2, "player_position").
df_nhl_players.iloc[1, 2]

'Goaltender'

### With `loc`

*locate cell by __row label & column name__*

`df.loc[x, y]` where:
* x - row label (a value from the DataFrame's index)
* y - column name

In [13]:
# Row labelled 0 in the index, column named "country".
df_csv.loc[0, "country"]

'Afghanistan'

In [15]:
# Row labelled 1 in the index, column named "player_position".
df_nhl_players.loc[1, "player_position"]

'Goaltender'

## Change DataFrame Index

In [16]:
# .copy() gives df_players_new its own data and index. A plain assignment
# (df_players_new = df_nhl_players) would only bind a second name to the same
# object, so setting the index below would also relabel df_nhl_players and make
# earlier cells such as df_nhl_players.loc[1, "player_position"] fail on re-run.
df_players_new = df_nhl_players.copy()

# Replace the default 0..3 integer labels with custom string labels
# (one label per row, in row order).
df_players_new.index = ["row1", "row2", "row3", "row4"]

df_players_new

Unnamed: 0,player_name,player_team,player_position
row1,Gabriel Landeskog,Colorado Avalanche,Left Wing
row2,Philipp Grubauer,Seattle Kraken,Goaltender
row3,Tyler Seguin,Dallas Stars,Center
row4,Mikko Rantanen,Colorado Avalanche,Right Wing


### Using `loc` with new index

In [18]:
# Rows are now addressed by their string labels: row "row3", column "player_name".
df_players_new.loc["row3", "player_name"]

'Tyler Seguin'

## Create New DataFrame with Slicing

### Using `iloc`

In [20]:
# iloc slices are positional and exclude the stop value, like Python lists:
#   :2 -- rows at positions 0 and 1 (first two rows)
#   :3 -- columns at positions 0, 1 and 2 (first three columns)
df_products_new = df_xlsx.iloc[:2, :3]

df_products_new

Unnamed: 0,product_id,price,merchant_id
0,AVphzgbJLJeJML43fA0o,104.99,1001
1,AVpgMuGwLJeJML43KY_c,69.0,1002


### Using `loc`

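__NOTE: a label slice selects every column positioned between the two names, so the columns you want need to sit next to each other. Unlike `iloc`, a `loc` slice includes its end label (rows and columns).__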

In [21]:
# loc slices are label-based and include BOTH endpoints:
#   :2                    -- rows labelled 0, 1 and 2 (three rows)
#   "merchant_id":"name"  -- merchant_id, name and every column between them
df_products_new2 = df_xlsx.loc[:2, "merchant_id":"name"]

df_products_new2

Unnamed: 0,merchant_id,brand,name
0,1001,Sanus,Sanus VLF410B1 10-Inch Super Slim Full-Motion ...
1,1002,Boytone,Boytone - 2500W 2.1-Ch. Home Theater System - ...
2,1001,DENAQ,DENAQ - AC Adapter for TOSHIBA SATELLITE
