### Load Pandas

In [56]:
import numpy as np
import pandas as pd

### Pandas Basics

In [57]:
# Small 4x3 demo frame: rows labelled w-z, columns labelled A-C.
df = pd.DataFrame(
    data=[
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 11, 12],
    ],
    index=["w", "x", "y", "z"],
    columns=["A", "B", "C"],
)

In [58]:
# A bare last expression makes the notebook render the whole frame.
df

Unnamed: 0,A,B,C
w,1,2,3
x,4,5,6
y,7,8,9
z,10,11,12


In [59]:
# First two rows.
df.head(2)

Unnamed: 0,A,B,C
w,1,2,3
x,4,5,6


In [60]:
# Last three rows.
df.tail(3)

Unnamed: 0,A,B,C
x,4,5,6
y,7,8,9
z,10,11,12


In [61]:
# Column labels (returned as an Index object).
df.columns

Index(['A', 'B', 'C'], dtype='object')

In [62]:
# Row labels converted to a plain Python list.
df.index.to_list()

['w', 'x', 'y', 'z']

In [63]:
# Print a structural summary: index, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, w to z
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       4 non-null      int64
 1   B       4 non-null      int64
 2   C       4 non-null      int64
dtypes: int64(3)
memory usage: 128.0+ bytes


In [64]:
# Summary statistics per column: count, mean, std, min, quartiles, max.
df.describe()

Unnamed: 0,A,B,C
count,4.0,4.0,4.0
mean,5.5,6.5,7.5
std,3.872983,3.872983,3.872983
min,1.0,2.0,3.0
25%,3.25,4.25,5.25
50%,5.5,6.5,7.5
75%,7.75,8.75,9.75
max,10.0,11.0,12.0


In [65]:
# (number of rows, number of columns)
df.shape

(4, 3)

In [66]:
# Total number of cells (rows x columns).
df.size

12

In [67]:
# Count of distinct values in each column.
df.nunique()

A    4
B    4
C    4
dtype: int64

In [68]:
# Distinct values of column A, returned as a NumPy array.
df['A'].unique()

array([ 1,  4,  7, 10])

### Loading data from Files

In [69]:
# Read the CSV datasets into DataFrames.
coffee = pd.read_csv("./data/coffee.csv")
bios = pd.read_csv("./data/bios.csv")

In [70]:
# Loading data from Parquet files
results = pd.read_parquet('./data/results.parquet')

In [71]:
# Loading data from Excel files
excel = pd.read_excel("./data/olympics-data.xlsx")

### Accessing data

In [72]:
# print() gives the plain-text rendering of the frame.
print(coffee)

          Day Coffee Type  Units Sold
0      Monday    Espresso          25
1      Monday       Latte          15
2     Tuesday    Espresso          30
3     Tuesday       Latte          20
4   Wednesday    Espresso          35
5   Wednesday       Latte          25
6    Thursday    Espresso          40
7    Thursday       Latte          30
8      Friday    Espresso          45
9      Friday       Latte          35
10   Saturday    Espresso          45
11   Saturday       Latte          35
12     Sunday    Espresso          45
13     Sunday       Latte          35


In [73]:
# display() shows the frame through the notebook's rich table rendering instead of plain text.
display(coffee)

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


In [74]:
# With no argument, head() returns the first five rows.
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


In [75]:
# Last seven rows.
coffee.tail(7)

Unnamed: 0,Day,Coffee Type,Units Sold
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35
10,Saturday,Espresso,45
11,Saturday,Latte,35
12,Sunday,Espresso,45
13,Sunday,Latte,35


In [83]:
# Draw five random rows.
coffee.sample(5,random_state=1) # a fixed random_state makes the draw deterministic

Unnamed: 0,Day,Coffee Type,Units Sold
3,Tuesday,Latte,20
7,Thursday,Latte,30
6,Thursday,Espresso,40
2,Tuesday,Espresso,30
10,Saturday,Espresso,45


In [86]:
# loc: label-based selection
# coffee.loc[Rows, Columns]
# The row slice 1:3 is by label and includes BOTH ends, so rows 1, 2 and 3 are returned.
coffee.loc[1:3,["Day","Units Sold"]]

Unnamed: 0,Day,Units Sold
1,Monday,15
2,Tuesday,30
3,Tuesday,20


In [92]:
# iloc: position-based selection
# The row slice 2:4 excludes the end (rows at positions 2 and 3);
# columns are picked by position: 0 -> "Day", 2 -> "Units Sold".
coffee.iloc[2:4,[0,2]]

Unnamed: 0,Day,Units Sold
2,Tuesday,30
3,Tuesday,20


In [93]:
# Use the "Day" column as the row index so rows can be selected by day name.
# This replaces the index on `coffee` itself; "Day" also remains a regular column.
coffee.index=coffee.Day

In [94]:
# Label slice over the "Day" index; both endpoints are included, so Wednesday rows are returned too.
coffee.loc["Monday":"Wednesday"]

Unnamed: 0_level_0,Day,Coffee Type,Units Sold
Day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Monday,Monday,Espresso,25
Monday,Monday,Latte,15
Tuesday,Tuesday,Espresso,30
Tuesday,Tuesday,Latte,20
Wednesday,Wednesday,Espresso,35
Wednesday,Wednesday,Latte,25


In [96]:
# Re-read the CSV to get a fresh frame with the default integer index again.
coffee = pd.read_csv('./data/coffee.csv')

In [102]:
# Setting values
# Assign through .loc; the label slice 1:3 is inclusive, so rows 1, 2 and 3 are overwritten with 10.
coffee.loc[1:3,["Units Sold"]]=10
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,10
2,Tuesday,Espresso,10
3,Tuesday,Latte,10
4,Wednesday,Espresso,35


In [106]:
# Get single values (.at & .iat)
# .at looks up one cell by row label and column label.
coffee.at[1,"Units Sold"]

np.int64(10)

In [108]:
# .iat looks up one cell by integer position: row 1, column 1 ("Coffee Type").
coffee.iat[1,1]

'Latte'

In [115]:
# Sort values: order rows by "Units Sold", largest first.
coffee.sort_values(["Units Sold"], ascending=False)

Unnamed: 0,Day,Coffee Type,Units Sold
10,Saturday,Espresso,45
8,Friday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
11,Saturday,Latte,35
13,Sunday,Latte,35
9,Friday,Latte,35
7,Thursday,Latte,30
0,Monday,Espresso,25


In [120]:
# Sort by "Units Sold" ascending, breaking ties by "Coffee Type" descending.
# `ascending` takes one flag per sort key; booleans state each direction explicitly.
coffee.sort_values(["Units Sold", "Coffee Type"], ascending=[True, False])

Unnamed: 0,Day,Coffee Type,Units Sold
1,Monday,Latte,10
3,Tuesday,Latte,10
2,Tuesday,Espresso,10
5,Wednesday,Latte,25
0,Monday,Espresso,25
7,Thursday,Latte,30
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
4,Wednesday,Espresso,35


In [121]:
# Walk the frame one row at a time: iterrows() yields (index label, row as a Series).
# Fine for a demonstration; prefer vectorized operations for real work.
for label, record in coffee.iterrows():
    print(label)
    print(record, "\n\n\n")

0
Day              Monday
Coffee Type    Espresso
Units Sold           25
Name: 0, dtype: object 



1
Day            Monday
Coffee Type     Latte
Units Sold         10
Name: 1, dtype: object 



2
Day             Tuesday
Coffee Type    Espresso
Units Sold           10
Name: 2, dtype: object 



3
Day            Tuesday
Coffee Type      Latte
Units Sold          10
Name: 3, dtype: object 



4
Day            Wednesday
Coffee Type     Espresso
Units Sold            35
Name: 4, dtype: object 



5
Day            Wednesday
Coffee Type        Latte
Units Sold            25
Name: 5, dtype: object 



6
Day            Thursday
Coffee Type    Espresso
Units Sold           40
Name: 6, dtype: object 



7
Day            Thursday
Coffee Type       Latte
Units Sold           30
Name: 7, dtype: object 



8
Day              Friday
Coffee Type    Espresso
Units Sold           45
Name: 8, dtype: object 



9
Day            Friday
Coffee Type     Latte
Units Sold         35
Name: 9, dtype: object 


