### Accessing Data with Pandas

In [1]:
import pandas as pd

In [43]:
# load the coffee sales CSV (given as a relative path) into the `coffee` DataFrame used by every cell below
coffee = pd.read_csv('../warmup-data/coffee.csv')

In [None]:
# we will work with the coffee df throughout; head() with no argument shows the first 5 rows
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


In [44]:
# evaluating the DataFrame by itself displays more of the data than head() does
coffee

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


In [11]:
# tail(n) is the counterpart of head(n): look at the last 5 rows
coffee.tail(5)

Unnamed: 0,Day,Coffee Type,Units Sold
9,Friday,Latte,35
10,Saturday,Espresso,45
11,Saturday,Latte,35
12,Sunday,Espresso,45
13,Sunday,Latte,35


In [14]:
# sample(n) returns n randomly chosen rows; here we draw 4 random rows
coffee.sample(4)

Unnamed: 0,Day,Coffee Type,Units Sold
12,Sunday,Espresso,45
11,Saturday,Latte,35
0,Monday,Espresso,25
1,Monday,Latte,15


In [19]:
# sample() also accepts a random_state= argument that seeds the draw, making the random sample deterministic:
# the same 4 rows are returned every time this cell is run with the same seed
coffee.sample(4, random_state=42)

Unnamed: 0,Day,Coffee Type,Units Sold
9,Friday,Latte,35
11,Saturday,Latte,35
0,Monday,Espresso,25
12,Sunday,Espresso,45


### loc and iloc
`loc` allows us to select rows and columns of our df by their labels<br><br>
The syntax is like:<br><br>
**df.loc[#Rows, #Columns]**

In [24]:
# pass a list of index labels to pick specific rows: labels 0, 1, and 5 are the first, second, and sixth rows of the coffee df
coffee.loc[[0, 1, 5]]

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
5,Wednesday,Latte,25


In [26]:
# loc also accepts slice syntax; with loc both ends of the slice are included,
# so 0:5 returns the first 6 rows (labels 0 through 5)
coffee.loc[0:5]

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25


In [None]:
# leaving the end of the slice open (0:) selects everything from label 0 onwards
coffee.loc[0:]

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,15
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35
5,Wednesday,Latte,25
6,Thursday,Espresso,40
7,Thursday,Latte,30
8,Friday,Espresso,45
9,Friday,Latte,35


In [None]:
# combine a row slice with a list of column names: rows labelled 5 through 8 (inclusive),
# keeping only the Day and Units Sold columns
coffee.loc[5:8, ["Day", "Units Sold"]]

Unnamed: 0,Day,Units Sold
5,Wednesday,25
6,Thursday,40
7,Thursday,30
8,Friday,45


In [41]:
# here we take the rows labelled 0 through 5, but only the Day and Units Sold columns
# (to see every row instead, use a bare `:` as the row indexer)
coffee.loc[0:5, ["Day", "Units Sold"]]

Unnamed: 0,Day,Units Sold
0,Monday,25
1,Monday,15
2,Tuesday,30
3,Tuesday,20
4,Wednesday,35
5,Wednesday,25


#### iloc
`iloc` works like `loc`, but it's ***integer-position*** based: rows and columns are selected by their position rather than by their label.

In [None]:
# coffee.iloc[:, ["Day", "Units Sold"]] # you cannot use this: iloc only accepts integer positions, so column names will throw an error

# with iloc we select by position: here the first 5 rows and the columns at positions 0 and 2 (Day and Units Sold)
# note that with `iloc`, the upper bound of a slice is not inclusive, meaning that 0:5 returns positions 0 through 4 (the first 5 rows)
coffee.iloc[0:5, [0,2]]

Unnamed: 0,Day,Units Sold
0,Monday,25
1,Monday,15
2,Tuesday,30
3,Tuesday,20
4,Wednesday,35


In [47]:
# lets say the data has a mistake in the units sold for Monday's latte (index label 1): it shows 15 but should be 10
# if you print out the df before running this cell you will see 15 for that row; assigning through loc overwrites it with 10
# note: this changes the coffee df in place, so cells re-run after this one will show 10
coffee.loc[1, "Units Sold"] = 10

# you can also set multiple rows to 10 at once by using a label slice such as 1:3 as the row indexer for `loc`

In [46]:
# check the result of the assignment: the Monday latte row (label 1) should now show 10 units sold
coffee.head()

Unnamed: 0,Day,Coffee Type,Units Sold
0,Monday,Espresso,25
1,Monday,Latte,10
2,Tuesday,Espresso,30
3,Tuesday,Latte,20
4,Wednesday,Espresso,35


In [48]:
# worth mentioning: .at and .iat are slightly optimized ways of getting (or setting) a single value
# .at is label based (row label, column name) and .iat is integer-position based (row position, column position)

coffee.at[0, "Units Sold"]

np.int64(25)

In [51]:
# we can easily grab a single column by using the column name in brackets
coffee["Day"]

# or as an attribute; only the last expression in a cell is displayed, so the output below comes from this line
# (attribute access needs a column name that is a valid Python identifier, so "Units Sold" requires the bracket form)
coffee.Day

0        Monday
1        Monday
2       Tuesday
3       Tuesday
4     Wednesday
5     Wednesday
6      Thursday
7      Thursday
8        Friday
9        Friday
10     Saturday
11     Saturday
12       Sunday
13       Sunday
Name: Day, dtype: object

In [54]:
# sort the rows by Units Sold, largest first (ascending=False means descending order)
# sort_values returns a sorted copy; the coffee df itself keeps its original row order
coffee.sort_values("Units Sold", ascending=False)

Unnamed: 0,Day,Coffee Type,Units Sold
10,Saturday,Espresso,45
8,Friday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
11,Saturday,Latte,35
13,Sunday,Latte,35
9,Friday,Latte,35
2,Tuesday,Espresso,30
7,Thursday,Latte,30


In [55]:
# additionally, we can sort by more than one column by passing a list of column names to sort_values:
# rows are ordered by Units Sold first, and ties are broken by Coffee Type
# a single ascending=False applies to every column in the list, so both are sorted in descending order
coffee.sort_values(["Units Sold", "Coffee Type"], ascending=False)

Unnamed: 0,Day,Coffee Type,Units Sold
8,Friday,Espresso,45
10,Saturday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
4,Wednesday,Espresso,35
7,Thursday,Latte,30
2,Tuesday,Espresso,30


In [70]:
# the sort direction can also be set per column by passing one flag for each sort column:
# False sorts Units Sold in descending order, True sorts Coffee Type in ascending (alphabetical) order
coffee.sort_values(["Units Sold", "Coffee Type"], ascending=[False, True])

Unnamed: 0,Day,Coffee Type,Units Sold
8,Friday,Espresso,45
10,Saturday,Espresso,45
12,Sunday,Espresso,45
6,Thursday,Espresso,40
4,Wednesday,Espresso,35
9,Friday,Latte,35
11,Saturday,Latte,35
13,Sunday,Latte,35
2,Tuesday,Espresso,30
7,Thursday,Latte,30


In [71]:
# if you need to go through the df row by row, you can loop over iterrows():
# each iteration yields the row's index label and the row itself as a Series
for index, row in coffee.iterrows():
    print(index)
    print(row)
    print("\n\n\n\n")  # blank lines to visually separate one row's printout from the next

0
Day              Monday
Coffee Type    Espresso
Units Sold           25
Name: 0, dtype: object





1
Day            Monday
Coffee Type     Latte
Units Sold         10
Name: 1, dtype: object





2
Day             Tuesday
Coffee Type    Espresso
Units Sold           30
Name: 2, dtype: object





3
Day            Tuesday
Coffee Type      Latte
Units Sold          20
Name: 3, dtype: object





4
Day            Wednesday
Coffee Type     Espresso
Units Sold            35
Name: 4, dtype: object





5
Day            Wednesday
Coffee Type        Latte
Units Sold            25
Name: 5, dtype: object





6
Day            Thursday
Coffee Type    Espresso
Units Sold           40
Name: 6, dtype: object





7
Day            Thursday
Coffee Type       Latte
Units Sold           30
Name: 7, dtype: object





8
Day              Friday
Coffee Type    Espresso
Units Sold           45
Name: 8, dtype: object





9
Day            Friday
Coffee Type     Latte
Units Sold         35
Name: 9, dtype: o