## Basics of Manipulating Pandas DataFrames

Adapted from "Manipulating DataFrames with pandas" @ [DataCamp](https://www.datacamp.com/courses/manipulating-dataframes-with-pandas)

In [3]:
import pandas as pd 

In [4]:
# Read the sales table and use its 'month' column as the row index.
df = pd.read_csv(
    './sales.csv',
    index_col='month',
)
df

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


## Indexing DataFrames
### Indexing with square brackets

In [None]:
df['salt']['Jan'] 

### Using column attribute and row label

In [None]:
 df.eggs['Mar']

### Using the .loc accessor

In [None]:
df.loc['May', 'spam'] 

### Using the .iloc accessor

In [None]:
df.iloc[4, 2] 

### Selecting only some columns

In [None]:
df

In [None]:
# A list of labels inside the brackets picks several columns, in the order listed.
columns_to_keep = ['salt', 'eggs']
df_new = df[columns_to_keep]
df_new

## Slicing DataFrames

In [None]:
df

### Selecting a column (i.e., Series)

In [None]:
df['eggs'] 

In [None]:
type(df['eggs']) 

### Slicing and indexing a Series

In [None]:
df['eggs'][1:4] # Part of the eggs column

In [None]:
# The index holds month labels, so a bare integer key in [] relies on pandas'
# deprecated positional fallback; .iloc makes the positional lookup explicit.
df['eggs'].iloc[4]  # The value associated with May (position 4)

### Using .loc[]

In [None]:
df.loc[:, :] # All rows, all columns 

In [None]:
df.loc[:, 'eggs':'salt'] # All rows, some columns 

In [None]:
 df.loc['Jan':'Apr',:] # Some rows, all columns 

In [None]:
 df.loc['Mar':'May', 'salt':'spam'] # Some rows, some columns

In [None]:
 df.iloc[2:5, 1:] # A block from middle of the DataFrame 

### Using lists rather than slices

In [None]:
df.loc['Jan':'May', ['eggs', 'spam']] 

In [None]:
 df.iloc[[0,4,5], 0:2] 

### Series versus 1-column DataFrame

In [None]:
df['eggs'] 

In [None]:
type(df['eggs']) 

In [None]:
 df[['eggs']] 

In [None]:
 type(df[['eggs']]) 

## Filtering DataFrames

### Creating a Boolean Series

In [None]:
 df.salt > 60

### Filtering with a Boolean Series

In [None]:
 df[df.salt > 60] 

In [None]:
 enough_salt_sold = df.salt > 60

In [None]:
df[enough_salt_sold] 

### DataFrames with zeros and NaNs

In [None]:
df2 = df.copy() 

In [None]:
# Add two columns: one that is only partly zero and one that is zero in every row.
df2['bacon'] = [0, 0, 50, 60, 70, 80]
df2['sushi'] = [0] * 6

In [None]:
df2 

### Select columns with all nonzeros

In [None]:
 df2.loc[:, df2.all()] 

### Select columns with any nonzeros

In [None]:
 df2.loc[:, df2.any()]

### Select columns with any NaNs

In [None]:
 df2.loc[:, df2.isnull().any()] 

### Select columns without NaNs

In [None]:
df2.loc[:, df2.notnull().all()] 

### Drop rows with any NaNs

In [None]:
df2

In [None]:
# Drop every row that has at least one NaN; the result is not assigned, so df2 is unchanged.
df2.dropna(axis='index', how='any')

### Filtering a column based on another

In [None]:
# Build the Boolean mask from 'salt', then use it to pick rows of 'eggs'.
salt_above_55 = df2['salt'] > 55
df2.loc[salt_above_55, 'eggs']

### Drop a row

In [None]:
# Drop by row label; the result is not assigned, so df2 is unchanged.
df2.drop(index='Jan')

### Drop a column 

In [None]:
# Drop by column label; the result is not assigned, so df2 is unchanged.
df2.drop(columns='sushi')

## Transforming DataFrames

In [None]:
def dozens(n):
    """Return how many whole dozens fit into ``n`` (floor division by 12).

    ``n`` may be anything that supports ``//`` with an integer.
    """
    items_per_dozen = 12
    whole_dozens = n // items_per_dozen
    return whole_dozens

In [None]:
df.apply(dozens)

In [None]:
# Same transformation with an inline function; by default apply passes one column at a time.
df.apply(lambda column: column // 12)

### The DataFrame index

In [None]:
df2

In [None]:
df2.index

### Working with string values

In [None]:
df2.index = df2.index.str.upper() 

In [None]:
df2

In [None]:
# Lower-case the index of df2 (the frame shown in the surrounding cells) by mapping
# a plain Python function over it. The original `df` keeps its month labels, so the
# label-based lookups earlier in the notebook still work on a re-run.
df2.index = df2.index.map(str.lower)

In [None]:
df2

### Defining columns using other columns

In [None]:
# Element-wise sum of two existing columns, stored as a new column.
df2['salty_eggs'] = df2['salt'] + df2['eggs']

In [None]:
df2

## Exercises
### Read in the data from sales1.csv
1. Create a dataframe with all information for software sales
2. Create a dataframe with only the product and unit information for the company named 'Hooli'