# Pandas - Filtering and Sorting Data

### Step 1. Import the necessary libraries

In [283]:
import numpy as np
import pandas as pd

### Step 2. Import the dataset from this [address](https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv). 

In [284]:
url="https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv"
data=pd.read_csv(url,sep='\t')
data.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


Preparing the Data



In [311]:
# prices = [float(value[1 : -1]) for value in chipo.item_price]
# chipo.item_price = prices 
# OR
# dollarizer = lambda x: float(x[1:-1])
# chipo.item_price = chipo.item_price.apply(dollarizer)

In [285]:
data1=data.item_price.str.split(pat='$',expand=True)

In [286]:
data2=data.join(data1)

In [287]:
data2.drop(0,inplace=True,axis=1)

In [288]:
data2.drop('item_price',axis=1,inplace=True)

In [289]:
data2=data2.rename(columns={1:'item_price'})

In [290]:
data2['item_price']=data2['item_price'].astype(float)

In [291]:
data2.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


### Step 3. Assign it to a variable called chipo.

In [292]:
chipo=data2
chipo.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,2.39
1,1,1,Izze,[Clementine],3.39
2,1,1,Nantucket Nectar,[Apple],3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",16.98


### Step 4. How many products cost more than $10.00?

In [295]:
chipo[chipo.item_price >= 10.00].count()

order_id              1130
quantity              1130
item_name             1130
choice_description    1123
item_price            1130
dtype: int64

### Step 5. What is the price of each item? 
###### print a data frame with only two columns item_name and item_price

In [296]:
chipo[['item_name','item_price']].head()

Unnamed: 0,item_name,item_price
0,Chips and Fresh Tomato Salsa,2.39
1,Izze,3.39
2,Nantucket Nectar,3.39
3,Chips and Tomatillo-Green Chili Salsa,2.39
4,Chicken Bowl,16.98


### Step 6. Sort by the name of the item

In [297]:
chipo.sort_values(by=['item_name'],ascending=True,inplace=True,axis=0)
chipo.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
3389,1360,2,6 Pack Soft Drink,[Diet Coke],12.98
341,148,1,6 Pack Soft Drink,[Diet Coke],6.49
1849,749,1,6 Pack Soft Drink,[Coke],6.49
1860,754,1,6 Pack Soft Drink,[Diet Coke],6.49
2713,1076,1,6 Pack Soft Drink,[Coke],6.49


### Step 7. What was the quantity of the most expensive item ordered?

In [305]:
chipo[chipo.item_price == chipo.item_price.max()]

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
3598,1443,15,Chips and Fresh Tomato Salsa,,44.25


### Step 8. How many times were a Veggie Salad Bowl ordered?

In [309]:
chipo[(chipo.item_name=="Veggie Salad")].count()

order_id              6
quantity              6
item_name             6
choice_description    6
item_price            6
dtype: int64

### Step 9. How many times people orderd more than one Canned Soda?

In [307]:
chipo[(chipo.item_name=="Canned Soda")&(chipo.quantity>=2)].count()

order_id              20
quantity              20
item_name             20
choice_description    20
item_price            20
dtype: int64