In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
# Six months of values for four clinics, built column-by-column:
# each dict key becomes a column and each list holds that column's six entries.
df = pd.DataFrame({
    'month': ['January', 'February', 'March', 'April', 'May', 'June'],
    'clinic_east': [100, 51, 81, 80, 51, 112],
    'clinic_north': [100, 45, 96, 80, 54, 109],
    'clinic_south': [23, 145, 65, 54, 54, 79],
    'clinic_west': [100, 45, 96, 180, 154, 129],
})

In [3]:
# Preview the top of the frame; with no argument head() shows the first five rows.
df.head()

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
0,January,100,100,23,100
1,February,51,45,145,45
2,March,81,96,65,96
3,April,80,80,54,180
4,May,51,54,54,154


In [6]:
# Selecting rows
# .iloc is position-based: 2 picks the third row (March) and returns it as a Series.
df.iloc[2]


month           March
clinic_east        81
clinic_north       96
clinic_south       65
clinic_west        96
Name: 2, dtype: object

In [9]:
# Scalar lookup by integer position: third row (position 2), second column (position 1, clinic_east).
df.iloc[2, 1]

81

In [13]:
# .loc is label-based: the row is addressed by its index label (2)
# and the column by its name rather than its position.
df.loc[2, "clinic_north"]

96

In [14]:
# Label slices with .loc include BOTH endpoints: rows 0 through 3
# and every column from "month" through "clinic_south".
df.loc[0:3, "month":"clinic_south"]

Unnamed: 0,month,clinic_east,clinic_north,clinic_south
0,January,100,100,23
1,February,51,45,145
2,March,81,96,65
3,April,80,80,54


In [16]:
# Selecting multiple rows: a positional slice from position 3 through the last row
df.iloc[3:]

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
3,April,80,80,54,180
4,May,51,54,54,154
5,June,112,109,79,129


In [18]:
# Selecting columns: bracket notation with a single column name returns that column as a Series
df['month']

0     January
1    February
2       March
3       April
4         May
5        June
Name: month, dtype: object

In [19]:
# Alternatively, select a single column with attribute (dot) access instead of brackets
df.clinic_west

0    100
1     45
2     96
3    180
4    154
5    129
Name: clinic_west, dtype: int64

In [20]:
# Selecting multiple columns: indexing with a list of column names yields a DataFrame
wanted_columns = ['clinic_north', 'clinic_south']
df[wanted_columns]

Unnamed: 0,clinic_north,clinic_south
0,100,23
1,45,145
2,96,65
3,80,54
4,54,54
5,109,79


In [22]:
# Selecting rows with logic I: build a boolean mask, then index the frame with it
is_january = df.month == 'January'
df[is_january]

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
0,January,100,100,23,100


In [28]:
# Select rows with logic II: combine two conditions with | (element-wise OR)
is_january = df.month == 'January'
is_march = df.month == 'March'
df[is_january | is_march]

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
0,January,100,100,23,100
2,March,81,96,65,96


In [27]:
# The same kind of OR-filter, written with bracket column access instead of dot access
in_march = df["month"] == "March"
in_april = df["month"] == "April"
df[in_march | in_april]

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
2,March,81,96,65,96
3,April,80,80,54,180


In [33]:
# Select rows with logic III: keep the rows whose month appears in a list of accepted values
accepted_months = ['January', 'February', 'March']
df[df.month.isin(accepted_months)]


Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
0,January,100,100,23,100
1,February,51,45,145,45
2,March,81,96,65,96


In [37]:
# Subset of rows chosen by index label; df.iloc[[0, 3, 5]] picks the same rows here.
# The selected rows keep their original labels (0, 3, 5).
row_labels = [0, 3, 5]
df2 = df.loc[row_labels]
df2

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
0,January,100,100,23,100
3,April,80,80,54,180
5,June,112,109,79,129


In [40]:
# Renumber the subset's rows from 0 upward; drop=True discards the old labels
# instead of keeping them as a new column.
# The result is rebound to df2 rather than using inplace=True, so the frame
# displayed by the earlier cell is not mutated behind the reader's back.
df2 = df2.reset_index(drop=True)
df2

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
0,January,100,100,23,100
1,April,80,80,54,180
2,June,112,109,79,129


In [41]:
# Show the current working directory. A bare `pwd` is not plain Python: it relies on
# IPython's automagic and only works inside a notebook/IPython session.
# NOTE(review): the recorded output exposes an absolute local path; consider clearing it before sharing.
pwd

'/home/roshan/Desktop/data/Data Manipiulation with Pandas'

**In this example, you’ll be the data analyst for ShoeFly.com, a fictional online shoe store. You’ve seen this data; now it’s your turn to work with it!**


In [44]:
# Read shoefly.csv (resolved relative to the current working directory) into `orders`,
# then display the loaded frame.
orders = pd.read_csv(filepath_or_buffer='shoefly.csv')
orders

Unnamed: 0,id,first_name,last_name,email,shoe_type,shoe_material,shoe_color
0,54791,Rebecca,Lindsay,RebeccaLindsay57@hotmail.com,clogs,faux-leather,black
1,53450,Emily,Joyce,EmilyJoyce25@gmail.com,ballet flats,faux-leather,navy
2,91987,Joyce,Waller,Joyce.Waller@gmail.com,sandals,fabric,black
3,14437,Justin,Erickson,Justin.Erickson@outlook.com,clogs,faux-leather,red
4,79357,Andrew,Banks,AB4318@gmail.com,boots,leather,brown
5,52386,Julie,Marsh,JulieMarsh59@gmail.com,sandals,fabric,black
6,20487,Thomas,Jensen,TJ5470@gmail.com,clogs,fabric,navy
7,76971,Janice,Hicks,Janice.Hicks@gmail.com,clogs,faux-leather,navy
8,21586,Gabriel,Porter,GabrielPorter24@gmail.com,clogs,leather,brown
9,62083,Frances,Palmer,FrancesPalmer50@gmail.com,wedges,leather,white


In [46]:
# Inspect the first 5 rows of the data (head() with no argument shows five rows)
orders.head()

Unnamed: 0,id,first_name,last_name,email,shoe_type,shoe_material,shoe_color
0,54791,Rebecca,Lindsay,RebeccaLindsay57@hotmail.com,clogs,faux-leather,black
1,53450,Emily,Joyce,EmilyJoyce25@gmail.com,ballet flats,faux-leather,navy
2,91987,Joyce,Waller,Joyce.Waller@gmail.com,sandals,fabric,black
3,14437,Justin,Erickson,Justin.Erickson@outlook.com,clogs,faux-leather,red
4,79357,Andrew,Banks,AB4318@gmail.com,boots,leather,brown


In [48]:
# The marketing department wants to send an email blast to everyone who ordered shoes.
# Pull the `email` column out of `orders` and keep it in a variable called `emails`.
emails = orders['email']
emails


0     RebeccaLindsay57@hotmail.com
1           EmilyJoyce25@gmail.com
2           Joyce.Waller@gmail.com
3      Justin.Erickson@outlook.com
4                 AB4318@gmail.com
5           JulieMarsh59@gmail.com
6                 TJ5470@gmail.com
7           Janice.Hicks@gmail.com
8        GabrielPorter24@gmail.com
9        FrancesPalmer50@gmail.com
10         JessicaHale25@gmail.com
11      LawrenceParker44@gmail.com
12         SusanDennis58@gmail.com
13                DO2680@gmail.com
14       Rebecca.Charles@gmail.com
15              JC2072@hotmail.com
16              VS4753@outlook.com
17          RoyTillman20@gmail.com
18       Thomas.Roberson@gmail.com
19         ANewton1977@outlook.com
Name: email, dtype: object

In [51]:
# Frances Palmer claims that her order was wrong. What did Frances Palmer order?
# Both name conditions must hold, so the two masks are combined with & (element-wise AND)
# and the matching row of `orders` is saved to `frances_palmer`.
first_name_matches = orders.first_name == "Frances"
last_name_matches = orders.last_name == "Palmer"
frances_palmer = orders[first_name_matches & last_name_matches]
frances_palmer


Unnamed: 0,id,first_name,last_name,email,shoe_type,shoe_material,shoe_color
9,62083,Frances,Palmer,FrancesPalmer50@gmail.com,wedges,leather,white


In [56]:
# We need some customer reviews for our comfortable shoes: collect every order whose
# shoe_type is clogs, boots, or ballet flats and save the result to `comfy_shoes`.
comfy_types = ["clogs", "boots", "ballet flats"]
comfy_shoes = orders[orders.shoe_type.isin(comfy_types)]


In [57]:
# Display the filtered orders; the rows keep their original labels from `orders`
# (no index reset), so the index has gaps.
comfy_shoes

Unnamed: 0,id,first_name,last_name,email,shoe_type,shoe_material,shoe_color
0,54791,Rebecca,Lindsay,RebeccaLindsay57@hotmail.com,clogs,faux-leather,black
1,53450,Emily,Joyce,EmilyJoyce25@gmail.com,ballet flats,faux-leather,navy
3,14437,Justin,Erickson,Justin.Erickson@outlook.com,clogs,faux-leather,red
4,79357,Andrew,Banks,AB4318@gmail.com,boots,leather,brown
6,20487,Thomas,Jensen,TJ5470@gmail.com,clogs,fabric,navy
7,76971,Janice,Hicks,Janice.Hicks@gmail.com,clogs,faux-leather,navy
8,21586,Gabriel,Porter,GabrielPorter24@gmail.com,clogs,leather,brown
10,91629,Jessica,Hale,JessicaHale25@gmail.com,clogs,leather,red
12,45832,Susan,Dennis,SusanDennis58@gmail.com,ballet flats,fabric,white
14,73431,Rebecca,Charles,Rebecca.Charles@gmail.com,boots,faux-leather,white


In [59]:
# Six-month clinic table assembled from separately named column labels and row records.
clinic_columns = ['month', 'clinic_east', 'clinic_north', 'clinic_south', 'clinic_west']
clinic_rows = [
    ('January', 100, 100, 23, 100),
    ('February', 51, 45, 145, 45),
    ('March', 81, 96, 65, 96),
    ('April', 80, 80, 54, 180),
    ('May', 51, 54, 54, 154),
    ('June', 112, 109, 79, 129),
]
clinic_df = pd.DataFrame(clinic_rows, columns=clinic_columns)

In [60]:
# Preview the first five rows of the clinic table (the sixth row, June, is not shown).
clinic_df.head()

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
0,January,100,100,23,100
1,February,51,45,145,45
2,March,81,96,65,96
3,April,80,80,54,180
4,May,51,54,54,154


In [62]:
# Quiz: to select the row containing all of the data for the month of May, which of the following lines of code would you use?

In [64]:
# Option 1: look the row up by its index label (May sits at label 4); returns a Series.
clinic_df.loc[4]

month           May
clinic_east      51
clinic_north     54
clinic_south     54
clinic_west     154
Name: 4, dtype: object

In [65]:
# Option 2: filter with a boolean mask on the month column; returns a one-row DataFrame.
is_may = clinic_df.month == 'May'
clinic_df[is_may]

Unnamed: 0,month,clinic_east,clinic_north,clinic_south,clinic_west
4,May,51,54,54,154


In [66]:
# Small customer table built column-by-column: one list per column, four customers each.
customers = pd.DataFrame({
    'name': ['Jesse Sternberg', 'Amy Lauder', 'Gerri Sanderson', 'Austin Barnes'],
    'address': ['193 6th Avenue', '546 Marblehead Way', '65 New York Street', '2888 North Ogden Avenue'],
    'age': [31, 43, 35, 28],
})

In [67]:
# Display the whole customers table (it has only four rows).
customers

Unnamed: 0,name,address,age
0,Jesse Sternberg,193 6th Avenue,31
1,Amy Lauder,546 Marblehead Way,43
2,Gerri Sanderson,65 New York Street,35
3,Austin Barnes,2888 North Ogden Avenue,28


In [69]:
# Attribute-style access to the `age` column; returns a Series of integer ages.
customers.age

0    31
1    43
2    35
3    28
Name: age, dtype: int64