## Pandas Dataframe
- **Table of columns and rows in pandas which can be restructured and filtered**
- **Formal def: A group of pandas series objects that share the same index**

In [20]:
import numpy as np
import pandas as pd
import os

- **Generating a dataframe using pandas**

In [21]:
np.random.seed(101)
mydata = np.random.randint(0,101,(4,3))
mydata

array([[95, 11, 81],
       [70, 63, 87],
       [75,  9, 77],
       [40,  4, 63]])

In [22]:
#naming rows (index for the 3 pandas series objects)
myindex = ['DL','PN','RJ','TN']  #states

#naming columns
mycols = ['Jan','Feb','Mar']

In [23]:
myindex

['DL', 'PN', 'RJ', 'TN']

In [24]:
mycols

['Jan', 'Feb', 'Mar']

In [25]:
#generating a pandas dataframe

df = pd.DataFrame(data = mydata,index = myindex,columns = mycols)
df

Unnamed: 0,Jan,Feb,Mar
DL,95,11,81
PN,70,63,87
RJ,75,9,77
TN,40,4,63


In [26]:
#extracting imp info about the dataframe using method info()

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4 entries, DL to TN
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Jan     4 non-null      int32
 1   Feb     4 non-null      int32
 2   Mar     4 non-null      int32
dtypes: int32(3)
memory usage: 80.0+ bytes


- **Reading from a CSV file to generate a pandas dataframe**

In [27]:
#Where is this python code located?

#find by using getcwd (get current working directory) command in os module
os.getcwd()

'C:\\Users\\Ritika Gupta\\Desktop\\Ritika\\6. Courses and Certifications\\Python udemy BOOTCAMP\\Self\\03-pandas'

In [20]:
#returns list of files in the current directory
ls

 Volume in drive C is OS
 Volume Serial Number is 5A30-25F0

 Directory of C:\Users\Ritika Gupta\Desktop\Python udemy BOOTCAMP\Self

09-06-2023  02:37    <DIR>          .
06-06-2023  01:30    <DIR>          ..
07-06-2023  17:58    <DIR>          .ipynb_checkpoints
06-06-2023  01:31            10,276 01-Python Crash Course Exercises.ipynb
07-06-2023  00:51            21,922 02-numpy-1.ipynb
07-06-2023  02:54            10,057 02-numpy-2.ipynb
07-06-2023  02:54            13,041 02-numpy-3.ipynb
07-06-2023  03:13            23,270 02-NumPy-Exercises.ipynb
07-06-2023  17:01             7,671 03-pandas-1 Series.ipynb
07-06-2023  17:57            11,409 03-pandas-2 Series operations.ipynb
09-06-2023  02:37             6,214 03-pandas-3 Dataframe creation.ipynb
               8 File(s)        103,860 bytes
               3 Dir(s)  330,018,885,632 bytes free


In [28]:
#reading into the csv file

df = pd.read_csv('tips.csv')

In [29]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
...,...,...,...,...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17


- **Basic properties of reading into and handling a csv file using pandas**

In [30]:
df.columns
#list of column names as a string

Index(['total_bill', 'tip', 'sex', 'smoker', 'day', 'time', 'size',
       'price_per_person', 'Payer Name', 'CC Number', 'Payment ID'],
      dtype='object')

In [32]:
df.index

RangeIndex(start=0, stop=244, step=1)

In [33]:
#printing first few rows to analyse the general structure of the dataframe

df.head()
#by default, head() returns first 5 rows

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251


In [34]:
#printing first ten rows using method head()
df.head(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251
5,25.29,4.71,Male,No,Sun,Dinner,4,6.32,Erik Smith,213140353657882,Sun9679
6,8.77,2.0,Male,No,Sun,Dinner,2,4.38,Kristopher Johnson,2223727524230344,Sun5985
7,26.88,3.12,Male,No,Sun,Dinner,4,6.72,Robert Buck,3514785077705092,Sun8157
8,15.04,1.96,Male,No,Sun,Dinner,2,7.52,Joseph Mcdonald,3522866365840377,Sun6820
9,14.78,3.23,Male,No,Sun,Dinner,2,7.39,Jerome Abbott,3532124519049786,Sun3775


In [35]:
#printing the last five rows

df.tail()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.0,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.0,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17
243,18.78,3.0,Female,No,Thur,Dinner,2,9.39,Michelle Hardin,3511451626698139,Thur672


In [36]:
#printing last 10 rows
df.tail(10)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
234,15.53,3.0,Male,Yes,Sat,Dinner,2,7.76,Tracy Douglas,4097938155941930,Sat7220
235,10.07,1.25,Male,No,Sat,Dinner,2,5.04,Sean Gonzalez,3534021246117605,Sat4615
236,12.6,1.0,Male,Yes,Sat,Dinner,2,6.3,Matthew Myers,3543676378973965,Sat5032
237,32.83,1.17,Male,Yes,Sat,Dinner,2,16.42,Thomas Brown,4284722681265508,Sat2929
238,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,676184013727,Sat9777
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,5296068606052842,Sat2657
240,27.18,2.0,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,3506806155565404,Sat1766
241,22.67,2.0,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,6011891618747196,Sat3880
242,17.82,1.75,Male,No,Sat,Dinner,2,8.91,Dennis Dixon,4375220550950,Sat17
243,18.78,3.0,Female,No,Thur,Dinner,2,9.39,Michelle Hardin,3511451626698139,Thur672


In [37]:
#printing basic info of the dataframe

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   total_bill        244 non-null    float64
 1   tip               244 non-null    float64
 2   sex               244 non-null    object 
 3   smoker            244 non-null    object 
 4   day               244 non-null    object 
 5   time              244 non-null    object 
 6   size              244 non-null    int64  
 7   price_per_person  244 non-null    float64
 8   Payer Name        244 non-null    object 
 9   CC Number         244 non-null    int64  
 10  Payment ID        244 non-null    object 
dtypes: float64(3), int64(2), object(6)
memory usage: 21.1+ KB


In [38]:
#printing basic statistical information for the numeric value columns
df.describe()

Unnamed: 0,total_bill,tip,size,price_per_person,CC Number
count,244.0,244.0,244.0,244.0,244.0
mean,19.785943,2.998279,2.569672,7.888197,2563496000000000.0
std,8.902412,1.383638,0.9511,2.914234,2369340000000000.0
min,3.07,1.0,1.0,2.88,60406790000.0
25%,13.3475,2.0,2.0,5.8,30407310000000.0
50%,17.795,2.9,2.0,7.255,3525318000000000.0
75%,24.1275,3.5625,3.0,9.39,4553675000000000.0
max,50.81,10.0,6.0,20.27,6596454000000000.0


In [41]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_bill,244.0,19.78594,8.902412,3.07,13.3475,17.795,24.1275,50.81
tip,244.0,2.998279,1.383638,1.0,2.0,2.9,3.5625,10.0
size,244.0,2.569672,0.9510998,1.0,2.0,2.0,3.0,6.0
price_per_person,244.0,7.888197,2.914234,2.88,5.8,7.255,9.39,20.27
CC Number,244.0,2563496000000000.0,2369340000000000.0,60406790000.0,30407310000000.0,3525318000000000.0,4553675000000000.0,6596454000000000.0


- **Retrieving info from dataframe by Working with columns**

In [42]:
df.head(2)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608


In [43]:
df['total_bill']

0      16.99
1      10.34
2      21.01
3      23.68
4      24.59
       ...  
239    29.03
240    27.18
241    22.67
242    17.82
243    18.78
Name: total_bill, Length: 244, dtype: float64

In [44]:
df['price_per_person']

0       8.49
1       3.45
2       7.00
3      11.84
4       6.15
       ...  
239     9.68
240    13.59
241    11.34
242     8.91
243     9.39
Name: price_per_person, Length: 244, dtype: float64

In [45]:
type(df['total_bill'])

pandas.core.series.Series

In [46]:
df['total_bill'].sum()

4827.77

In [50]:
#printing multiple columns

df[['total_bill','tip','price_per_person']]

Unnamed: 0,total_bill,tip,price_per_person
0,16.99,1.01,8.49
1,10.34,1.66,3.45
2,21.01,3.50,7.00
3,23.68,3.31,11.84
4,24.59,3.61,6.15
...,...,...,...
239,29.03,5.92,9.68
240,27.18,2.00,13.59
241,22.67,2.00,11.34
242,17.82,1.75,8.91


In [51]:
#first 4 rows of the above data subset
df[['total_bill','tip','price_per_person']].head(4)

Unnamed: 0,total_bill,tip,price_per_person
0,16.99,1.01,8.49
1,10.34,1.66,3.45
2,21.01,3.5,7.0
3,23.68,3.31,11.84


In [84]:
#creating new columns

#creating a new column which tell the tip percentage wrt the total bill of a person
df['tip_percentage'] = 100* df['tip']/df['total_bill']
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,tip_percentage
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,3560325168603410,Sun2959,5.944673
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,4478071379779230,Sun4608,16.054159
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,16.658734
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,4676137647685994,Sun5260,13.978041
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,4832732618637221,Sun2251,14.680765


In [85]:
#rounding off floating point column data

df['price_per_person'] = np.round(df['price_per_person'],decimals=0)
df['tip_percentage'] = np.round(df['tip_percentage'],decimals=2)

In [64]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,CC Number,Payment ID,tip_percentage
0,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,3560325168603410,Sun2959,5.94
1,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,4478071379779230,Sun4608,16.05
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,6011812112971322,Sun4458,16.66
3,23.68,3.31,Male,No,Sun,Dinner,2,12.0,Nathaniel Harris,4676137647685994,Sun5260,13.98
4,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,4832732618637221,Sun2251,14.68


In [86]:
#removing rows or columns using method drop()

#axis = 0  => drop row
#axis = 1  => drop column

#by default, axis = 0 , i.e., row is removed by default

df=df.drop('CC Number', axis=1)

#removing last row
df=df.drop(243)

In [79]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,Payment ID
0,16.99,1.01,Female,No,Sun,Dinner,2,8.49,Christy Cunningham,Sun2959
1,10.34,1.66,Male,No,Sun,Dinner,3,3.45,Douglas Tucker,Sun4608
2,21.01,3.50,Male,No,Sun,Dinner,3,7.00,Travis Walters,Sun4458
3,23.68,3.31,Male,No,Sun,Dinner,2,11.84,Nathaniel Harris,Sun5260
4,24.59,3.61,Female,No,Sun,Dinner,4,6.15,Tonya Carter,Sun2251
...,...,...,...,...,...,...,...,...,...,...
238,35.83,4.67,Female,No,Sat,Dinner,3,11.94,Kimberly Crane,Sat9777
239,29.03,5.92,Male,No,Sat,Dinner,3,9.68,Michael Avila,Sat2657
240,27.18,2.00,Female,Yes,Sat,Dinner,2,13.59,Monica Sanders,Sat1766
241,22.67,2.00,Male,Yes,Sat,Dinner,2,11.34,Keith Wong,Sat3880


- **Retrieving info from dataframe by Working with rows / INDEX**

In [87]:
df.index

RangeIndex(start=0, stop=243, step=1)

In [89]:
df.head(3)

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,Payment ID,tip_percentage
0,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,Sun2959,5.94
1,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,Sun4608,16.05
2,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,Sun4458,16.66


In [93]:
#generally index should be a uniquely labeled list

#setting payment id columns as the labeled index
df = df.set_index('Payment ID')

In [95]:
df.head(2)

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,16.05


In [102]:
#resetting index (undoing the action above)

df = df.reset_index()
df.head(2)

Unnamed: 0,Payment ID,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
0,Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
1,Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,16.05


In [103]:
#printing rows using the labeled index payment id
df = df.set_index('Payment ID')
df.head()

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,16.05
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,12.0,Nathaniel Harris,13.98
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,14.68


In [104]:
#printing row using integer indexing i.e., 0,1,2,...
df.iloc[0]  #first row

#iloc = integer location

total_bill                       16.99
tip                               1.01
sex                             Female
smoker                              No
day                                Sun
time                            Dinner
size                                 2
price_per_person                   8.0
Payer Name          Christy Cunningham
tip_percentage                    5.94
Name: Sun2959, dtype: object

In [106]:
#printing row using labeled indexing

df.loc['Sun2959']   #first row

total_bill                       16.99
tip                               1.01
sex                             Female
smoker                              No
day                                Sun
time                            Dinner
size                                 2
price_per_person                   8.0
Payer Name          Christy Cunningham
tip_percentage                    5.94
Name: Sun2959, dtype: object

In [115]:
#printing multiple rows using iloc

df.iloc[0:4]    #slicing

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,16.05
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,12.0,Nathaniel Harris,13.98


In [116]:
df[0:4]  #same as above

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,16.05
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,12.0,Nathaniel Harris,13.98


In [112]:
df.iloc[[0,2,5,8]]

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.0,Erik Smith,18.62
Sun6820,15.04,1.96,Male,No,Sun,Dinner,2,8.0,Joseph Mcdonald,13.03


In [110]:
df.iloc[0:5:2]

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,14.68


In [111]:
df[0:5:2]

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,14.68


In [118]:
df.head()

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,16.05
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,12.0,Nathaniel Harris,13.98
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,14.68


In [119]:
#printing multiple rows using loc

df.loc[['Sun2959','Sun4458']]

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2959,16.99,1.01,Female,No,Sun,Dinner,2,8.0,Christy Cunningham,5.94
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66


In [125]:
#removing rows using drop()

df = df.drop('Sun2959')
df.head()

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun4608,10.34,1.66,Male,No,Sun,Dinner,3,3.0,Douglas Tucker,16.05
Sun4458,21.01,3.5,Male,No,Sun,Dinner,3,7.0,Travis Walters,16.66
Sun5260,23.68,3.31,Male,No,Sun,Dinner,2,12.0,Nathaniel Harris,13.98
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,14.68
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.0,Erik Smith,18.62


In [135]:
#use slicing to remove/select rows using integer indexing

#example, if i needed to remove the 1st row

df = df.iloc[1:]
df

Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,14.68
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.0,Erik Smith,18.62
Sun5985,8.77,2.00,Male,No,Sun,Dinner,2,4.0,Kristopher Johnson,22.81
Sun8157,26.88,3.12,Male,No,Sun,Dinner,4,7.0,Robert Buck,11.61
Sun6820,15.04,1.96,Male,No,Sun,Dinner,2,8.0,Joseph Mcdonald,13.03
...,...,...,...,...,...,...,...,...,...,...
Sat9777,35.83,4.67,Female,No,Sat,Dinner,3,12.0,Kimberly Crane,13.03
Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,10.0,Michael Avila,20.39
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,14.0,Monica Sanders,7.36
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.0,Keith Wong,8.82


In [151]:
#inserting new row

first_row = df.iloc[0]
#appending first row
df = df.append(first_row)
df

  df = df.append(first_row)


Unnamed: 0_level_0,total_bill,tip,sex,smoker,day,time,size,price_per_person,Payer Name,tip_percentage
Payment ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Sun2251,24.59,3.61,Female,No,Sun,Dinner,4,6.0,Tonya Carter,14.68
Sun9679,25.29,4.71,Male,No,Sun,Dinner,4,6.0,Erik Smith,18.62
Sun5985,8.77,2.00,Male,No,Sun,Dinner,2,4.0,Kristopher Johnson,22.81
Sun8157,26.88,3.12,Male,No,Sun,Dinner,4,7.0,Robert Buck,11.61
Sun6820,15.04,1.96,Male,No,Sun,Dinner,2,8.0,Joseph Mcdonald,13.03
...,...,...,...,...,...,...,...,...,...,...
Sat2657,29.03,5.92,Male,No,Sat,Dinner,3,10.0,Michael Avila,20.39
Sat1766,27.18,2.00,Female,Yes,Sat,Dinner,2,14.0,Monica Sanders,7.36
Sat3880,22.67,2.00,Male,Yes,Sat,Dinner,2,11.0,Keith Wong,8.82
Sat17,17.82,1.75,Male,No,Sat,Dinner,2,9.0,Dennis Dixon,9.82
