In [1]:
import pandas as pd

In [2]:
melbourne_file_path = 'Introduction_Machine_Learning_Data.csv'
melbourne_data = pd.read_csv(melbourne_file_path) 
melbourne_data.columns

Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',
       'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',
       'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',
       'Longtitude', 'Regionname', 'Propertycount'],
      dtype='object')

In [3]:
reviews = melbourne_data

In [4]:
#Pandas indexing works in one of two paradigms. 
#The first is index-based selection: selecting data based on its numerical position in the data. 
#iloc follows this paradigm.

In [5]:
#To select the first row of data in a DataFrame, we may use the following:
reviews.iloc[0]

Suburb                      Abbotsford
Address                   85 Turner St
Rooms                                2
Type                                 h
Price                         1.48e+06
Method                               S
SellerG                         Biggin
Date                         3/12/2016
Distance                           2.5
Postcode                          3067
Bedroom2                             2
Bathroom                             1
Car                                  1
Landsize                           202
BuildingArea                       NaN
YearBuilt                          NaN
CouncilArea                      Yarra
Lattitude                     -37.7996
Longtitude                     144.998
Regionname       Northern Metropolitan
Propertycount                     4019
Name: 0, dtype: object

In [6]:
#Both loc and iloc are row-first, column-second. 
#This is the opposite of what we do in native Python, which is column-first, row-second.
#To get a column with iloc, we can do the following:
reviews.iloc[:, 0]

0           Abbotsford
1           Abbotsford
2           Abbotsford
3           Abbotsford
4           Abbotsford
             ...      
13575    Wheelers Hill
13576     Williamstown
13577     Williamstown
13578     Williamstown
13579       Yarraville
Name: Suburb, Length: 13580, dtype: object

In [7]:
reviews.iloc[:3, 0]

0    Abbotsford
1    Abbotsford
2    Abbotsford
Name: Suburb, dtype: object

In [8]:
reviews.iloc[1:3, 0]

1    Abbotsford
2    Abbotsford
Name: Suburb, dtype: object

In [9]:
reviews.iloc[-5:]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
13575,Wheelers Hill,12 Strada Cr,4,h,1245000.0,S,Barry,26/08/2017,16.7,3150.0,...,2.0,2.0,652.0,,1981.0,,-37.90562,145.16761,South-Eastern Metropolitan,7392.0
13576,Williamstown,77 Merrett Dr,3,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,2.0,2.0,333.0,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0
13577,Williamstown,83 Power St,3,h,1170000.0,S,Raine,26/08/2017,6.8,3016.0,...,2.0,4.0,436.0,,1997.0,,-37.85274,144.88738,Western Metropolitan,6380.0
13578,Williamstown,96 Verdon St,4,h,2500000.0,PI,Sweeney,26/08/2017,6.8,3016.0,...,1.0,5.0,866.0,157.0,1920.0,,-37.85908,144.89299,Western Metropolitan,6380.0
13579,Yarraville,6 Agnes St,4,h,1285000.0,SP,Village,26/08/2017,6.3,3013.0,...,1.0,1.0,362.0,112.0,1920.0,,-37.81188,144.88449,Western Metropolitan,6543.0


In [10]:
#Label-based selection
#In this paradigm, it's the data index value, not its position, which matters.

reviews.loc[0, 'Suburb']

'Abbotsford'

In [11]:
cols = ['Price','Method','SellerG']
indices = [0, 1, 10, 100]
df = reviews.loc[indices, cols]
df

Unnamed: 0,Price,Method,SellerG
0,1480000.0,S,Biggin
1,1035000.0,S,Biggin
10,700000.0,VB,Jellis
100,2000000.0,VB,RT


In [12]:
cols = ['Method','SellerG']
df = reviews.loc[:99, cols]
df

Unnamed: 0,Method,SellerG
0,S,Biggin
1,S,Biggin
2,SP,Biggin
3,PI,Biggin
4,VB,Nelson
...,...,...
95,S,Greg
96,S,Cayzer
97,S,Greg
98,S,Greg


In [13]:
reviews.loc[:, ['Price','Method','SellerG']]    

Unnamed: 0,Price,Method,SellerG
0,1480000.0,S,Biggin
1,1035000.0,S,Biggin
2,1465000.0,SP,Biggin
3,850000.0,PI,Biggin
4,1600000.0,VB,Nelson
...,...,...,...
13575,1245000.0,S,Barry
13576,1031000.0,SP,Williams
13577,1170000.0,S,Raine
13578,2500000.0,PI,Sweeney


In [14]:
#Remember that loc can index any stdlib type: strings, for example. 
#If we have a DataFrame with index values Apples, ..., Potatoes, ..., 
#and we want to select "all the alphabetical fruit choices between Apples and Potatoes", 
#then it's a lot more convenient to index df.loc['Apples':'Potatoes']

In [15]:
#This is particularly confusing when the DataFrame index is a simple numerical list, e.g. 0,...,1000. 
#In this case df.iloc[0:1000] will return 1000 entries, while df.loc[0:1000] return 1001 of them! 
#To get 1000 elements using loc, you will need to go one lower and ask for df.loc[0:999]

In [16]:
reviews.Suburb == 'Abbotsford'

0         True
1         True
2         True
3         True
4         True
         ...  
13575    False
13576    False
13577    False
13578    False
13579    False
Name: Suburb, Length: 13580, dtype: bool

In [17]:
reviews.loc[reviews.Suburb == 'Abbotsford']

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019.0
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0
5,Abbotsford,129 Charles St,2,h,941000.0,S,Jellis,7/05/2016,2.5,3067.0,...,1.0,0.0,181.0,,,Yarra,-37.8041,144.9953,Northern Metropolitan,4019.0
6,Abbotsford,124 Yarra St,3,h,1876000.0,S,Nelson,7/05/2016,2.5,3067.0,...,2.0,0.0,245.0,210.0,1910.0,Yarra,-37.8024,144.9993,Northern Metropolitan,4019.0
7,Abbotsford,98 Charles St,2,h,1636000.0,S,Nelson,8/10/2016,2.5,3067.0,...,1.0,2.0,256.0,107.0,1890.0,Yarra,-37.806,144.9954,Northern Metropolitan,4019.0
8,Abbotsford,6/241 Nicholson St,1,u,300000.0,S,Biggin,8/10/2016,2.5,3067.0,...,1.0,1.0,0.0,,,Yarra,-37.8008,144.9973,Northern Metropolitan,4019.0
9,Abbotsford,10 Valiant St,2,h,1097000.0,S,Biggin,8/10/2016,2.5,3067.0,...,1.0,2.0,220.0,75.0,1900.0,Yarra,-37.801,144.9989,Northern Metropolitan,4019.0


In [18]:
reviews.loc[(reviews.Suburb == 'Abbotsford') & (reviews.Rooms>=3)]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0
6,Abbotsford,124 Yarra St,3,h,1876000.0,S,Nelson,7/05/2016,2.5,3067.0,...,2.0,0.0,245.0,210.0,1910.0,Yarra,-37.8024,144.9993,Northern Metropolitan,4019.0
11,Abbotsford,40 Nicholson St,3,h,1350000.0,VB,Nelson,12/11/2016,2.5,3067.0,...,2.0,2.0,214.0,190.0,2005.0,Yarra,-37.8085,144.9964,Northern Metropolitan,4019.0
16,Abbotsford,42 Henry St,3,h,1200000.0,S,Jellis,16/07/2016,2.5,3067.0,...,2.0,1.0,113.0,110.0,1880.0,Yarra,-37.8056,144.993,Northern Metropolitan,4019.0
17,Abbotsford,78 Yarra St,3,h,1176500.0,S,LITTLE,16/07/2016,2.5,3067.0,...,1.0,1.0,138.0,105.0,1890.0,Yarra,-37.8021,144.9965,Northern Metropolitan,4019.0
18,Abbotsford,196 Nicholson St,3,h,955000.0,S,Collins,17/09/2016,2.5,3067.0,...,1.0,0.0,183.0,,,Yarra,-37.8022,144.9975,Northern Metropolitan,4019.0
20,Abbotsford,3/72 Charles St,4,h,1330000.0,PI,Kay,18/03/2017,2.5,3067.0,...,2.0,2.0,780.0,135.0,1900.0,Yarra,-37.8073,144.9952,Northern Metropolitan,4019.0
21,Abbotsford,13/11 Nicholson St,3,t,900000.0,S,Beller,18/03/2017,2.5,3067.0,...,2.0,2.0,0.0,,2010.0,Yarra,-37.8093,144.9959,Northern Metropolitan,4019.0


In [19]:
reviews.loc[(reviews.Suburb == 'Abbotsford') | (reviews.Rooms>=3)]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra,-37.79960,144.99840,Northern Metropolitan,4019.0
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.80790,144.99340,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.80930,144.99440,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.79690,144.99690,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.80720,144.99410,Northern Metropolitan,4019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,Wheelers Hill,12 Strada Cr,4,h,1245000.0,S,Barry,26/08/2017,16.7,3150.0,...,2.0,2.0,652.0,,1981.0,,-37.90562,145.16761,South-Eastern Metropolitan,7392.0
13576,Williamstown,77 Merrett Dr,3,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,2.0,2.0,333.0,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0
13577,Williamstown,83 Power St,3,h,1170000.0,S,Raine,26/08/2017,6.8,3016.0,...,2.0,4.0,436.0,,1997.0,,-37.85274,144.88738,Western Metropolitan,6380.0
13578,Williamstown,96 Verdon St,4,h,2500000.0,PI,Sweeney,26/08/2017,6.8,3016.0,...,1.0,5.0,866.0,157.0,1920.0,,-37.85908,144.89299,Western Metropolitan,6380.0


In [20]:
reviews.loc[reviews.Suburb.isin(['Abbotsford', 'Williamstown'])]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra,-37.79960,144.99840,Northern Metropolitan,4019.0
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.80790,144.99340,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.80930,144.99440,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.79690,144.99690,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.80720,144.99410,Northern Metropolitan,4019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13084,Williamstown,54 Victoria St,5,h,2550000.0,PI,Williams,19/08/2017,6.8,3016.0,...,2.0,2.0,840.0,244.0,1910.0,,-37.86330,144.88871,Western Metropolitan,6380.0
13329,Williamstown,146 Ferguson St,3,h,832000.0,S,Williams,23/09/2017,6.8,3016.0,...,1.0,1.0,269.0,,,,-37.85808,144.89268,Western Metropolitan,6380.0
13576,Williamstown,77 Merrett Dr,3,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,2.0,2.0,333.0,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0
13577,Williamstown,83 Power St,3,h,1170000.0,S,Raine,26/08/2017,6.8,3016.0,...,2.0,4.0,436.0,,1997.0,,-37.85274,144.88738,Western Metropolitan,6380.0


In [21]:
reviews.loc[reviews.BuildingArea.notnull()]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.80790,144.99340,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.80930,144.99440,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.80720,144.99410,Northern Metropolitan,4019.0
6,Abbotsford,124 Yarra St,3,h,1876000.0,S,Nelson,7/05/2016,2.5,3067.0,...,2.0,0.0,245.0,210.0,1910.0,Yarra,-37.80240,144.99930,Northern Metropolitan,4019.0
7,Abbotsford,98 Charles St,2,h,1636000.0,S,Nelson,8/10/2016,2.5,3067.0,...,1.0,2.0,256.0,107.0,1890.0,Yarra,-37.80600,144.99540,Northern Metropolitan,4019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13572,Watsonia,76 Kenmare St,2,h,650000.0,PI,Morrison,26/08/2017,14.5,3087.0,...,1.0,1.0,210.0,79.0,2006.0,,-37.70657,145.07878,Northern Metropolitan,2329.0
13573,Werribee,5 Nuragi Ct,4,h,635000.0,S,hockingstuart,26/08/2017,14.7,3030.0,...,2.0,1.0,662.0,172.0,1980.0,,-37.89327,144.64789,Western Metropolitan,16166.0
13576,Williamstown,77 Merrett Dr,3,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,2.0,2.0,333.0,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0
13578,Williamstown,96 Verdon St,4,h,2500000.0,PI,Sweeney,26/08/2017,6.8,3016.0,...,1.0,5.0,866.0,157.0,1920.0,,-37.85908,144.89299,Western Metropolitan,6380.0


In [22]:
reviews.loc[reviews.BuildingArea.isnull()]

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra,-37.79960,144.99840,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.79690,144.99690,Northern Metropolitan,4019.0
5,Abbotsford,129 Charles St,2,h,941000.0,S,Jellis,7/05/2016,2.5,3067.0,...,1.0,0.0,181.0,,,Yarra,-37.80410,144.99530,Northern Metropolitan,4019.0
8,Abbotsford,6/241 Nicholson St,1,u,300000.0,S,Biggin,8/10/2016,2.5,3067.0,...,1.0,1.0,0.0,,,Yarra,-37.80080,144.99730,Northern Metropolitan,4019.0
10,Abbotsford,411/8 Grosvenor St,2,u,700000.0,VB,Jellis,12/11/2016,2.5,3067.0,...,2.0,1.0,0.0,,,Yarra,-37.81100,145.00670,Northern Metropolitan,4019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13569,Wantirna South,12 Armagh Cr,4,h,1323000.0,S,Barry,26/08/2017,14.7,3152.0,...,2.0,2.0,551.0,,,,-37.87608,145.22390,Eastern Metropolitan,7082.0
13570,Wantirna South,34 Fewster Dr,3,h,970000.0,S,Barry,26/08/2017,14.7,3152.0,...,2.0,2.0,674.0,,,,-37.88360,145.22805,Eastern Metropolitan,7082.0
13574,Westmeadows,9 Black St,3,h,582000.0,S,Red,26/08/2017,16.5,3049.0,...,2.0,2.0,256.0,,,,-37.67917,144.89390,Northern Metropolitan,2474.0
13575,Wheelers Hill,12 Strada Cr,4,h,1245000.0,S,Barry,26/08/2017,16.7,3150.0,...,2.0,2.0,652.0,,1981.0,,-37.90562,145.16761,South-Eastern Metropolitan,7392.0


In [23]:
reviews['critic'] = 'everyone'
reviews['critic']

0        everyone
1        everyone
2        everyone
3        everyone
4        everyone
           ...   
13575    everyone
13576    everyone
13577    everyone
13578    everyone
13579    everyone
Name: critic, Length: 13580, dtype: object

In [24]:
reviews

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,critic
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,202.0,,,Yarra,-37.79960,144.99840,Northern Metropolitan,4019.0,everyone
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,0.0,156.0,79.0,1900.0,Yarra,-37.80790,144.99340,Northern Metropolitan,4019.0,everyone
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,0.0,134.0,150.0,1900.0,Yarra,-37.80930,144.99440,Northern Metropolitan,4019.0,everyone
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,1.0,94.0,,,Yarra,-37.79690,144.99690,Northern Metropolitan,4019.0,everyone
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,2.0,120.0,142.0,2014.0,Yarra,-37.80720,144.99410,Northern Metropolitan,4019.0,everyone
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,Wheelers Hill,12 Strada Cr,4,h,1245000.0,S,Barry,26/08/2017,16.7,3150.0,...,2.0,652.0,,1981.0,,-37.90562,145.16761,South-Eastern Metropolitan,7392.0,everyone
13576,Williamstown,77 Merrett Dr,3,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,2.0,333.0,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0,everyone
13577,Williamstown,83 Power St,3,h,1170000.0,S,Raine,26/08/2017,6.8,3016.0,...,4.0,436.0,,1997.0,,-37.85274,144.88738,Western Metropolitan,6380.0,everyone
13578,Williamstown,96 Verdon St,4,h,2500000.0,PI,Sweeney,26/08/2017,6.8,3016.0,...,5.0,866.0,157.0,1920.0,,-37.85908,144.89299,Western Metropolitan,6380.0,everyone


In [25]:
indices = [1, 2, 3, 5, 8]
sample_reviews = reviews.loc[indices]
sample_reviews

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,critic
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0,everyone
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0,everyone
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0,everyone
5,Abbotsford,129 Charles St,2,h,941000.0,S,Jellis,7/05/2016,2.5,3067.0,...,0.0,181.0,,,Yarra,-37.8041,144.9953,Northern Metropolitan,4019.0,everyone
8,Abbotsford,6/241 Nicholson St,1,u,300000.0,S,Biggin,8/10/2016,2.5,3067.0,...,1.0,0.0,,,Yarra,-37.8008,144.9973,Northern Metropolitan,4019.0,everyone


In [26]:
reviews.describe()

Unnamed: 0,Rooms,Price,Distance,Postcode,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,Lattitude,Longtitude,Propertycount
count,13580.0,13580.0,13580.0,13580.0,13580.0,13580.0,13518.0,13580.0,7130.0,8205.0,13580.0,13580.0,13580.0
mean,2.937997,1075684.0,10.137776,3105.301915,2.914728,1.534242,1.610075,558.416127,151.96765,1964.684217,-37.809203,144.995216,7454.417378
std,0.955748,639310.7,5.868725,90.676964,0.965921,0.691712,0.962634,3990.669241,541.014538,37.273762,0.07926,0.103916,4378.581772
min,1.0,85000.0,0.0,3000.0,0.0,0.0,0.0,0.0,0.0,1196.0,-38.18255,144.43181,249.0
25%,2.0,650000.0,6.1,3044.0,2.0,1.0,1.0,177.0,93.0,1940.0,-37.856822,144.9296,4380.0
50%,3.0,903000.0,9.2,3084.0,3.0,1.0,2.0,440.0,126.0,1970.0,-37.802355,145.0001,6555.0
75%,3.0,1330000.0,13.0,3148.0,3.0,2.0,2.0,651.0,174.0,1999.0,-37.7564,145.058305,10331.0
max,10.0,9000000.0,48.1,3977.0,20.0,8.0,10.0,433014.0,44515.0,2018.0,-37.40853,145.52635,21650.0


In [27]:
reviews.Rooms.describe()

count    13580.000000
mean         2.937997
std          0.955748
min          1.000000
25%          2.000000
50%          3.000000
75%          3.000000
max         10.000000
Name: Rooms, dtype: float64

In [28]:
# for string data
reviews.Suburb.describe()

count         13580
unique          314
top       Reservoir
freq            359
Name: Suburb, dtype: object

In [29]:
reviews.Rooms.mean()

2.9379970544919

In [30]:
reviews.Suburb.unique()

array(['Abbotsford', 'Airport West', 'Albert Park', 'Alphington',
       'Altona', 'Altona North', 'Armadale', 'Ascot Vale', 'Ashburton',
       'Ashwood', 'Avondale Heights', 'Balaclava', 'Balwyn',
       'Balwyn North', 'Bentleigh', 'Bentleigh East', 'Box Hill',
       'Braybrook', 'Brighton', 'Brighton East', 'Brunswick',
       'Brunswick West', 'Bulleen', 'Burwood', 'Camberwell', 'Canterbury',
       'Carlton North', 'Carnegie', 'Caulfield', 'Caulfield North',
       'Caulfield South', 'Chadstone', 'Clifton Hill', 'Coburg',
       'Coburg North', 'Collingwood', 'Doncaster', 'Eaglemont',
       'Elsternwick', 'Elwood', 'Essendon', 'Essendon North', 'Fairfield',
       'Fitzroy', 'Fitzroy North', 'Flemington', 'Footscray', 'Glen Iris',
       'Glenroy', 'Gowanbrae', 'Hadfield', 'Hampton', 'Hampton East',
       'Hawthorn', 'Heidelberg Heights', 'Heidelberg West', 'Hughesdale',
       'Ivanhoe', 'Kealba', 'Keilor East', 'Kensington', 'Kew',
       'Kew East', 'Kooyong', 'Maidstone', 

In [31]:
#o see a list of unique values and how often they occur in the dataset, we can use the value_counts() method
reviews.Suburb.value_counts()

Reservoir         359
Richmond          260
Bentleigh East    249
Preston           239
Brunswick         222
                 ... 
Croydon South       1
Montrose            1
Yarra Glen          1
Attwood             1
Healesville         1
Name: Suburb, Length: 314, dtype: int64

In [33]:
#maps
#map is a term, borrowed from mathematics, for a function that takes one set of values and "maps" them to another set of values.
#There are two mapping methods

#map() is the first, and slightly simpler one
#The function you pass to map() should expect a single value from the Series (a point value, in the above example), 
#and return a transformed version of that value. 
#map() returns a new Series where all the values have been transformed by your function.

review_size_mean = reviews.Landsize.mean()
reviews.Landsize.map(lambda p: p - review_size_mean)

0       -356.416127
1       -402.416127
2       -424.416127
3       -464.416127
4       -438.416127
            ...    
13575     93.583873
13576   -225.416127
13577   -122.416127
13578    307.583873
13579   -196.416127
Name: Landsize, Length: 13580, dtype: float64

In [36]:
#apply() is the equivalent method if we want to transform a whole DataFrame by calling a custom method on each row.

review_room_mean = reviews.Rooms.mean()

def remean_size(row):
    row.Landsize = row.Landsize - review_size_mean
    row.Rooms = row.Rooms - review_room_mean
    return row

reviews.apply(remean_size, axis='columns')

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,critic
0,Abbotsford,85 Turner St,-0.937997,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,-356.416127,,,Yarra,-37.79960,144.99840,Northern Metropolitan,4019.0,everyone
1,Abbotsford,25 Bloomburg St,-0.937997,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,0.0,-402.416127,79.0,1900.0,Yarra,-37.80790,144.99340,Northern Metropolitan,4019.0,everyone
2,Abbotsford,5 Charles St,0.062003,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,0.0,-424.416127,150.0,1900.0,Yarra,-37.80930,144.99440,Northern Metropolitan,4019.0,everyone
3,Abbotsford,40 Federation La,0.062003,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,1.0,-464.416127,,,Yarra,-37.79690,144.99690,Northern Metropolitan,4019.0,everyone
4,Abbotsford,55a Park St,1.062003,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,2.0,-438.416127,142.0,2014.0,Yarra,-37.80720,144.99410,Northern Metropolitan,4019.0,everyone
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,Wheelers Hill,12 Strada Cr,1.062003,h,1245000.0,S,Barry,26/08/2017,16.7,3150.0,...,2.0,93.583873,,1981.0,,-37.90562,145.16761,South-Eastern Metropolitan,7392.0,everyone
13576,Williamstown,77 Merrett Dr,0.062003,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,2.0,-225.416127,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0,everyone
13577,Williamstown,83 Power St,0.062003,h,1170000.0,S,Raine,26/08/2017,6.8,3016.0,...,4.0,-122.416127,,1997.0,,-37.85274,144.88738,Western Metropolitan,6380.0,everyone
13578,Williamstown,96 Verdon St,1.062003,h,2500000.0,PI,Sweeney,26/08/2017,6.8,3016.0,...,5.0,307.583873,157.0,1920.0,,-37.85908,144.89299,Western Metropolitan,6380.0,everyone


In [37]:
#If we had called reviews.apply() with axis='index', then instead of passing a function to transform each row, 
#we would need to give a function to transform each column.

In [38]:
#Note that map() and apply() return new, transformed Series and DataFrames, respectively. 
#They don't modify the original data they're called on. 
#If we look at the first row of reviews, we can see that it still has its original points value.

reviews.head(5)

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,critic
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,202.0,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019.0,everyone
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0,everyone
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0,everyone
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0,everyone
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0,everyone


In [39]:
reviews.Suburb + " - " + reviews.Address

0            Abbotsford - 85 Turner St
1         Abbotsford - 25 Bloomburg St
2            Abbotsford - 5 Charles St
3        Abbotsford - 40 Federation La
4             Abbotsford - 55a Park St
                     ...              
13575     Wheelers Hill - 12 Strada Cr
13576     Williamstown - 77 Merrett Dr
13577       Williamstown - 83 Power St
13578      Williamstown - 96 Verdon St
13579          Yarraville - 6 Agnes St
Length: 13580, dtype: object

In [40]:
#All of the standard Python operators (>, <, ==, and so on) work in this manner.

reviews.Suburb > reviews.Address

0        True
1        True
2        True
3        True
4        True
         ... 
13575    True
13576    True
13577    True
13578    True
13579    True
Length: 13580, dtype: bool

In [41]:
reviews.Rooms > reviews.Distance

0        False
1        False
2         True
3         True
4         True
         ...  
13575    False
13576    False
13577    False
13578    False
13579    False
Length: 13580, dtype: bool

In [49]:
#idxmax() returns index of maximum value
Cost_idx = (reviews.Price / reviews.Landsize).idxmax()
Cost_idx

8

In [50]:
exp_home = reviews.loc[Cost_idx, 'Address']
exp_home

'6/241 Nicholson St'

In [51]:
#idxmin() returns index of minimum value
Cost_idx = (reviews.Price / reviews.Landsize).idxmin()
Cost_idx

11020

In [52]:
bargain_home = reviews.loc[Cost_idx, 'Address']
bargain_home

'389 Gore St'

In [58]:
#sum() to get a  count of number of times of occurance
north = reviews.Regionname.map(lambda desc: "Northern" in desc).sum()
south_eastern = reviews.Regionname.map(lambda desc: "South-Eastern" in desc).sum()
Regionname_counts = pd.Series([north, south_eastern], index=['Northern', 'South-Eastern'])

In [59]:
Regionname_counts

Northern         3931
South-Eastern     450
dtype: int64

In [63]:
def stars(row):
    if row.Suburb == 'Williamstown':
        return 3
    elif row.Rooms >= 3:
        return 3
    elif row.Rooms >= 1:
        return 2
    else:
        return 1

star_ratings = reviews.apply(stars, axis='columns')

In [64]:
star_ratings

0        2
1        2
2        3
3        3
4        3
        ..
13575    3
13576    3
13577    3
13578    3
13579    3
Length: 13580, dtype: int64

In [65]:
reviews['ratings'] = star_ratings

In [66]:
reviews

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount,critic,ratings
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,202.0,,,Yarra,-37.79960,144.99840,Northern Metropolitan,4019.0,everyone,2
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,156.0,79.0,1900.0,Yarra,-37.80790,144.99340,Northern Metropolitan,4019.0,everyone,2
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,134.0,150.0,1900.0,Yarra,-37.80930,144.99440,Northern Metropolitan,4019.0,everyone,3
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,94.0,,,Yarra,-37.79690,144.99690,Northern Metropolitan,4019.0,everyone,3
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,120.0,142.0,2014.0,Yarra,-37.80720,144.99410,Northern Metropolitan,4019.0,everyone,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13575,Wheelers Hill,12 Strada Cr,4,h,1245000.0,S,Barry,26/08/2017,16.7,3150.0,...,652.0,,1981.0,,-37.90562,145.16761,South-Eastern Metropolitan,7392.0,everyone,3
13576,Williamstown,77 Merrett Dr,3,h,1031000.0,SP,Williams,26/08/2017,6.8,3016.0,...,333.0,133.0,1995.0,,-37.85927,144.87904,Western Metropolitan,6380.0,everyone,3
13577,Williamstown,83 Power St,3,h,1170000.0,S,Raine,26/08/2017,6.8,3016.0,...,436.0,,1997.0,,-37.85274,144.88738,Western Metropolitan,6380.0,everyone,3
13578,Williamstown,96 Verdon St,4,h,2500000.0,PI,Sweeney,26/08/2017,6.8,3016.0,...,866.0,157.0,1920.0,,-37.85908,144.89299,Western Metropolitan,6380.0,everyone,3
