# Operations

There are lots of operations with pandas that will be really useful to you, but don't fall into any distinct category. Let's show them here in this lecture:

In [1]:
import numpy as np
import pandas as pd

In [3]:
data = pd.read_excel("housesales.xlsx")

## Inspecting Data set

### data.info()

prints information about a DataFrame including the index dtype and column dtypes, non-null values and memory usage.

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220 entries, 0 to 219
Data columns (total 10 columns):
sqft            220 non-null int64
hometype        220 non-null object
beds            220 non-null int64
baths           220 non-null int64
lotsize         220 non-null int64
yearbulit       220 non-null int64
daysonmarket    220 non-null int64
parkingtype     220 non-null int64
zip             220 non-null int64
listprice       220 non-null float64
dtypes: float64(1), int64(8), object(1)
memory usage: 17.3+ KB


### data.head()

Return the first `n` rows of the data. The default value of `n` is 5 if not specified.

In [5]:
data.head(10) # first 10 records in data

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,1,80303,895.0
1,2920,Single Family Residential,3,3,38754,1966,81,1,80303,659.0
2,3845,Single Family Residential,4,3,46609,2005,19,1,80303,1840.0
3,5588,Single Family Residential,4,5,223463,2008,116,1,80303,6499.0
4,3934,Single Family Residential,4,4,40864,2014,130,1,80305,1500.0
5,7113,Single Family Residential,5,5,266152,1991,40,1,80303,2790.0
6,2340,Single Family Residential,4,3,7204,1959,39,1,80305,479.9
7,3857,Single Family Residential,4,3,10671,1995,37,1,80305,1487.5
8,6589,Single Family Residential,4,5,15682,1979,35,1,80305,2795.0
9,6549,Single Family Residential,5,6,30928,2000,92,1,80303,1965.0


## data.tail()

Return the last `n` rows.

In [6]:
data.tail(5) # Bottom 5 records in data

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
215,5180,Single Family Residential,6,4,128710,2006,81,1,80302,1500.0
216,5953,Single Family Residential,5,5,37373,2000,121,1,80301,2399.0
217,8619,Single Family Residential,6,6,47216,1991,81,1,80304,2495.0
218,650,Condo,1,1,22214,2002,62,1,80304,127.969
219,2608,Single Family Residential,3,3,86722,1991,172,1,80302,664.9


## data.columns

Get column names of a data frame

In [7]:
data.columns

Index(['sqft', 'hometype', 'beds', 'baths', 'lotsize', 'yearbulit',
       'daysonmarket', 'parkingtype', 'zip', 'listprice'],
      dtype='object')

## data.index

Get dataframe index

In [8]:
data.index

RangeIndex(start=0, stop=220, step=1)

## data.dtypes

Get column datatypes

In [9]:
data.dtypes

sqft              int64
hometype         object
beds              int64
baths             int64
lotsize           int64
yearbulit         int64
daysonmarket      int64
parkingtype       int64
zip               int64
listprice       float64
dtype: object

### data.shape

Returns number of rows and columns

In [10]:
data.shape

(220, 10)

### data.values

Only the values in the DataFrame will be returned, the axes labels
will be removed.

In [13]:
data.values

array([[3040, 'Single Family Residential', 4, ..., 1, 80303, 895.0],
       [2920, 'Single Family Residential', 3, ..., 1, 80303, 659.0],
       [3845, 'Single Family Residential', 4, ..., 1, 80303, 1840.0],
       ...,
       [8619, 'Single Family Residential', 6, ..., 1, 80304, 2495.0],
       [650, 'Condo', 1, ..., 1, 80304, 127.969],
       [2608, 'Single Family Residential', 3, ..., 1, 80302, 664.9]],
      dtype=object)

## data['column name'].unique()

Get unique values of a specified column.

In [14]:
data['hometype'].unique()

array(['Single Family Residential', 'Townhouse', 'Condo'], dtype=object)

## data['column name'].nunique()

The nunique( ) shows the number of unique values.

In [15]:
data['hometype'].nunique()

3

## data['column name'].value_counts()

`value_counts()` creates a frequency distribution. By default `ascending=False`, i.e. the value with the highest frequency is shown at the top.

In [16]:
data['hometype'].value_counts()

Single Family Residential    176
Condo                         37
Townhouse                      7
Name: hometype, dtype: int64

## Data Cleaning

### data.columns = ['new col name1','new col name2']

Rename `all` column names of the DataFrame at once. The new list must contain exactly one name per existing column.

In [18]:
df = pd.read_csv("tennis.csv")
df.head(2)

Unnamed: 0,outlook,temp,humidity,windy,play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no


In [20]:
df.columns

Index(['outlook', 'temp', 'humidity', 'windy', 'play'], dtype='object')

In [23]:
df.columns = ['out','tem','hum','wind','ply']

In [24]:
df.head(2)

Unnamed: 0,out,tem,hum,wind,ply
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no


### data.rename()

`data.rename(columns={'old_columnname': 'new_columnname'}, inplace=True)`

Rename specific columns only.

In [25]:
df.head(1)

Unnamed: 0,out,tem,hum,wind,ply
0,sunny,hot,high,False,no


In [29]:
df.rename(columns={'out':'outlook'})

Unnamed: 0,outlook,tem,hum,wind,ply
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
6,overcast,cool,normal,True,yes
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
9,rainy,mild,normal,False,yes


In [28]:
df.head(1)

Unnamed: 0,out,tem,hum,wind,ply
0,sunny,hot,high,False,no


In [30]:
df.rename(columns={'out':'outlook'},
         inplace=True)

In [31]:
df.head(2)

Unnamed: 0,outlook,tem,hum,wind,ply
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no


In [52]:
l = list(range(1,11))


In [53]:
def even(l):
    """Return the even numbers found in ``l`` as a new list, in original order.

    Parameters
    ----------
    l : iterable of int
        Values to filter. The input is not modified.

    Returns
    -------
    list
        Every item ``i`` of ``l`` for which ``i % 2 == 0``.
    """
    # A comprehension builds the result locally; no module-level accumulator
    # is needed (a global list would be shadowed by the local one anyway).
    return [i for i in l if i % 2 == 0]

In [54]:
even(l)

[2, 4, 6, 8, 10]

In [55]:
def a(x):
    """Return True when ``x`` is divisible by 2; used as a ``filter`` predicate."""
    # A named ``def`` (rather than a lambda bound to a name) gives the
    # function a real __name__ in tracebacks and room for a docstring.
    return x % 2 == 0

In [56]:
list(filter(a,l))

[2, 4, 6, 8, 10]

In [61]:
df.rename(index=lambda x:x+1) # Mass Renaming of row index

Unnamed: 0,outlook,tem,hum,wind,ply
1,sunny,hot,high,False,no
2,sunny,hot,high,True,no
3,overcast,hot,high,False,yes
4,rainy,mild,high,False,yes
5,rainy,cool,normal,False,yes
6,rainy,cool,normal,True,no
7,overcast,cool,normal,True,yes
8,sunny,mild,high,False,no
9,sunny,cool,normal,False,yes
10,rainy,mild,normal,False,yes


### data['column name'].astype( )

`astype()` casts a column to another dtype. Suppose we want to convert the float column `listprice` to an integer type (the decimal part of each value is dropped).

In [62]:
data.head(2)

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,1,80303,895.0
1,2920,Single Family Residential,3,3,38754,1966,81,1,80303,659.0


In [63]:
data.dtypes

sqft              int64
hometype         object
beds              int64
baths             int64
lotsize           int64
yearbulit         int64
daysonmarket      int64
parkingtype       int64
zip               int64
listprice       float64
dtype: object

In [68]:
# Cast the float `listprice` column to an integer dtype and store it back.
# NOTE: float -> int casting drops the fractional part (e.g. 479.9 becomes 479).
# NOTE(review): the width picked for 'int' appears to depend on the platform
# (the output below shows int32) -- use an explicit 'int64' if that matters.
data['listprice'] = data['listprice'].astype('int')
data.dtypes

sqft             int64
hometype        object
beds             int64
baths            int64
lotsize          int64
yearbulit        int64
daysonmarket     int64
parkingtype      int64
zip              int64
listprice        int32
dtype: object

In [69]:
data.memory_usage()

Index             80
sqft            1760
hometype        1760
beds            1760
baths           1760
lotsize         1760
yearbulit       1760
daysonmarket    1760
parkingtype     1760
zip             1760
listprice        880
dtype: int64

### data.replace()

Replace the values given in `to_replace` with `value`. Values of the DataFrame are replaced with other values dynamically.

`data.replace(to_replace,value)`

In [70]:
data.head(2)

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,1,80303,895
1,2920,Single Family Residential,3,3,38754,1966,81,1,80303,659


In [71]:
data['hometype'].unique()

array(['Single Family Residential', 'Townhouse', 'Condo'], dtype=object)

In [72]:
d = {'Single Family Residential':1,'Townhouse':2,'Condo':3}
data.replace(d).head(2)

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,1,4,3,54886,1961,349,1,80303,895
1,2920,1,3,3,38754,1966,81,1,80303,659


In [None]:
data.head(2)

In [75]:
#sunny = 1
#overcast =2
#rainy = 3

d = {'sunny':1,
    'overcast':2,
    'rainy':3}

df.replace(d,inplace=True)

In [76]:
df.head(2)

Unnamed: 0,outlook,tem,hum,wind,ply
0,1,hot,high,False,no
1,1,hot,high,True,no


In [78]:
data['parkingtype'].unique()

array([1, 0], dtype=int64)

In [79]:
# Recode the 1/0 parking flag as 'Yes'/'No'.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on `data['parkingtype']`: an in-place edit on a
# column selection acts on an intermediate object, which pandas warns about
# and which does not update `data` when Copy-on-Write is enabled.
data['parkingtype'] = data['parkingtype'].replace([1, 0], ['Yes', 'No'])

### data.set_index()

Set the DataFrame index (row labels) using one or more existing
columns. By default yields a new object.

In [88]:
df = pd.read_csv("tennis.csv")
df.head(2)

Unnamed: 0,outlook,temp,humidity,windy,play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no


In [89]:
df['outlook'].unique()

array(['sunny', 'overcast', 'rainy'], dtype=object)

In [92]:
df.set_index('outlook').columns

Index(['temp', 'humidity', 'windy', 'play'], dtype='object')

In [93]:
df.set_index('outlook').index

Index(['sunny', 'sunny', 'overcast', 'rainy', 'rainy', 'rainy', 'overcast',
       'sunny', 'sunny', 'rainy', 'sunny', 'overcast', 'overcast', 'rainy'],
      dtype='object', name='outlook')

In [94]:
df.set_index(["humidity",'outlook'])

Unnamed: 0_level_0,Unnamed: 1_level_0,temp,windy,play
humidity,outlook,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
high,sunny,hot,False,no
high,sunny,hot,True,no
high,overcast,hot,False,yes
high,rainy,mild,False,yes
normal,rainy,cool,False,yes
normal,rainy,cool,True,no
normal,overcast,cool,True,yes
high,sunny,mild,False,no
normal,sunny,cool,False,yes
normal,rainy,mild,False,yes


In [95]:
data.set_index('yearbulit')

Unnamed: 0_level_0,sqft,hometype,beds,baths,lotsize,daysonmarket,parkingtype,zip,listprice
yearbulit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1961,3040,Single Family Residential,4,3,54886,349,Yes,80303,895
1966,2920,Single Family Residential,3,3,38754,81,Yes,80303,659
2005,3845,Single Family Residential,4,3,46609,19,Yes,80303,1840
2008,5588,Single Family Residential,4,5,223463,116,Yes,80303,6499
2014,3934,Single Family Residential,4,4,40864,130,Yes,80305,1500
1991,7113,Single Family Residential,5,5,266152,40,Yes,80303,2790
1959,2340,Single Family Residential,4,3,7204,39,Yes,80305,479
1995,3857,Single Family Residential,4,3,10671,37,Yes,80305,1487
1979,6589,Single Family Residential,4,5,15682,35,Yes,80305,2795
2000,6549,Single Family Residential,5,6,30928,92,Yes,80303,1965


## Sort and Filter the Data

### data.sort_values()

`df.sort_values(by=['Column1', 'Column2'], ascending=[True, True])`

Sort by the values along either axis

In [96]:
df.head()

Unnamed: 0,outlook,temp,humidity,windy,play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
2,overcast,hot,high,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes


In [98]:
df.sort_values(by='outlook',ascending=True)

Unnamed: 0,outlook,temp,humidity,windy,play
2,overcast,hot,high,False,yes
6,overcast,cool,normal,True,yes
11,overcast,mild,high,True,yes
12,overcast,hot,normal,False,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
9,rainy,mild,normal,False,yes
13,rainy,mild,high,True,no
0,sunny,hot,high,False,no


In [99]:
df.sort_values(by='outlook',ascending=False)

Unnamed: 0,outlook,temp,humidity,windy,play
0,sunny,hot,high,False,no
1,sunny,hot,high,True,no
7,sunny,mild,high,False,no
8,sunny,cool,normal,False,yes
10,sunny,mild,normal,True,yes
3,rainy,mild,high,False,yes
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
9,rainy,mild,normal,False,yes
13,rainy,mild,high,True,no


In [101]:
data.sort_values(by='sqft',ascending=False)

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
186,10466,Single Family Residential,5,6,190970,2011,50,Yes,80304,7500
217,8619,Single Family Residential,6,6,47216,1991,81,Yes,80304,2495
158,8401,Single Family Residential,6,6,33541,2009,103,Yes,80304,3450
105,7980,Single Family Residential,6,3,13945,1912,37,Yes,80302,4250
49,7333,Single Family Residential,5,5,12500,2007,295,Yes,80302,1895
5,7113,Single Family Residential,5,5,266152,1991,40,Yes,80303,2790
176,6908,Single Family Residential,5,6,45782,2004,37,Yes,80304,2995
95,6740,Single Family Residential,5,5,11667,2008,47,Yes,80302,2399
8,6589,Single Family Residential,4,5,15682,1979,35,Yes,80305,2795
9,6549,Single Family Residential,5,6,30928,2000,92,Yes,80303,1965


In [107]:
df = df.sort_values(by = ['outlook','humidity','temp',])

In [108]:
df

Unnamed: 0,outlook,temp,humidity,windy,play
2,overcast,hot,high,False,yes
11,overcast,mild,high,True,yes
6,overcast,cool,normal,True,yes
12,overcast,hot,normal,False,yes
3,rainy,mild,high,False,yes
13,rainy,mild,high,True,no
4,rainy,cool,normal,False,yes
5,rainy,cool,normal,True,no
9,rainy,mild,normal,False,yes
0,sunny,hot,high,False,no


## data.sort_index()

Sort object by labels (along an axis)

In [109]:
df.sort_index(ascending=False)

Unnamed: 0,outlook,temp,humidity,windy,play
13,rainy,mild,high,True,no
12,overcast,hot,normal,False,yes
11,overcast,mild,high,True,yes
10,sunny,mild,normal,True,yes
9,rainy,mild,normal,False,yes
8,sunny,cool,normal,False,yes
7,sunny,mild,high,False,no
6,overcast,cool,normal,True,yes
5,rainy,cool,normal,True,no
4,rainy,cool,normal,False,yes


## data.T

To transpose rows into columns and columns into rows

In [110]:
df.T

Unnamed: 0,2,11,6,12,3,13,4,5,9,0,1,7,8,10
outlook,overcast,overcast,overcast,overcast,rainy,rainy,rainy,rainy,rainy,sunny,sunny,sunny,sunny,sunny
temp,hot,mild,cool,hot,mild,mild,cool,cool,mild,hot,hot,mild,cool,mild
humidity,high,high,normal,normal,high,high,normal,normal,normal,high,high,high,normal,normal
windy,False,True,True,False,False,True,False,True,False,False,True,False,False,True
play,yes,yes,yes,yes,yes,no,yes,no,yes,no,no,no,yes,yes


## data.drop()

Pandas provide data analysts a way to delete and filter data frame using .drop() method. Rows or columns can be removed using index label or column name using this method.

**`DataFrame.drop(labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')`**

- labels: String or list of strings referring row or column name.
- axis: int or string value, 0 ‘index’ for Rows and 1 ‘columns’ for Columns.
- index or columns: Single label or list. index or columns are an alternative to axis and cannot be used together.
- level: Used to specify level in case data frame is having multiple level index.
- inplace: Makes changes in original Data Frame if True.
- errors: When errors = 'ignore', any label from the list that doesn't exist is ignored and the rest of the labels are dropped.

In [111]:
df.head(2)

Unnamed: 0,outlook,temp,humidity,windy,play
2,overcast,hot,high,False,yes
11,overcast,mild,high,True,yes


In [113]:
df.drop('outlook',axis = 1) # Drop outlook column

Unnamed: 0,temp,humidity,windy,play
2,hot,high,False,yes
11,mild,high,True,yes
6,cool,normal,True,yes
12,hot,normal,False,yes
3,mild,high,False,yes
13,mild,high,True,no
4,cool,normal,False,yes
5,cool,normal,True,no
9,mild,normal,False,yes
0,hot,high,False,no


In [115]:
df.drop(['outlook','windy'],axis = 1) # Drop outlook and windy column

Unnamed: 0,temp,humidity,play
2,hot,high,yes
11,mild,high,yes
6,cool,normal,yes
12,hot,normal,yes
3,mild,high,yes
13,mild,high,no
4,cool,normal,yes
5,cool,normal,no
9,mild,normal,yes
0,hot,high,no


## Filter  or Subset

In [122]:
data.head(2)

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,Yes,80303,895
1,2920,Single Family Residential,3,3,38754,1966,81,Yes,80303,659


In [126]:
# subset of single Family Residential

exp = data['hometype'] == 'Single Family Residential'

data[exp]

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,Yes,80303,895
1,2920,Single Family Residential,3,3,38754,1966,81,Yes,80303,659
2,3845,Single Family Residential,4,3,46609,2005,19,Yes,80303,1840
3,5588,Single Family Residential,4,5,223463,2008,116,Yes,80303,6499
4,3934,Single Family Residential,4,4,40864,2014,130,Yes,80305,1500
5,7113,Single Family Residential,5,5,266152,1991,40,Yes,80303,2790
6,2340,Single Family Residential,4,3,7204,1959,39,Yes,80305,479
7,3857,Single Family Residential,4,3,10671,1995,37,Yes,80305,1487
8,6589,Single Family Residential,4,5,15682,1979,35,Yes,80305,2795
9,6549,Single Family Residential,5,6,30928,2000,92,Yes,80303,1965


In [133]:
data.head()

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,Yes,80303,895
1,2920,Single Family Residential,3,3,38754,1966,81,Yes,80303,659
2,3845,Single Family Residential,4,3,46609,2005,19,Yes,80303,1840
3,5588,Single Family Residential,4,5,223463,2008,116,Yes,80303,6499
4,3934,Single Family Residential,4,4,40864,2014,130,Yes,80305,1500


In [136]:
#beds = 4 and parkingtype = Yes

exp = (data['beds'] == 4) & (data['parkingtype']=='Yes')
data[exp]

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,Yes,80303,895
2,3845,Single Family Residential,4,3,46609,2005,19,Yes,80303,1840
3,5588,Single Family Residential,4,5,223463,2008,116,Yes,80303,6499
4,3934,Single Family Residential,4,4,40864,2014,130,Yes,80305,1500
6,2340,Single Family Residential,4,3,7204,1959,39,Yes,80305,479
7,3857,Single Family Residential,4,3,10671,1995,37,Yes,80305,1487
8,6589,Single Family Residential,4,5,15682,1979,35,Yes,80305,2795
11,3602,Single Family Residential,4,3,14851,1967,42,Yes,80305,1900
14,1796,Single Family Residential,4,2,6970,1962,45,Yes,80305,545
17,3136,Single Family Residential,4,3,11064,1972,135,Yes,80305,700


In [137]:
# listprice >1500 of single family residential with 2 beds 
exp = (data['listprice']>1500) & (data['hometype']=='Single Family Residential')&(data['beds']==2)

data[exp]

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
100,5026,Single Family Residential,2,2,31450,2008,241,Yes,80302,4250


In [138]:
data[(data['listprice']>1500) & (data['hometype']=='Single Family Residential')&(data['beds']==2)]

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
100,5026,Single Family Residential,2,2,31450,2008,241,Yes,80302,4250


In [None]:
# `parkingtype` was recoded from 1/0 to 'Yes'/'No' earlier in this notebook,
# so compare against the label; comparing with 1 would match no rows.
data[data['parkingtype'] == 'Yes'].head(3)

In [None]:
# Homes with parking AND exactly 4 beds. `parkingtype` holds 'Yes'/'No' at this
# point in the notebook (it was recoded from 1/0), so compare against 'Yes'.
data[(data['parkingtype'] == 'Yes') & (data['beds'] == 4)]

In [None]:
# Homes with parking OR exactly 3 beds. `parkingtype` holds 'Yes'/'No' at this
# point in the notebook (it was recoded from 1/0), so compare against 'Yes'.
data[(data['parkingtype'] == 'Yes') | (data['beds'] == 3)]

## DataFrame.query()

Analyzing data requires a lot of filtering operations. Pandas provides many methods to filter a DataFrame, and `DataFrame.query()` is one of them: it filters rows using a boolean expression written as a string.

**`DataFrame.query(expr, inplace=False)`**

- expr: Expression in string form to filter data.
- inplace: Make changes in the original data frame if True


In [139]:
exp = data['parkingtype']=='Yes'
data[exp]

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,Yes,80303,895
1,2920,Single Family Residential,3,3,38754,1966,81,Yes,80303,659
2,3845,Single Family Residential,4,3,46609,2005,19,Yes,80303,1840
3,5588,Single Family Residential,4,5,223463,2008,116,Yes,80303,6499
4,3934,Single Family Residential,4,4,40864,2014,130,Yes,80305,1500
5,7113,Single Family Residential,5,5,266152,1991,40,Yes,80303,2790
6,2340,Single Family Residential,4,3,7204,1959,39,Yes,80305,479
7,3857,Single Family Residential,4,3,10671,1995,37,Yes,80305,1487
8,6589,Single Family Residential,4,5,15682,1979,35,Yes,80305,2795
9,6549,Single Family Residential,5,6,30928,2000,92,Yes,80303,1965


In [142]:
#exp = parkingtype == "Yes"
data.query('parkingtype == "Yes"')

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,Yes,80303,895
1,2920,Single Family Residential,3,3,38754,1966,81,Yes,80303,659
2,3845,Single Family Residential,4,3,46609,2005,19,Yes,80303,1840
3,5588,Single Family Residential,4,5,223463,2008,116,Yes,80303,6499
4,3934,Single Family Residential,4,4,40864,2014,130,Yes,80305,1500
5,7113,Single Family Residential,5,5,266152,1991,40,Yes,80303,2790
6,2340,Single Family Residential,4,3,7204,1959,39,Yes,80305,479
7,3857,Single Family Residential,4,3,10671,1995,37,Yes,80305,1487
8,6589,Single Family Residential,4,5,15682,1979,35,Yes,80305,2795
9,6549,Single Family Residential,5,6,30928,2000,92,Yes,80303,1965


In [143]:
data.query('parkingtype =="Yes" and beds==4')

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
0,3040,Single Family Residential,4,3,54886,1961,349,Yes,80303,895
2,3845,Single Family Residential,4,3,46609,2005,19,Yes,80303,1840
3,5588,Single Family Residential,4,5,223463,2008,116,Yes,80303,6499
4,3934,Single Family Residential,4,4,40864,2014,130,Yes,80305,1500
6,2340,Single Family Residential,4,3,7204,1959,39,Yes,80305,479
7,3857,Single Family Residential,4,3,10671,1995,37,Yes,80305,1487
8,6589,Single Family Residential,4,5,15682,1979,35,Yes,80305,2795
11,3602,Single Family Residential,4,3,14851,1967,42,Yes,80305,1900
14,1796,Single Family Residential,4,2,6970,1962,45,Yes,80305,545
17,3136,Single Family Residential,4,3,11064,1972,135,Yes,80305,700


In [150]:
#zip either 80303 or 80305 with beds=2

data.query('(zip == 80303 or zip == 80305) and beds==2')

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
24,954,Condo,2,2,26,1990,17,Yes,80303,255
32,1128,Condo,2,2,1128,1972,22,Yes,80303,229
55,864,Condo,2,1,48,1969,11,No,80303,205
66,2122,Condo,2,2,27037,2009,170,Yes,80303,1389
68,1338,Condo,2,3,81969,2009,75,Yes,80303,437
77,1420,Townhouse,2,2,1060,1997,2,Yes,80303,419
87,1052,Condo,2,2,127897,2006,34,Yes,80303,425
90,1394,Condo,2,2,127897,2008,173,Yes,80303,499
93,1407,Condo,2,2,128066,2006,60,Yes,80303,485
116,1074,Single Family Residential,2,1,148104,1880,772,No,80303,749


In [None]:
# Count single-family homes with parking and 3 beds. `parkingtype` was recoded
# to 'Yes'/'No' earlier in this notebook, so the query compares against "Yes"
# (comparing with 1 would select zero rows).
data.query('hometype == "Single Family Residential" and parkingtype == "Yes" and beds == 3').shape

## data.sample( )

data.sample(n = ,frac = )

`sample()` draws a random sample of rows from the dataset, keeping all the columns. Here `n = 5` means we want 5 rows, and `frac = 0.1` means we want 10 percent of the rows as the sample.

In [146]:
# Draw 5 random rows; a fixed random_state makes the same rows come back on
# every run, so the displayed output is reproducible.
data.sample(n=5, random_state=42)

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
75,542,Condo,1,1,26,1988,3,No,80302,224
218,650,Condo,1,1,22214,2002,62,Yes,80304,127
22,2436,Single Family Residential,3,2,92042,1951,158,Yes,80302,3250
178,1408,Single Family Residential,3,1,9931,1949,58,Yes,80304,574
39,5353,Single Family Residential,4,4,33585,2008,354,Yes,80303,1850


In [147]:
# Draw a random 25% of the rows; a fixed random_state makes the same rows come
# back on every run, so the displayed output is reproducible.
data.sample(frac=0.25, random_state=42)

Unnamed: 0,sqft,hometype,beds,baths,lotsize,yearbulit,daysonmarket,parkingtype,zip,listprice
23,5769,Single Family Residential,5,6,19471,2012,174,Yes,80302,3875
34,3426,Single Family Residential,5,3,5837,1996,32,Yes,80302,1199
91,4789,Single Family Residential,5,5,10333,2014,46,Yes,80302,2790
45,4176,Single Family Residential,3,3,12371,1923,398,Yes,80302,1700
81,4382,Single Family Residential,4,4,12863,1904,102,Yes,80302,2300
118,5068,Single Family Residential,5,4,16117,1993,16,Yes,80303,1185
153,1241,Townhouse,2,2,1974,1982,8,Yes,80301,369
98,905,Condo,1,1,27798,2009,151,Yes,80302,599
7,3857,Single Family Residential,4,3,10671,1995,37,Yes,80305,1487
176,6908,Single Family Residential,5,6,45782,2004,37,Yes,80304,2995
