<font size="8"> Introduction to Pandas </font> 


<font size="5"> Import libraries </font>  


In [None]:
import numpy as np
import pandas as pd

<font size="5"> Initializing instances in Pandas </font>  



In [None]:
# Build a small example DataFrame from a dict: each key becomes a column
# and each list supplies that column's values (one entry per row).
df = pd.DataFrame({'Name': ['Elaine', 'Josh', 'Lin', 'Naho', 'Steve', 'Charles'],
                   'Age': [44, 17, 64, 33, 45, 25],
                   'Salary': [7000, 5800, 7900, 5600, 6700, 5900],
                   'Group': ['MA', 'NW', 'NM', 'WA', 'PA', 'TX']})
# A bare name on the last line of a cell displays the object.
df


<font size="5"> Overview of the dataframe </font>  


In [None]:
# Preview the first rows (head() returns 5 rows when no count is given)
print(df.head())

In [None]:
# Preview the last 3 rows
print(df.tail(3)) 

In [None]:
# Inspect the row index (the row labels)
print(df.index) 

In [None]:
# Inspect the column labels
print(df.columns) 

<font size="5"> Basic operations </font>  


In [None]:
# Select a single column by its label (the result is a Series)
print(df['Age'])

In [None]:
# Select rows by positional slice: positions 2 and 3 (the end, 4, is excluded)
print(df[2:4])

In [10]:
# Select rows and columns by label with .loc: the label slice 1:4 includes
# both endpoints, and columns come back in the order they are listed
print(df.loc[1:4, ['Salary','Name']])

   Salary   Name
1    5800   Josh
2    7900    Lin
3    5600   Naho
4    6700  Steve


In [None]:
# Select rows and columns by integer position with .iloc: both slices
# exclude their end, so this is rows 1-3 and columns 2-3 (Salary, Group)
print(df.iloc[1:4, 2:4])

In [None]:
# Select specific rows and columns by lists of integer positions:
# rows 1, 3, 4 and columns 1, 3 (Age, Group)
print(df.iloc[[1,3,4], [1,3]])

In [None]:
# Filter rows with a boolean mask: keep rows whose Salary exceeds 100
# (every row of this example DataFrame qualifies)
print(df[df['Salary']>100])

In [None]:
# Combine conditions with & (element-wise AND); each comparison needs its
# own parentheses because & binds more tightly than >
print(df[(df['Salary']>100) & (df['Age']>30)])

In [None]:
# Sort rows by Age in descending order; sort_values returns a new DataFrame,
# so df itself keeps its original order
print(df.sort_values(by='Age', ascending=False)) 

In [13]:
# Two extra rows that contain missing values (np.nan) to demonstrate NaN handling.
df2 = pd.DataFrame({'Name': ['Chris', 'Peter'],
                   'Age': [18, np.nan],
                   'Salary': [np.nan, 9],
                   'Group': ['NV', 'FL']})

# Stack the rows of df2 under df. DataFrame.append was removed in pandas 2.0,
# so pd.concat is used; ignore_index=True renumbers the rows 0..n-1.
df_new = pd.concat([df, df2], ignore_index=True)
df_new

Unnamed: 0,Name,Age,Salary,Group
0,Elaine,44.0,7000.0,MA
1,Josh,17.0,5800.0,NW
2,Lin,64.0,7900.0,NM
3,Naho,33.0,5600.0,WA
4,Steve,45.0,6700.0,PA
5,Charles,25.0,5900.0,TX
6,Chris,18.0,,NV
7,Peter,,9.0,FL


In [14]:
# Drop rows whose Salary is missing; NaN in other columns (e.g. Age) is kept
print(df_new.dropna(how='any', subset=['Salary']))


      Name   Age  Salary Group
0   Elaine  44.0  7000.0    MA
1     Josh  17.0  5800.0    NW
2      Lin  64.0  7900.0    NM
3     Naho  33.0  5600.0    WA
4    Steve  45.0  6700.0    PA
5  Charles  25.0  5900.0    TX
7    Peter   NaN     9.0    FL


In [None]:
# Replace every missing value with the string 'None' (df_new itself is not modified)
print(df_new.fillna(value='None'))

In [None]:
# Export the data as a NumPy array (row index and column labels are not included).
# NOTE(review): the pandas documentation recommends DataFrame.to_numpy() over
# .values for new code.
print(df_new.values)

<font size="5"> Challenge </font>  


Implement a function `process()` that, given the path to a CSV file, selects the entries that satisfy either of the following sets of conditions: (1) a valid license (not NaN), a room type of `Private room`, a price less than or equal to 80, minimum nights less than or equal to 2, and a number of reviews greater than or equal to 500; or (2) a valid license (not NaN), a room type of `Entire home/apt`, a price less than or equal to 100, minimum nights less than or equal to 1, and a number of reviews greater than or equal to 100. Present the result sorted by latitude and then longitude in ascending order, keeping only the columns `id`, `latitude`, `longitude`, `room_type`, `price`, and `availability_365`.

In [19]:
def process(path):
    """Select licensed listings matching one of two profiles from a CSV file.

    A row is kept when its ``license`` value is not NaN and it matches either:

    * room_type ``'Private room'``, price <= 80, minimum_nights <= 2,
      number_of_reviews >= 500; or
    * room_type ``'Entire home/apt'``, price <= 100, minimum_nights <= 1,
      number_of_reviews >= 100.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file; passed unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The matching rows restricted to the columns ``id``, ``latitude``,
        ``longitude``, ``room_type``, ``price`` and ``availability_365``,
        sorted ascending by latitude and then longitude, with the index
        reset to 0..n-1.
    """
    listings = pd.read_csv(path)

    # Both profiles require a license, so drop unlicensed rows once up front.
    licensed = listings.dropna(how='any', subset=['license'])

    private_rooms = licensed[
        (licensed['room_type'] == 'Private room')
        & (licensed['price'] <= 80)
        & (licensed['minimum_nights'] <= 2)
        & (licensed['number_of_reviews'] >= 500)
    ]
    entire_homes = licensed[
        (licensed['room_type'] == 'Entire home/apt')
        & (licensed['price'] <= 100)
        & (licensed['minimum_nights'] <= 1)
        & (licensed['number_of_reviews'] >= 100)
    ]

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the two
    # selections in the same order (private rooms first, then entire homes).
    combined = pd.concat([private_rooms, entire_homes], ignore_index=True)

    output_columns = ['id', 'latitude', 'longitude', 'room_type', 'price', 'availability_365']
    return (
        combined.loc[:, output_columns]
        .sort_values(by=['latitude', 'longitude'], ascending=True)
        .reset_index(drop=True)
    )
    
# Run the challenge solution on listings.csv (path is relative to the working directory).
# NOTE(review): this rebinds `df`, so the example DataFrame built earlier in the
# notebook is no longer reachable under that name after this cell runs.
df = process('listings.csv')
print(df)

          id   latitude  longitude        room_type  price  availability_365
0    3508800  52.333100   4.903000     Private room     49                67
1   17774718  52.353180   4.846890  Entire home/apt     89               312
2    7276869  52.354920   4.898650     Private room     69                33
3    1247334  52.355960   4.903390     Private room     78                 0
4    7190115  52.356980   4.900150     Private room     71                21
5    7571471  52.358730   4.855080     Private room     65               282
6    1008223  52.365610   4.840380     Private room     51               324
7   22563427  52.369180   4.880470  Entire home/apt     94                44
8    4449764  52.373130   4.951460     Private room     69                 0
9   31731136  52.378250   4.892390  Entire home/apt     78                57
10    608432  52.381996   4.888583     Private room     56                42
11  28170875  52.383880   4.787870  Entire home/apt     99               312