##### M2W2
#### *Indexing, Selecting, and Assigning Data in Pandas* 

In [1]:
import pandas as pd 

In [2]:
# Column-oriented sample data: each keyword becomes a column and each list
# supplies that column's ten values, in row order.
data = dict(
    Name=['Olivia', 'Emma', 'Liam', 'Sophia', 'Mason', 'Ava', 'Jacob', 'Isabella', 'Noah', 'Ethan'],
    Age=[39, 29, 35, 18, 28, 40, 23, 26, 22, 45],
    City=['Sydney', 'Toronto', 'Madrid', 'Berlin', 'Rome', 'Los Angeles', 'New York', 'London', 'Paris', 'Tokyo'],
    Year=[1985, 1995, 1989, 2006, 1996, 1984, 2001, 1998, 2002, 1979],
)

# Build the DataFrame from the dictionary; no index is passed, so rows get
# the default integer labels 0..9.
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City,Year
0,Olivia,39,Sydney,1985
1,Emma,29,Toronto,1995
2,Liam,35,Madrid,1989
3,Sophia,18,Berlin,2006
4,Mason,28,Rome,1996
5,Ava,40,Los Angeles,1984
6,Jacob,23,New York,2001
7,Isabella,26,London,1998
8,Noah,22,Paris,2002
9,Ethan,45,Tokyo,1979


In [3]:
# Check the data type pandas inferred for each column
# (the text columns show up as `object`, the numeric ones as `int64`)
df.dtypes

Name    object
Age      int64
City    object
Year     int64
dtype: object

In [4]:
# Two ways to select a single column as a Series:
#   variable.column_name     -> dot notation; only usable when the column name has no spaces
#                               or special characters and does not clash with a DataFrame
#                               attribute/method name
#   variable['column_name']  -> bracket notation; works for any column name, so it is safer

# Access the Name column (dot notation shown here)
df.Name

# Equivalent bracket form: df['Name']

0      Olivia
1        Emma
2        Liam
3      Sophia
4       Mason
5         Ava
6       Jacob
7    Isabella
8        Noah
9       Ethan
Name: Name, dtype: object

In [5]:
# Access multiple columns: indexing with a list of column names (note the
# double brackets) returns a DataFrame containing just those columns
name_city = df[['Name','City']]
name_city

Unnamed: 0,Name,City
0,Olivia,Sydney
1,Emma,Toronto
2,Liam,Madrid
3,Sophia,Berlin
4,Mason,Rome
5,Ava,Los Angeles
6,Jacob,New York
7,Isabella,London
8,Noah,Paris
9,Ethan,Tokyo


##### _*Pandas .loc*_

##### Syntax: df.loc[row_indexer, column_indexer] 

In [6]:
# Access a single row by its index label; the result is a Series whose
# index is the column names
single_row = df.loc[2]
single_row

Name      Liam
Age         35
City    Madrid
Year      1989
Name: 2, dtype: object

In [7]:
# Access the rows labelled 0, 3 and 4; passing a list of labels returns a DataFrame
row_list = df.loc[[0, 3, 4]]
row_list

Unnamed: 0,Name,Age,City,Year
0,Olivia,39,Sydney,1985
3,Sophia,18,Berlin,2006
4,Mason,28,Rome,1996


In [8]:
# Access a list of columns for every row
column_list = df.loc[:,['Name', 'Age']] # the bare `:` in the row position selects all rows
column_list

Unnamed: 0,Name,Age
0,Olivia,39
1,Emma,29
2,Liam,35
3,Sophia,18
4,Mason,28
5,Ava,40
6,Jacob,23
7,Isabella,26
8,Noah,22
9,Ethan,45


In [9]:
# Access a single value: row label 1 (the second row) of the 'Name' column
specific_value = df.loc[1, 'Name']
specific_value

'Emma'

In [10]:
# Label-based slices with .loc include BOTH endpoints.
# Rows labelled 1 through 3, all columns:
slice_rows = df.loc[1:3]
# Rows labelled 1 through 3, columns 'Name' through 'City' ('Name', 'Age', 'City'):
slice_columns = df.loc[1:3, 'Name':'City']

# Each section is a heading line followed by the frame; a blank line separates the two.
print("Sliced Rows:", slice_rows, sep="\n", end="\n\n")
print("Sliced Columns:", slice_columns, sep="\n")

Sliced Rows:
     Name  Age     City  Year
1    Emma   29  Toronto  1995
2    Liam   35   Madrid  1989
3  Sophia   18   Berlin  2006

Sliced Columns:
     Name  Age     City
1    Emma   29  Toronto
2    Liam   35   Madrid
3  Sophia   18   Berlin


In [11]:
# Boolean indexing with .loc: the comparison yields a True/False Series aligned
# with df's index, and .loc keeps only the rows where it is True.
# (Despite its name, `boolean_index` holds the filtered rows, not the mask.)
boolean_index = df.loc[df['Age'].gt(29)]

print("Filtered DataFrame: ", boolean_index, sep="\n")


Filtered DataFrame: 
     Name  Age         City  Year
0  Olivia   39       Sydney  1985
2    Liam   35       Madrid  1989
5     Ava   40  Los Angeles  1984
9   Ethan   45        Tokyo  1979


##### _*Pandas .iloc*_

##### Syntax: df.iloc[row_indexer, column_indexer] 

In [12]:
# Select the last row of the DataFrame: the slice starts at the last position and
# runs to the end. Because a slice is used, the result is a one-row DataFrame.
single_row = df.iloc[-1:] 
single_row

Unnamed: 0,Name,Age,City,Year
9,Ethan,45,Tokyo,1979


In [13]:
# Position-based selection with .iloc.
# Rows at positions 0, 3 and 4 (all columns):
row_list = df.iloc[[0, 3, 4]]
# Rows at positions 1-2 and columns at positions 1-2; the stop value 3 is
# excluded on both axes:
selected_data = df.iloc[1:3, 1:3]

# Heading, then the frame, then one blank line after each section.
print("List of Rows:", row_list, sep="\n", end="\n\n")
print("Multiple Rows:", selected_data, sep="\n", end="\n\n")

List of Rows:
     Name  Age    City  Year
0  Olivia   39  Sydney  1985
3  Sophia   18  Berlin  2006
4   Mason   28    Rome  1996

Multiple Rows:
   Age     City
1   29  Toronto
2   35   Madrid



In [14]:
# Columns at positions 1 and 2, for every row:
column_list = df.iloc[:, [1, 2]]
# A single cell: first row (position 0), third column (position 2):
specific_value = df.iloc[0, 2]

# Heading followed by the value; a blank line separates the two sections.
print("List of Columns:", column_list, sep="\n", end="\n\n")
print("Specific Value:", specific_value, sep="\n")

List of Columns:
   Age         City
0   39       Sydney
1   29      Toronto
2   35       Madrid
3   18       Berlin
4   28         Rome
5   40  Los Angeles
6   23     New York
7   26       London
8   22        Paris
9   45        Tokyo

Specific Value:
Sydney


In [15]:
# Position-based slices with .iloc exclude the stop position.
# Rows at positions 1 to 3: the stop (4) is not included; to include
# position 4 as well, use df.iloc[1:5].
slice_rows = df.iloc[1:4]
# Columns at positions 0 to 1 (stop 2 excluded). A slice goes directly in the
# indexer, without the extra list brackets needed for a list of positions.
slice_columns = df.iloc[:, 0:2]

# Heading, then the frame; a blank line separates the two sections.
print("Sliced Rows:", slice_rows, sep="\n", end="\n\n")
print("Sliced Columns:", slice_columns, sep="\n")

Sliced Rows:
     Name  Age     City  Year
1    Emma   29  Toronto  1995
2    Liam   35   Madrid  1989
3  Sophia   18   Berlin  2006

Sliced Columns:
       Name  Age
0    Olivia   39
1      Emma   29
2      Liam   35
3    Sophia   18
4     Mason   28
5       Ava   40
6     Jacob   23
7  Isabella   26
8      Noah   22
9     Ethan   45


##### _*The main differences between .loc and .iloc are as follows:*_


| Basis	          | .loc (specific labels)*       |  .iloc (specific positions)       |
|-----------------|-------------------------------|-----------------------------------|
|Indexing         | Label-based indexing          |	Integer-based indexing            |
|Endpoint         |	Endpoint is included          |	Endpoint is not included          |
|Boolean indexing |	Boolean Series or array is supported |	Boolean array is supported, but not a boolean Series (convert it first, e.g. `mask.to_numpy()`) |

*Integers may be used with `.loc`, but they are interpreted as labels, not positions.

##### _*Assigning data in Pandas*_

In [16]:
# Display the DataFrame as it stands before any values are reassigned
df

Unnamed: 0,Name,Age,City,Year
0,Olivia,39,Sydney,1985
1,Emma,29,Toronto,1995
2,Liam,35,Madrid,1989
3,Sophia,18,Berlin,2006
4,Mason,28,Rome,1996
5,Ava,40,Los Angeles,1984
6,Jacob,23,New York,2001
7,Isabella,26,London,1998
8,Noah,22,Paris,2002
9,Ethan,45,Tokyo,1979


In [17]:
# Change the value in the entire column: assigning a scalar sets every row of 'City' to it.
# This modifies df in place, so the original city names are overwritten from here on.
df['City'] = 'Dallas'
df

Unnamed: 0,Name,Age,City,Year
0,Olivia,39,Dallas,1985
1,Emma,29,Dallas,1995
2,Liam,35,Dallas,1989
3,Sophia,18,Dallas,2006
4,Mason,28,Dallas,1996
5,Ava,40,Dallas,1984
6,Jacob,23,Dallas,2001
7,Isabella,26,Dallas,1998
8,Noah,22,Dallas,2002
9,Ethan,45,Dallas,1979


In [18]:
# Assign values to particular cells by position (column position 2 is 'City')
df.iloc[:3, 2] = 'Rome'       # first three rows (positions 0, 1, 2)
df.iloc[-1, 2] = 'Caracas'    # last row
df

Unnamed: 0,Name,Age,City,Year
0,Olivia,39,Rome,1985
1,Emma,29,Rome,1995
2,Liam,35,Rome,1989
3,Sophia,18,Dallas,2006
4,Mason,28,Dallas,1996
5,Ava,40,Dallas,1984
6,Jacob,23,Dallas,2001
7,Isabella,26,Dallas,1998
8,Noah,22,Dallas,2002
9,Ethan,45,Caracas,1979


##### _*Append a new row to the DataFrame (assign a list of values to a new index label)*_

In [19]:
# Append a row by assigning to an index label that does not exist yet.
# len(df.index) is the current number of rows; with the default 0..n-1 integer index
# that is one more than the current highest label, i.e. the next unused label,
# so .loc creates a new row there instead of overwriting an existing one.
# NOTE: this modifies df in place, so re-running the cell appends the same row again.
df.loc[len(df.index)] = ['Amy', 37, 'Chicago', 1987] 

print("Modified DataFrame:")
df

Modified DataFrame:


Unnamed: 0,Name,Age,City,Year
0,Olivia,39,Rome,1985
1,Emma,29,Rome,1995
2,Liam,35,Rome,1989
3,Sophia,18,Dallas,2006
4,Mason,28,Dallas,1996
5,Ava,40,Dallas,1984
6,Jacob,23,Dallas,2001
7,Isabella,26,Dallas,1998
8,Noah,22,Dallas,2002
9,Ethan,45,Caracas,1979
