## Slicing and modifying pandas data frames/series

#### Lecture agenda:

- Slicing pandas dataframes
- Modifying pandas dataframes
- Slicing pandas data series

In [None]:
import pandas as pd
import numpy as np

#### Slicing pandas dataframe

In [None]:
# Load the mall customers CSV file into a dataframe

df = pd.read_csv(
    filepath_or_buffer='data/mall_customers.csv'
)

# Preview the first rows to check the load
df.head()

In [None]:
# Get a single element: row label 0, column 'Gender'

df.loc[0, 'Gender']

In [None]:
# Label slices on both axes - with .loc the end labels are included
df.loc[0:1, 'Gender':'Annual Income (k$)']

In [None]:
# Explicit lists of row labels and column names - only the listed ones are returned
df.loc[[0, 1], ['Gender', 'Annual Income (k$)']]

In [None]:
# Get single row - a scalar row label returns a pandas series

sub = df.loc[0, :]

print(type(sub))

sub

In [None]:
# Get single row - a one-element list of labels returns a pandas data frame

sub = df.loc[[0], :]

print(type(sub))

sub

In [None]:
# Get multiple rows - row labels 0 and 2, all columns

sub_df = df.loc[[0,2], :]

sub_df

In [None]:
# Get a single column with .loc - a scalar column name returns a pandas series

df.loc[:, 'Age']

In [None]:
# Get a single column with .loc - a one-element list of names returns a data frame

sub_df = df.loc[:, ['Age']]

print(type(sub_df))
sub_df

In [None]:
# Get multiple columns with .loc - columns come back in the listed order

sub_df = df.loc[:, ['Age', 'Gender']]

sub_df

In [None]:
# Get a single column with [] - a scalar column name returns a series
# (note: sub_df holds a Series here, despite its name)

sub_df = df['Age']

print(type(sub_df))
sub_df

In [None]:
# Get a single column with [] - a one-element list of names returns a data frame

sub_df = df[['Age']]

print(type(sub_df))
sub_df

In [None]:
# Get multiple columns with [] and a list of column names

sub_df = df[['Age', 'Gender']]

sub_df

#### Slicing pandas data based on boolean expressions

In [None]:
# Build a small demo data frame: two integer columns and one string column

data = {'A': [1, 2, 3, 4], 'B': [5, 4, 3, 2], 'C': ['a', 'b', 'c', 'd']}
df = pd.DataFrame(data=data)

df

In [None]:
# Row mask: one flag per row of df (True keeps the row, so rows 0 and 2 are kept)
bool_list_rows = [True, False, True, False]

In [None]:
# Slice rows with a boolean list and columns by name

cols = ['A', 'B']

sliced_df = df.loc[bool_list_rows, cols]

sliced_df

In [None]:
# Column mask: one flag per column of df (keeps 'A' and 'B', drops 'C')
bool_list_cols = [True, True, False]

In [None]:
# Select both rows and columns by using boolean lists with .loc

sliced_df = df.loc[bool_list_rows, bool_list_cols]

sliced_df

In [None]:
# Select rows only - a boolean list passed to []
# is applied to the rows; all columns are kept

sliced_df = df[bool_list_rows]

sliced_df


In [None]:
# Comparing a column with a scalar gives a boolean series (one flag per row)
df['A'] > 2

In [None]:
# Slice based on single column condition - keeps the rows where A > 2
# (the kept rows retain their original index labels)

sliced_df = df[df['A'] > 2]
sliced_df

In [None]:
# Renumber the rows from 0; drop=True discards the old index
# instead of keeping it as a new column
sliced_df = sliced_df.reset_index(drop=True)

sliced_df

#### Modifying pandas dataframe

In [None]:
# Fresh data frame for the modification examples: a name and an age per person

data = {'Name': ['Alice', 'Bob', 'Charlie'], 'Age': [25, 32, 18]}
df = pd.DataFrame(data=data)

df

In [None]:
# Modify single value: row label 1, column 'Name'

df.loc[1, 'Name'] = 'Alex'

df

In [None]:
# Modify entire row - one value per column, in column order

df.loc[1, :] = ['Steve', 42]

df


In [None]:
# Modify whole column - the array supplies one value per row, assigned by position

df['Age'] = np.array([21, 22, 23])

df

In [None]:
# Modify whole column with constant - the scalar is repeated for every row

df['Age'] = 20

df

In [None]:
# Add new column - assigning to a column name that does not exist yet creates it
# NOTE(review): 'proffesion' is misspelled ('profession'); renaming it requires
# the same change everywhere this column name is used

df['proffesion'] = ['Engineer', 'Doctor', 'Actor']

df

In [None]:
# Set new values for a slice - one inner list per selected row,
# with values ordered like the listed columns

df.loc[[0,1], ['Age', 'proffesion']] = [[25, 'Manager'], [30, 'Surgeon']]

df

In [None]:
# Assigning a pandas series to a column (here it overwrites the existing 'Age');
# values are matched to rows by index label

new_series = pd.Series([30, 34 , 50])

print(new_series)

df['Age'] = new_series

df

In [None]:
# Adding a dataframe as a new column / columns:
# first build a boolean frame with one row per row of df

new_df = pd.DataFrame(
    data=[
        [True, False],
        [True, False],
        [False, False],
    ],
    columns=['C1', 'C2'],
)
new_df

In [None]:
# Add both columns of new_df to df under the same names
df[['C1', 'C2']] = new_df
df

In [None]:
# The target names do not have to match new_df's column names -
# columns are paired by position (A1 <- C1, A2 <- C2)
df[['A1', 'A2']] = new_df
df

In [None]:
# Relabel new_df's rows as 2, 1, 0 - the data stays in place, only the labels change
new_df.index = [2,1,0]

new_df

In [None]:
# Rows are matched by index label, not by position,
# so the values arrive in df in reversed order
df[['D1', 'D2']] = new_df
df

In [None]:
# Of the labels 20, 10 and 0 only 0 exists in df
new_df.index = [20,10,0]

# Rows 1 and 2 of df find no matching label and are filled with NaN
df[['E1', 'E2']] = new_df
df

In [None]:
# Same non-matching labels as before
new_df.index = [20,10,0]

# to_numpy() drops the index, so the values are assigned by position
# and no NaN appears
df[['K1', 'K2']] = new_df.to_numpy()
df

#### Slicing pandas data series

In [None]:
# Series of four values with the default integer index 0..3
ds = pd.Series([30, 34 , 50, 33])

ds


In [None]:
# Single element by index label
ds[0]

In [None]:
# Integer slice - positional, end excluded (returns the elements at positions 1 and 2)
ds[1:3]

In [None]:
# Several elements by a list of index labels
ds[[0,2]]

In [None]:
# Boolean list - keeps the elements flagged True (here the first and the last)
ds[[True, False, False, True]]