### Pandas - Modifying a DataFrame.
- Adding columns to a DataFrame.
- Using lambda functions to calculate complex quantities.
- Renaming columns.

In [1]:
# Build the sample DataFrame that the following examples work on.
import pandas as pd

# Column names, in display order.
user_columns = ['id_user', 'name_user', 'age_user', 'score_user']

# One tuple per user: (id, name, age, score).
user_rows = [
    (10, 'Peter', 23, 123),
    (20, 'Jumia', 45, 6453),
    (30, 'Donald', 18, 8),
    (40, 'Erika', 33, 987),
    (50, 'John', 63, 2345),
    (60, 'Ron', 39, 9847),
]

df = pd.DataFrame(user_rows, columns=user_columns)
df

Unnamed: 0,id_user,name_user,age_user,score_user
0,10,Peter,23,123
1,20,Jumia,45,6453
2,30,Donald,18,8
3,40,Erika,33,987
4,50,John,63,2345
5,60,Ron,39,9847


In [2]:
# Add a first column:
# Assigning a list creates the column; the list holds one boolean per row.
bonus_flags = [True, False, False, True, True, False]
df['win_bonus'] = bonus_flags
df

Unnamed: 0,id_user,name_user,age_user,score_user,win_bonus
0,10,Peter,23,123,True
1,20,Jumia,45,6453,False
2,30,Donald,18,8,False
3,40,Erika,33,987,True
4,50,John,63,2345,True
5,60,Ron,39,9847,False


In [3]:
# Add a second column:
# Assigning a single value gives every row that same value.
default_state = 'Active'
df['state'] = default_state
df

Unnamed: 0,id_user,name_user,age_user,score_user,win_bonus,state
0,10,Peter,23,123,True,Active
1,20,Jumia,45,6453,False,Active
2,30,Donald,18,8,False,Active
3,40,Erika,33,987,True,Active
4,50,John,63,2345,True,Active
5,60,Ron,39,9847,False,Active


In [4]:
# Add another column:
# This time the values are computed with a lambda function…
# DON'T FORGET the argument axis=1 (apply the function to each row)!
# The default, axis=0, applies the function to each column instead.
# Rows with win_bonus set keep their full score_user; the others get score_user / 100.
# astype('int') then drops the fractional part (e.g. 6453 / 100 = 64.53 becomes 64).
df['point_value'] = df.apply(
    lambda row: row.score_user if row.win_bonus else row.score_user / 100,
    axis=1,
).astype('int')
df

Unnamed: 0,id_user,name_user,age_user,score_user,win_bonus,state,point_value
0,10,Peter,23,123,True,Active,123
1,20,Jumia,45,6453,False,Active,64
2,30,Donald,18,8,False,Active,0
3,40,Erika,33,987,True,Active,987
4,50,John,63,2345,True,Active,2345
5,60,Ron,39,9847,False,Active,98


In [5]:
# Apply a lambda to each row (same technique as for point_value):
# When apply() is called on the whole DataFrame with axis=1, rather than on a single column,
# the lambda receives an entire row as its input, not a column.
# Individual values of the row are read with row.column_name or row['column_name'].

# Create the new column with a lambda function and the keyword axis=1:
# rows with age_user above 30 get point_value * 1.8, all other rows get 0.
df['user_prime'] = df.apply(
    lambda row: (row.point_value * 1.8 if row.age_user > 30 else 0),
    axis=1,
)
df

Unnamed: 0,id_user,name_user,age_user,score_user,win_bonus,state,point_value,user_prime
0,10,Peter,23,123,True,Active,123,0.0
1,20,Jumia,45,6453,False,Active,64,115.2
2,30,Donald,18,8,False,Active,0,0.0
3,40,Erika,33,987,True,Active,987,1776.6
4,50,John,63,2345,True,Active,2345,4221.0
5,60,Ron,39,9847,False,Active,98,176.4


In [6]:
# Add a column:
# A plain arithmetic operation on an existing column is applied to every row at once.
df['score_user_x10'] = df['score_user'].mul(10)
df

Unnamed: 0,id_user,name_user,age_user,score_user,win_bonus,state,point_value,user_prime,score_user_x10
0,10,Peter,23,123,True,Active,123,0.0,1230
1,20,Jumia,45,6453,False,Active,64,115.2,64530
2,30,Donald,18,8,False,Active,0,0.0,80
3,40,Erika,33,987,True,Active,987,1776.6,9870
4,50,John,63,2345,True,Active,2345,4221.0,23450
5,60,Ron,39,9847,False,Active,98,176.4,98470


In [7]:
# How to transform only one column?
# For example with Series.str.upper(),
# which converts the strings in the Series/Index to uppercase.
# The call returns a new Series; df keeps the original names unless the
# result is assigned back to df['name_user'].
df.name_user.str.upper()

0     PETER
1     JUMIA
2    DONALD
3     ERIKA
4      JOHN
5       RON
Name: name_user, dtype: object

In [8]:
# Lambda refresher!
# Without a lambda, the logic is written as a regular named function:
def myfunction(var):
    """Describe ``var`` relative to the threshold 100.

    Returns "We have a high number" when ``var`` is strictly greater
    than 100, and "Game over…" otherwise.
    """
    message = "Game over…"
    if var > 100:
        message = "We have a high number"
    return message
        
# With a lambda the same logic fits in a single expression
# (it is shorter to write, not faster to run):
lambdafunction = lambda var: (
    "We have a high number" if var > 100 else "Game over…"
)


In [9]:
# Rename all columns:
# Assigning a new list to the .columns property replaces every column name at once.
# The list needs one name per existing column, in the same order.
renamed_columns = [
    "id_users",
    "name_users",
    "age_users",
    "score_users",
    "win_bonus",
    "state",
    "point_values",
    "users_prime",
    "score_users_x10",
]
df.columns = renamed_columns
df

Unnamed: 0,id_users,name_users,age_users,score_users,win_bonus,state,point_values,users_prime,score_users_x10
0,10,Peter,23,123,True,Active,123,0.0,1230
1,20,Jumia,45,6453,False,Active,64,115.2,64530
2,30,Donald,18,8,False,Active,0,0.0,80
3,40,Erika,33,987,True,Active,987,1776.6,9870
4,50,John,63,2345,True,Active,2345,4221.0,23450
5,60,Ron,39,9847,False,Active,98,176.4,98470


In [10]:
# Rename individual columns:
# The .rename method takes a mapping of {old_name: new_name};
# columns that are not in the mapping keep their names.
# Using inplace=True lets us edit the original DataFrame.
column_renames = {
    'state': 'state_users',
    'id_users': 'id_user',
}
df.rename(columns=column_renames, inplace=True)
df

Unnamed: 0,id_user,name_users,age_users,score_users,win_bonus,state_users,point_values,users_prime,score_users_x10
0,10,Peter,23,123,True,Active,123,0.0,1230
1,20,Jumia,45,6453,False,Active,64,115.2,64530
2,30,Donald,18,8,False,Active,0,0.0,80
3,40,Erika,33,987,True,Active,987,1776.6,9870
4,50,John,63,2345,True,Active,2345,4221.0,23450
5,60,Ron,39,9847,False,Active,98,176.4,98470
