In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None 
import numpy as np
from pandas.testing import assert_frame_equal

In [2]:
# Load the cleaned fintech dataset (path is relative to the working directory)
# and preview its first rows.
df_fintech = pd.read_csv("clean_fintech.csv")
df_fintech.head()

Unnamed: 0,user_id,churn,age,credit_score,deposits,withdrawal,purchases_partners,purchases,cc_taken,cc_recommended,...,registered_phones,payment_type,waiting_4_loan,cancelled_loan,received_loan,rejected_loan,zodiac_sign,rewards_earned,reward_rate,is_referred
0,1,False,21.0,577.0,48,4,52,45,0,245,...,2,Semi-Monthly,False,False,False,False,Pisces,56.0,1.87,False
1,8,True,31.0,519.0,0,0,0,0,0,49,...,0,Bi-Weekly,False,False,False,False,Virgo,18.0,0.6,True
2,9,False,26.0,542.5156,0,0,30,0,0,56,...,2,Weekly,False,False,False,False,Sagittarius,23.0,0.77,False
3,10,False,33.0,558.0,0,0,76,0,0,166,...,0,Bi-Weekly,False,False,False,False,Leo,45.0,1.5,True
4,11,False,26.0,559.0,0,0,206,0,0,304,...,0,Bi-Weekly,False,False,False,False,Virgo,60.0,2.0,True


## Transformations to perform:
- Multiply all numeric columns by 2.
- Remove the lowercase letter "e" from all string columns.
- Set all boolean columns to True.
- Create 3 extra numeric columns (computed after the numeric columns have been doubled):
    - Mean of purchases (truncated to an integer).
    - Median of age.
    - Mean of credit_score.

In [3]:
# Keep only the columns exercised by the transformations and store age as int64.
# `.loc` with a column list and `.astype` both return new frames, so df_fintech is untouched.
df_fintech_2 = (
    df_fintech
    .loc[:, [
        'age',
        'credit_score',
        'purchases',
        'zodiac_sign',
        'payment_type',
        'churn',
        'cancelled_loan',
        'received_loan',
    ]]
    .astype({'age': np.int64})
)
# Untransformed backup, used later to check the pipeline from a clean input.
df_fintech_3 = df_fintech_2.copy()

In [4]:
# Column dtypes of the selected subset before any transformation is applied.
df_fintech_2.dtypes

age                 int64
credit_score      float64
purchases           int64
zodiac_sign        object
payment_type       object
churn                bool
cancelled_loan       bool
received_loan        bool
dtype: object

In [5]:
def transform_bool(df):
    """Overwrite every boolean column of ``df`` with ``True``.

    The frame is modified in place; the same object is returned so the
    function can be used as a ``DataFrame.pipe`` step.
    """
    bool_columns = df.select_dtypes(include=['bool']).columns
    for name in bool_columns:
        df[name] = True
    return df

def transform_str(df):
    """Remove every lowercase ``"e"`` from the object-dtype columns of ``df``.

    The frame is modified in place; the same object is returned so the
    function can be used as a ``DataFrame.pipe`` step.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``object`` columns hold strings.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the affected columns replaced.
    """
    for c in df.select_dtypes(include=['object']).columns:
        # regex=False pins a literal replacement, so the result does not depend
        # on the pandas-version-specific default of the `regex` argument.
        df[c] = df[c].str.replace("e", "", regex=False)
    return df

def transform_numeric(df):
    """Double every numeric column of ``df``.

    The frame is modified in place; the same object is returned so the
    function can be used as a ``DataFrame.pipe`` step.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns selected by ``select_dtypes(include=['number'])``
        are doubled.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the numeric columns doubled.
    """
    for c in df.select_dtypes(include=['number']).columns:
        # Vectorised multiply instead of a per-element Python loop: the column
        # dtype and index are handled by pandas rather than re-inferred from a list.
        df[c] = df[c] * 2
    return df

def transform_extracols(df):
    """Add three constant summary columns to ``df`` and return it.

    Columns added (each holds a single value repeated on every row):

    - ``purchases_mean``: mean of ``purchases`` cast to ``int64``
      (the fractional part is dropped).
    - ``age_median``: median of ``age``.
    - ``score_mean``: mean of ``credit_score``.

    The statistics are computed from the values ``df`` holds at call time.
    The frame is modified in place.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with numeric ``purchases``, ``age`` and ``credit_score`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, so the function can be chained with
        ``DataFrame.pipe`` like the other ``transform_*`` helpers.
    """
    df['purchases_mean'] = df['purchases'].mean().astype(np.int64)
    df['age_median'] = df['age'].median()
    df['score_mean'] = df['credit_score'].mean()
    # Returning the frame keeps this step consistent with its siblings;
    # without it `.pipe(transform_extracols)` evaluates to None.
    return df

In [6]:
# Apply every transformation to df_fintech_2 in place. The extra columns come
# last so they are derived from the already-doubled numeric values.
(
    df_fintech_2
    .pipe(transform_str)
    .pipe(transform_numeric)
    .pipe(transform_bool)
    .pipe(transform_extracols)
)

In [7]:
# Column dtypes after the in-place transformations, including the three added summary columns.
df_fintech_2.dtypes

age                 int64
credit_score      float64
purchases           int64
zodiac_sign        object
payment_type       object
churn                bool
cancelled_loan       bool
received_loan        bool
purchases_mean      int64
age_median        float64
score_mean        float64
dtype: object

In [8]:
# Preview the first rows of the transformed frame.
df_fintech_2.head()

Unnamed: 0,age,credit_score,purchases,zodiac_sign,payment_type,churn,cancelled_loan,received_loan,purchases_mean,age_median,score_mean
0,42,1154.0,90,Piscs,Smi-Monthly,True,True,True,6,60.0,1085.152626
1,62,1038.0,0,Virgo,Bi-Wkly,True,True,True,6,60.0,1085.152626
2,52,1085.0312,0,Sagittarius,Wkly,True,True,True,6,60.0,1085.152626
3,66,1116.0,0,Lo,Bi-Wkly,True,True,True,6,60.0,1085.152626
4,52,1118.0,0,Virgo,Bi-Wkly,True,True,True,6,60.0,1085.152626


---

In [9]:
# The expected frame is defined up front (hard-coded for the first five rows);
# the transformations are applied to df_orig inside the assert function.
def assert_transform(df_orig):
    """Run the full transform pipeline on ``df_orig`` and compare it with a fixed expectation.

    The expected frame is hard-coded for five rows with a default 0..4 index,
    so ``df_orig`` must be the matching five-row, untransformed input.

    Parameters
    ----------
    df_orig : pandas.DataFrame
        Untransformed frame with the columns ``age``, ``credit_score``,
        ``purchases``, ``zodiac_sign``, ``payment_type``, ``churn``,
        ``cancelled_loan`` and ``received_loan``. It is not modified.

    Raises
    ------
    AssertionError
        If the transformed frame differs from the expected one.
    """
    # The transform_* helpers assign columns in place, so work on a private copy
    # to leave the caller's frame untouched.
    df_actual = df_orig.copy()

    # Each step is called for its in-place effect only. Order matters: the extra
    # columns are computed last, from the already-doubled numeric values.
    for step in (transform_str, transform_numeric, transform_bool, transform_extracols):
        step(df_actual)

    # Expected result for the first five rows of the selected fintech columns.
    df_expected = pd.DataFrame({
        'age': [42, 62, 52, 66, 52],
        'credit_score': [1154.0000, 1038.0000, 1085.0312, 1116.0000, 1118.0000],
        'purchases': [90, 0, 0, 0, 0],
        'zodiac_sign': ['Piscs', 'Virgo', 'Sagittarius', 'Lo', 'Virgo'],
        'payment_type': ['Smi-Monthly', 'Bi-Wkly', 'Wkly', 'Bi-Wkly', 'Bi-Wkly'],
        'churn': [True, True, True, True, True],
        'cancelled_loan': [True, True, True, True, True],
        'received_loan': [True, True, True, True, True],
        'purchases_mean': [18, 18, 18, 18, 18],
        'age_median': [52.0, 52.0, 52.0, 52.0, 52.0],
        'score_mean': [1102.20624, 1102.20624, 1102.20624, 1102.20624, 1102.20624]})

    assert_frame_equal(df_expected, df_actual)

In [10]:
# Check the pipeline against the untransformed backup. Only the first five rows
# are passed because the expected frame in assert_transform is hard-coded for them.
assert_transform(df_fintech_3.head())