In [3]:
from data_transform_classes import DataTransforming
import pandas as pd
# Read the loan-payments CSV (path is relative to the notebook's working directory).
file_path = 'loan_payments2.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

In [4]:
# %run DataTransform.py  (IPython magic command, left disabled)

# **Converting Columns to the Correct Format**

## *Converting to Numeric:*

In [5]:
# Inspect the raw 'term' column before any conversion.
print(df.loc[:, 'term'])

0        36 months
1        36 months
2        36 months
3        36 months
4        36 months
           ...    
54226    36 months
54227    36 months
54228    36 months
54229    36 months
54230    36 months
Name: term, Length: 54231, dtype: object


In [6]:
# Strip the "months" unit so only the number remains, then convert to numeric.
# The guard keeps this cell re-runnable: once 'term' is numeric the .str
# accessor is unavailable and the replacement would raise AttributeError.
if not pd.api.types.is_numeric_dtype(df['term']):
    df['term'] = (
        df['term']
        .str.replace('months', '', regex=False)  # literal match, independent of the pandas regex default
        .str.strip()                             # drop the whitespace left next to the removed unit
    )
# The converted values are read back from df below; the return value is kept
# in `dt` but is not used in this cell.
dt = DataTransforming.to_numeric(df, 'term')
print("\nDataFrame after conversion:")
print(df['term'])


DataFrame after conversion:
0        36.0
1        36.0
2        36.0
3        36.0
4        36.0
         ... 
54226    36.0
54227    36.0
54228    36.0
54229    36.0
54230    36.0
Name: term, Length: 54231, dtype: float64


## *Converting to Category:*

In [7]:
# Inspect the columns that are candidates for a categorical dtype.
print(
    df.loc[
        :,
        [
            'grade',
            'sub_grade',
            'verification_status',
            'loan_status',
            'purpose',
            'employment_length',
            'home_ownership',
        ],
    ]
)

      grade sub_grade verification_status  \
0         A        A4        Not Verified   
1         A        A3        Not Verified   
2         A        A4     Source Verified   
3         C        C4     Source Verified   
4         A        A1            Verified   
...     ...       ...                 ...   
54226     B        B2        Not Verified   
54227     C        C2        Not Verified   
54228     A        A2        Not Verified   
54229     A        A2        Not Verified   
54230     C        C5        Not Verified   

                                             loan_status             purpose  \
0                                                Current         credit_card   
1                                                Current         credit_card   
2                                             Fully Paid         credit_card   
3                                             Fully Paid  debt_consolidation   
4                                                Current  d

In [8]:
# Encode 'grade' via DataTransforming.to_category_codes; the result is read
# back from df, and the return value is kept in `dt` but unused here.
dt = DataTransforming.to_category_codes(df, 'grade')
print("\nDataFrame after conversion:", df['grade'], sep="\n")


DataFrame after conversion:
0        1
1        1
2        1
3        3
4        1
        ..
54226    2
54227    3
54228    1
54229    1
54230    3
Name: grade, Length: 54231, dtype: int8


In [9]:
# Convert 'sub_grade' with DataTransforming.to_category, then show the column
# as it now appears in df.
dt = DataTransforming.to_category(df, 'sub_grade')
print("\nDataFrame after conversion:", df['sub_grade'], sep="\n")


DataFrame after conversion:
0        A4
1        A3
2        A4
3        C4
4        A1
         ..
54226    B2
54227    C2
54228    A2
54229    A2
54230    C5
Name: sub_grade, Length: 54231, dtype: category
Categories (35, object): ['A1', 'A2', 'A3', 'A4', ..., 'G2', 'G3', 'G4', 'G5']


In [10]:
# Convert 'verification_status' with DataTransforming.to_category, then show
# the column as it now appears in df.
dt = DataTransforming.to_category(df, 'verification_status')
print("\nDataFrame after conversion:", df['verification_status'], sep="\n")


DataFrame after conversion:
0           Not Verified
1           Not Verified
2        Source Verified
3        Source Verified
4               Verified
              ...       
54226       Not Verified
54227       Not Verified
54228       Not Verified
54229       Not Verified
54230       Not Verified
Name: verification_status, Length: 54231, dtype: category
Categories (3, object): ['Not Verified', 'Source Verified', 'Verified']


In [11]:
# Convert 'loan_status' with DataTransforming.to_category, then show the
# column as it now appears in df.
dt = DataTransforming.to_category(df, 'loan_status')
print("\nDataFrame after conversion:", df['loan_status'], sep="\n")


DataFrame after conversion:
0                                                  Current
1                                                  Current
2                                               Fully Paid
3                                               Fully Paid
4                                                  Current
                               ...                        
54226                                           Fully Paid
54227                                           Fully Paid
54228    Does not meet the credit policy. Status:Fully ...
54229                                           Fully Paid
54230    Does not meet the credit policy. Status:Charge...
Name: loan_status, Length: 54231, dtype: category
Categories (9, object): ['Charged Off', 'Current', 'Default', 'Does not meet the credit policy. Status:Charg..., ..., 'Fully Paid', 'In Grace Period', 'Late (16-30 days)', 'Late (31-120 days)']


In [12]:
# Convert 'purpose' with DataTransforming.to_category, then show the column
# as it now appears in df.
dt = DataTransforming.to_category(df, 'purpose')
print("\nDataFrame after conversion:", df['purpose'], sep="\n")


DataFrame after conversion:
0               credit_card
1               credit_card
2               credit_card
3        debt_consolidation
4        debt_consolidation
                ...        
54226                 other
54227                 other
54228    debt_consolidation
54229                 house
54230                 other
Name: purpose, Length: 54231, dtype: category
Categories (14, object): ['car', 'credit_card', 'debt_consolidation', 'educational', ..., 'renewable_energy', 'small_business', 'vacation', 'wedding']


In [13]:
# Convert 'home_ownership' with DataTransforming.to_category, then show the
# column as it now appears in df.
dt = DataTransforming.to_category(df, 'home_ownership')
print("\nDataFrame after conversion:", df['home_ownership'], sep="\n")


DataFrame after conversion:
0        MORTGAGE
1            RENT
2        MORTGAGE
3            RENT
4        MORTGAGE
           ...   
54226    MORTGAGE
54227        RENT
54228    MORTGAGE
54229        RENT
54230    MORTGAGE
Name: home_ownership, Length: 54231, dtype: category
Categories (5, object): ['MORTGAGE', 'NONE', 'OTHER', 'OWN', 'RENT']


In [14]:
# Convert 'employment_length' with DataTransforming.to_category, then show
# the column as it now appears in df.
dt = DataTransforming.to_category(df, 'employment_length')
print("\nDataFrame after conversion:", df['employment_length'], sep="\n")


DataFrame after conversion:
0          5 years
1          9 years
2          8 years
3           1 year
4        10+ years
           ...    
54226       1 year
54227     < 1 year
54228    10+ years
54229      4 years
54230      9 years
Name: employment_length, Length: 54231, dtype: category
Categories (11, object): ['1 year', '10+ years', '2 years', '3 years', ..., '7 years', '8 years', '9 years', '< 1 year']


## *Converting to DateTime:*

In [15]:
# Inspect the date-like columns before converting them to datetime.
print(
    df.loc[
        :,
        [
            'issue_date',
            'earliest_credit_line',
            'last_payment_date',
            'next_payment_date',
            'last_credit_pull_date',
        ],
    ]
)

      issue_date earliest_credit_line last_payment_date next_payment_date  \
0       Jan-2021             Oct-1987          Jan-2022          Feb-2022   
1       Jan-2021             Sep-2001          Jan-2022          Feb-2022   
2       Jan-2021             Sep-1998          Oct-2021               NaN   
3       Jan-2021             Jun-2008          Jun-2021               NaN   
4       Jan-2021             Apr-2002          Jan-2022          Feb-2022   
...          ...                  ...               ...               ...   
54226   Jul-2013             Apr-2003          Jul-2016               NaN   
54227   Oct-2013             Jan-1999          Oct-2016               NaN   
54228   Aug-2013             Feb-1984          Sep-2016          Sep-2016   
54229   Aug-2013             Mar-1995          Mar-2014               NaN   
54230   Jul-2013             Oct-1998          Mar-2014          Dec-2014   

      last_credit_pull_date  
0                  Jan-2022  
1              

In [16]:
# Convert 'issue_date' with DataTransforming.to_datetime, then show the
# column as it now appears in df.
dt = DataTransforming.to_datetime(df, 'issue_date')
print("\nDataFrame after conversion:", df['issue_date'], sep="\n")


DataFrame after conversion:
0       2021-01-01
1       2021-01-01
2       2021-01-01
3       2021-01-01
4       2021-01-01
           ...    
54226   2013-07-01
54227   2013-10-01
54228   2013-08-01
54229   2013-08-01
54230   2013-07-01
Name: issue_date, Length: 54231, dtype: datetime64[ns]


In [17]:
# Convert 'earliest_credit_line' with DataTransforming.to_datetime, then show
# the column as it now appears in df.
dt = DataTransforming.to_datetime(df, 'earliest_credit_line')
print("\nDataFrame after conversion:", df['earliest_credit_line'], sep="\n")


DataFrame after conversion:
0       1987-10-01
1       2001-09-01
2       1998-09-01
3       2008-06-01
4       2002-04-01
           ...    
54226   2003-04-01
54227   1999-01-01
54228   1984-02-01
54229   1995-03-01
54230   1998-10-01
Name: earliest_credit_line, Length: 54231, dtype: datetime64[ns]


In [18]:
# Convert 'last_payment_date' with DataTransforming.to_datetime, then show
# the column as it now appears in df.
dt = DataTransforming.to_datetime(df, 'last_payment_date')
print("\nDataFrame after conversion:", df['last_payment_date'], sep="\n")


DataFrame after conversion:
0       2022-01-01
1       2022-01-01
2       2021-10-01
3       2021-06-01
4       2022-01-01
           ...    
54226   2016-07-01
54227   2016-10-01
54228   2016-09-01
54229   2014-03-01
54230   2014-03-01
Name: last_payment_date, Length: 54231, dtype: datetime64[ns]


In [19]:
# Convert 'next_payment_date' with DataTransforming.to_datetime, then show
# the column as it now appears in df.
dt = DataTransforming.to_datetime(df, 'next_payment_date')
print("\nDataFrame after conversion:", df['next_payment_date'], sep="\n")


DataFrame after conversion:
0       2022-02-01
1       2022-02-01
2              NaT
3              NaT
4       2022-02-01
           ...    
54226          NaT
54227          NaT
54228   2016-09-01
54229          NaT
54230   2014-12-01
Name: next_payment_date, Length: 54231, dtype: datetime64[ns]


In [20]:
# Convert 'last_credit_pull_date' with DataTransforming.to_datetime, then
# show the column as it now appears in df.
dt = DataTransforming.to_datetime(df, 'last_credit_pull_date')
print("\nDataFrame after conversion:", df['last_credit_pull_date'], sep="\n")


DataFrame after conversion:
0       2022-01-01
1       2022-01-01
2       2021-10-01
3       2021-06-01
4       2022-01-01
           ...    
54226   2016-07-01
54227   2016-09-01
54228   2013-05-01
54229   2013-05-01
54230   2015-01-01
Name: last_credit_pull_date, Length: 54231, dtype: datetime64[ns]


In [21]:
# Expand the single-letter flags to readable labels. A whole-value mapping is
# used instead of substring replacement so that only cells that are exactly
# 'y' or 'n' change; any other value is left as-is rather than being mangled.
df['payment_plan'] = df['payment_plan'].replace({'y': 'Yes', 'n': 'No'})
# Convert the column this cell actually edits and prints. The earlier version
# passed 'last_credit_pull_date', which turned that freshly parsed datetime
# column back into strings and never touched 'payment_plan'.
dt = DataTransforming.to_string(df, 'payment_plan')
print("\nDataFrame after conversion:")
print(df['payment_plan'])


DataFrame after conversion:
0        No
1        No
2        No
3        No
4        No
         ..
54226    No
54227    No
54228    No
54229    No
54230    No
Name: payment_plan, Length: 54231, dtype: object


## *Saving to a new CSV file:*

In [22]:
# Write the converted frame to a new file; index=False leaves the row index out.
df.to_csv(path_or_buf='altered_loan_payments2.csv', index=False)