# Import the necessary functions and classes.

In [1]:
from db_connector.py import open_table

from data_transform.py import DataTransform

from data_frame_info.py import DataFrameInfo

from plotter.py import plotter

from data_frame_transform.py import DataFrameTransform

ModuleNotFoundError: No module named 'db_connector.py'; 'db_connector' is not a package

# Retrieve table.

In [None]:
raw_table = open_table()

# Call methods and classes.

In [None]:
transform_call = DataTransform(raw_table)

# Remove strings from the 'term' column.
transform_call.remove_term_column_strings()

# Convert all date columns to datetime format.
transform_call.iterate_through_columns()

# Initialise a DataFrameInfo call with the raw dataframe.
find_info = DataFrameInfo(raw_table)

# Create table of percentage null values in the raw dataframe.
null_percentages_table = find_info.percentage_null_values()


# Sort data based on the data information.

In [None]:
# Columns with null values:
all_null_columns = ['mths_since_last_record', 'mths_since_last_major_derog', 'next_payment_date', 'mths_since_last_delinq', 'employment_length', 'last_payment_date', 'last_credit_pull_date','term', 'int_rate', 'funded_amount', 'collections_12_mths_ex_med']

 # Columns with >50% null values:
highest_null_proportion_columns = ['mths_since_last_record', 'mths_since_last_major_derog', 'next_payment_date', 'mths_since_last_delinq']

# Categorical columns:
categorical_columns = ['employment_length', 'term'] # Contains null values

# Date columns
date_columns = ['last_payment_date', 'last_credit_pull_date'] # Contains null values

# Columns with <10% null values:
low_null_columns = ['int_rate', 'funded_amount', 'last_payment_date', 'last_credit_pull_date', 'collections_12_mths_ex_med']


# Assess skewness of columns with low percentage null values.

In [None]:
find_info.column_skew(low_null_columns)

# Sort the low percentage null columns by skewness.

In [None]:
# Columns with <1 skew:
low_skew_columns = ['int_rate', 'funded_amount']

# Columns with >1 skew:
high_skew_columns = ['collections_12_mths_ex_med']

# Initialise a dataframe transform call.

In [None]:
data_frame_transform_call = DataFrameTransform(raw_table, null_percentages_table, highest_null_proportion_columns, low_skew_columns, categorical_columns, high_skew_columns, date_columns)

# Remove the null values in the dataframe.

In [None]:
# Drop all columns with >50% null values.
data_frame_transform_call.drop_columns()

# Impute null values in categorical columns with the mode.
data_frame_transform_call.impute_with_mode()

# Impute null values in columns with >1 skew with the median.
data_frame_transform_call.impute_with_median()

# Drop rows with null values in columns with <1% null values.
data_frame_transform_call.drop_rows()

# Impute remaining null values in columns with <10% null values with the mean.
no_null_table = data_frame_transform_call.impute_with_mean()

# Visualise missing values before and after removal of null values.

In [None]:
# Call the plotter class with the raw data frame and produce matrix.
visualise = Plotter(raw_table)
visualise.missing_data()

# Call the plotter class with the null-removed
visualise = Plotter(no_null_table)
visualise.missing_data()

# Assess dataframe after null removal.

In [None]:
# Call DataFrameInfo class with latest dataframe.
new_info = DataFrameInfo(no_null_table)

# View data types within new dataframe.
new_table_data_types = new_info.find_column_types()

# Assess skew of new dataframe.
all_skew = new_info.column_skew()

# Visualise the skewness of the dataframe.
new_info.get_histogram()

# List all column names in the dataframe.
all_column_names = list(no_null_table)

# Call the DataFrameTransform class after removal of nulls in the dataframe.
transform_post_null_removal = DataFrameTransform(raw_table, null_percentages_table, highest_null_proportion_columns, low_skew_columns, categorical_columns, high_skew_columns, date_columns, all_skew)

# Call the DataFrameTransform class after removal of nulls in the dataframe.
transform_post_null_removal = DataFrameTransform(raw_table, null_percentages_table, highest_null_proportion_columns, low_skew_columns, categorical_columns, high_skew_columns, date_columns, all_skew)

transform_post_null_removal.boxcox_transform_skew()