# Score Cluster Model

### Load all required modules

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display
import os 
import cx_Oracle
import seaborn as sns
import joblib
import numpy as np

from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split

# Update path to where function file resides
#
# Remember the notebook's working directory so it can be restored once the
# helper modules have been imported from their own folders. os.getcwd() is
# used instead of the IPython `!cd` / `!pwd` shell magics so that `state` is
# defined for every OS branch (previously an unsupported OS reached
# os.chdir(state[0]) with `state` unbound). A one-element list keeps the
# existing `state[0]` access pattern working.
state = [os.getcwd()]

try:
    if os.name == 'nt':
        # Load DB Connection File from Windows Machine
        os.chdir(r'Directory Name')
        from db_connection import oracle_connection

        # Load function file from Windows Machine
        os.chdir(r'Directory Name')
        from general_functions import *
    elif os.name == 'posix':
        # Load DB Connection File from Mac Machine
        os.chdir('Directory Name')
        from db_connection import oracle_connection

        # Load function file from Mac Machine
        os.chdir('Directory Name')
        from general_functions import *
    else:
        print('No OS!')
finally:
    # Change directory back to working Jupyter Notebook Directory after
    # importing the modules -- also when a chdir or import above fails, so the
    # notebook is never left stranded in one of the helper folders.
    os.chdir(state[0])

# Show every column when a DataFrame is displayed.
pd.options.display.max_columns = None

## Create DB Connection and Cursor

In [None]:
# Per-OS settings: the suffix appended to the user's home directory to locate
# the config file, and any extra keyword arguments for cx_Oracle.connect.
# Update each path to where the config file resides.
os_settings = {
    'nt': ('Directory Name', {}),
    'posix': ('Directory Name', {'encoding': 'UTF-8'}),
}

if os.name in os_settings:
    config_suffix, connect_options = os_settings[os.name]

    db_creds = os.path.expanduser('~') + config_suffix
    creds = oracle_connection(db_creds)

    # Connection target in the form host:port/database.
    url = creds['host'] + ":" + creds['port'] + "/" + creds['database']

    db = cx_Oracle.connect(creds['user'], creds['password'], url, **connect_options)
    cursor = db.cursor()
else:
    print('No OS!')

### Send the query to the Oracle database and return the result as a pandas DataFrame


In [None]:
# Get Data for clustering process
# The SQL text is left blank here and must be filled in before running.
query = """
            
        """

# Run the query over the connection that owns the open cursor and load the
# result set into a DataFrame.
df = pd.read_sql(sql = query, con = cursor.connection)

In [None]:
# Work on a copy so the queried frame `df` stays untouched, and leave the
# ACCT_ID column out of the working frame.
df_tr = df.copy().drop(columns = ["ACCT_ID"])

### Data Pre-processing Steps

In [None]:
# replace_values comes from general_functions; its result is used as the
# `value` argument of fillna (presumably a per-column fill mapping, with
# 'Unknown' as the text placeholder -- confirm against the helper).
dtype_dict_value = replace_values(df_tr, 'Unknown')
df_tr = df_tr.fillna(value = dtype_dict_value)

df_tr.head()

In [None]:
# convert_cat_to_cat_lvl comes from general_functions and returns four values:
# the converted frame, a forward mapping, an inverse mapping and the fitted
# encoder (presumably categorical text -> numeric levels -- confirm against
# the helper).
(
    df_tr,
    forward_mapping_dict,
    inv_mapping_dict,
    encoder_fit,
) = convert_cat_to_cat_lvl(df_tr, encode_method = 'Numeric')

# Show any rows that still contain a missing value after the conversion.
df_tr.loc[df_tr.isna().any(axis = 1)]

### Read the Serialized K-Means Cluster models from disk and score new data

In [None]:
# NOTE: joblib.load unpickles the files, so only load artifacts from a
# trusted location.

# Transformations
pca_fit = joblib.load('./Model/kmeans_dlm_pca_fit_v2.0.pkl')
scaler_fit = joblib.load('./Model/kmeans_dlm_scaler_v2.0.pkl')

# Models
kmeans_dlm_pca = joblib.load('./Model/kmeans_dlm_pca_v2.0.pkl')
kmeans_dlm_full = joblib.load('./Model/kmeans_dlm_full_v2.0.pkl')

In [None]:
# Score the new data with the artifacts loaded from disk.
#
# The persisted scaler and PCA are applied with transform() only. Refitting
# them on the scoring data (a fresh StandardScaler().fit_transform, or
# pca_fit.fit_transform) would project the rows into a different space from
# the one the PCA K-Means model was loaded for, and would also overwrite the
# loaded PCA's fitted state.
df_std = scaler_fit.transform(df_tr)

principalComponents = pca_fit.transform(df_std)
# assumes pca_fit was fitted with two components -- TODO confirm
pca_df = pd.DataFrame(data = principalComponents
             , columns = ['PCA_1', 'PCA_2'])

# NOTE(review): the full model is scored on the unscaled df_tr, as before.
# Confirm this matches how kmeans_dlm_full was trained; if it was fitted on
# standardized features, df_std should be passed here instead.
df_tr['FULL_CLUSTER'] = kmeans_dlm_full.predict(df_tr)
df_tr['PCA_CLUSTER'] = kmeans_dlm_pca.predict(pca_df)
df_tr.head()

### Return the columns that were converted to category levels back to category values.

In [None]:
# Each key of inv_mapping_dict names a column of df_tr and each value is the
# mapping handed to Series.map for that column. Iterating the items directly
# avoids rebuilding the key and value lists on every pass.
for col, mapping in inv_mapping_dict.items():
    # The restored column is named after the source column with "_CAT" removed.
    col_name = col.replace("_CAT", "")
    df_tr[col_name] = df_tr[col].map(mapping)

df_tr.head()

In [None]:
# 'COLUMN NAMES' entries are placeholders to be filled in before running.

# Attach the selected scored columns to the original query result; columns
# present on both sides get the _ORIG / _TEST suffixes.
scored_columns = df_tr[['COLUMN NAMES']]
final_df = df.join(scored_columns, lsuffix = '_ORIG', rsuffix = '_TEST')

# Discard every column that is not in the list of columns to keep.
columns_to_discard = final_df.columns.difference(['COLUMN NAMES'])
final_df = final_df.drop(columns = columns_to_discard)

# NOTE(review): the placeholder below is a set; rename expects a mapping of
# old name -> new name once it is filled in.
final_df = final_df.rename(columns = {'COLUMN NAMES'})
final_df.head()

## Write data back to Oracle Database

In [None]:
# Drop target table
# Executes the statement held in drop_table_sql on the open cursor. The
# statement text is left blank here and must be filled in before running.

drop_table_sql = """
"""

cursor.execute(drop_table_sql)

In [None]:
# Create target table
# Executes the statement held in create_table_sql on the open cursor. The
# statement text is left blank here and must be filled in before running.
create_table_sql = """
"""

cursor.execute(create_table_sql)

In [None]:
# insert into target table
# The insert statement is left blank here and must be filled in before running.

# One tuple per row of final_df, in column order, for the batched insert.
records = list(map(tuple, final_df.values))
cursor.executemany('''''', records)

# Make the inserted rows permanent.
db.commit()