[View in Colaboratory](https://colab.research.google.com/github/kalyanpichuka/Complaints_Framework/blob/master/Complaints_Framework_Utilities.ipynb)

In [0]:
# IMPORTING NECESSARY PACKAGES
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from collections import OrderedDict
from pgmpy.models import BayesianModel
from pgmpy.factors.discrete import TabularCPD
from pgmpy.estimators import ParameterEstimator
from pgmpy.estimators import BayesianEstimator
from pgmpy.inference import VariableElimination

In [0]:
!pip install pgmpy

Collecting pgmpy
[?25l  Downloading https://files.pythonhosted.org/packages/96/9c/4b1e07564d8160838d0472728746f3ea3725ced41e43ac05486a328ee78e/pgmpy-0.1.6.tar.gz (218kB)
[K    100% |████████████████████████████████| 225kB 5.3MB/s 
Building wheels for collected packages: pgmpy
  Running setup.py bdist_wheel for pgmpy ... [?25l- \ | done
[?25h  Stored in directory: /root/.cache/pip/wheels/5e/5d/c5/81dd9fc173c4b56cc6f38b943d3d73b81f1096d67c52ae278a
Successfully built pgmpy
Installing collected packages: pgmpy
Successfully installed pgmpy-0.1.6


In [0]:
# NOTE(review): wrapt is not imported by any cell visible in this notebook --
# presumably a transitive requirement of another package; confirm it is still needed.
!pip install wrapt

Collecting wrapt
  Downloading https://files.pythonhosted.org/packages/a0/47/66897906448185fcb77fc3c2b1bc20ed0ecca81a0f2f88eda3fc5a34fc3d/wrapt-1.10.11.tar.gz
Building wheels for collected packages: wrapt
  Running setup.py bdist_wheel for wrapt ... [?25l- \ | done
[?25h  Stored in directory: /root/.cache/pip/wheels/48/5d/04/22361a593e70d23b1f7746d932802efe1f0e523376a74f321e
Successfully built wrapt
Installing collected packages: wrapt
Successfully installed wrapt-1.10.11


In [0]:
# THIS INCLUDES ALL THE UTILITIES REQUIRED FOR THE COMPLAINTS FRAMEWORK MODEL
# Install google-drive-ocamlfuse (from the alessandro-strada PPA) and fuse so that
# Google Drive can be mounted as a local directory by the next cell.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# NOTE(review): `2>&1 > /dev/null` discards stdout only; stderr still reaches the cell output.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate the Colab user, then fetch the application-default credentials
# whose client id/secret are handed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First pass: run headless with no stdin and print only the output line containing "URL".
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Prompt without echo; presumably for the verification code obtained from that URL -- confirm.
vcode = getpass.getpass()
# Second pass: feed the entered code to google-drive-ocamlfuse on stdin.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

Preconfiguring packages ...
Selecting previously unselected package cron.
(Reading database ... 18408 files and directories currently installed.)
Preparing to unpack .../00-cron_3.0pl1-128ubuntu5_amd64.deb ...
Unpacking cron (3.0pl1-128ubuntu5) ...
Selecting previously unselected package libapparmor1:amd64.
Preparing to unpack .../01-libapparmor1_2.11.0-2ubuntu17.1_amd64.deb ...
Unpacking libapparmor1:amd64 (2.11.0-2ubuntu17.1) ...
Selecting previously unselected package libdbus-1-3:amd64.
Preparing to unpack .../02-libdbus-1-3_1.10.22-1ubuntu1_amd64.deb ...
Unpacking libdbus-1-3:amd64 (1.10.22-1ubuntu1) ...
Selecting previously unselected package dbus.
Preparing to unpack .../03-dbus_1.10.22-1ubuntu1_amd64.deb ...
Unpacking dbus (1.10.22-1ubuntu1) ...
Selecting previously unselected package dirmngr.
Preparing to unpack .../04-dirmngr_2.1.15-1ubuntu8.1_amd64.deb ...
Unpacking dirmngr (2.1.15-1ubuntu8.1) ...
Selecting previously unselected package distro-info-data.
Preparing to unpack .

In [0]:
#GOOGLE DRIVE RELATED
# Create the mount point (-p: no error if it already exists) and mount Google Drive on it.
!mkdir -p drive
!google-drive-ocamlfuse drive

In [0]:
# Load the clustered complaints dataset from the mounted Drive directory.
data = pd.read_csv(
  filepath_or_buffer="drive/Complaints_Dataset/clusteredComplaintsDataSet_latest.csv"
)

In [0]:
# Remove the columns that are not nodes of the model built below.
# drop() with errors='ignore' (instead of calling __delitem__ directly) keeps this
# cell safe to re-run: columns that were already removed are simply skipped.
data = data.drop(
  columns=['ID', 'Time Difference', 'Class cluster', 'Count'],
  errors='ignore',
)

In [0]:
# Network structure: three directed edges, each pointing into 'Issue'.
model = BayesianModel([
  ('City', 'Issue'),
  ('Submitted via', 'Issue'),
  ('Company response to consumer', 'Issue'),
])

In [0]:
# Learn the model parameters from `data` using BayesianEstimator with a BDeu prior.
model.fit(
  data,
  estimator=BayesianEstimator,
  prior_type="BDeu",
)

In [0]:
# Inference engine over the fitted model; every query cell and helper below uses it.
issue_inference = VariableElimination(model)

In [0]:
# Query 'Issue' with no evidence; the result is indexed by variable name in the cells below.
prob_issue = issue_inference.query(variables=['Issue'])

In [0]:
#print(prob_issue['Issue'])

In [0]:
# Indices of the three most probable 'Issue' states, most probable first.
# argsort() is ascending, so reverse it before taking the first three entries.
prob_issue['Issue'].values.argsort()[::-1][:3]

array([7, 8, 0])

In [0]:
# Label -> integer index lookups, one per model column; filled in by the cells below.
city_dict = dict()
submitted_via_dict = dict()
company_response_dict = dict()
issue_dict = dict()

In [0]:
# Sorted distinct labels of each model column; a label's position in its list
# becomes its integer index in the lookup dicts built below.
city_list = sorted(data['City'].unique())
submitted_via_list = sorted(data['Submitted via'].unique())
company_response_list = sorted(data['Company response to consumer'].unique())
issue_list = sorted(data['Issue'].unique())

In [0]:
# Number of distinct labels in each column.
city_count, submitted_via_count, company_response_count, issue_count = map(
  len, (city_list, submitted_via_list, company_response_list, issue_list)
)

In [0]:
# Map every city label to its position in the sorted city_list.
# enumerate() replaces the hand-maintained counter, so no global `i` is left behind.
city_dict.update((city, index) for index, city in enumerate(city_list))

In [0]:
# Map every submission-channel label to its position in the sorted submitted_via_list.
# enumerate() replaces the hand-maintained counter, so no global `i` is left behind.
submitted_via_dict.update(
  (submitted_via, index) for index, submitted_via in enumerate(submitted_via_list)
)

In [0]:
# Map every company-response label to its position in the sorted company_response_list.
# enumerate() replaces the hand-maintained counter, so no global `i` is left behind.
company_response_dict.update(
  (company_response, index) for index, company_response in enumerate(company_response_list)
)

In [0]:
# Map every issue label to its position in the sorted issue_list.
# enumerate() replaces the hand-maintained counter, so no global `i` is left behind.
issue_dict.update((issue, index) for index, issue in enumerate(issue_list))

In [0]:
# Display the issue label -> index mapping.
issue_dict 

{'Application, originator, mortgage broker': 0,
 'Applying for a mortgage': 1,
 'Applying for a mortgage or refinancing an existing mortgage': 2,
 'Closing on a mortgage': 3,
 'Credit decision / Underwriting': 4,
 'Improper use of your report': 5,
 'Incorrect information on your report': 6,
 'Loan modification,collection,foreclosure': 7,
 'Loan servicing, payments, escrow account': 8,
 'Other': 9,
 "Problem with a credit reporting company's investigation into an existing problem": 10,
 'Settlement process and costs': 11,
 'Struggling to pay mortgage': 12,
 'Trouble during payment process': 13,
 'Unable to get your credit report or credit score': 14}

In [0]:
# Reverse lookup example: find the issue label whose index is lValue.
lValue = 7
# [0] takes the first matching key; raises IndexError if no key has that value.
lKey = [key for key, value in issue_dict.items() if value == lValue][0]
lKey

'Loan modification,collection,foreclosure'

In [0]:
#RETRIVING KEYS RELATED TO TOP MAX VALUES
def _get_max_value_keys(array,res_dict):
  result=[]
  for element in array:
    result.append([key for key, value in res_dict.items() if value == element][0])
  return result

In [0]:
# Example: translate three issue indices back into their labels.
_get_max_value_keys(np.array([2,7,8]),issue_dict)

['Applying for a mortgage or refinancing an existing mortgage',
 'Loan modification,collection,foreclosure',
 'Loan servicing, payments, escrow account']

In [0]:
# Display the submission-channel label -> index mapping.
submitted_via_dict

{'Email': 0, 'Fax': 1, 'Phone': 2, 'Postal mail': 3, 'Referral': 4, 'Web': 5}

In [0]:
# Query 'Issue' given evidence on all three parent columns, each passed as its integer index.
prob_issue_multiple = issue_inference.query(
  variables=['Issue'],
  evidence={
    'City': city_dict['Miami'],
    'Submitted via': submitted_via_dict['Phone'],
    'Company response to consumer': company_response_dict['Closed without relief'],
  },
)

In [0]:
# Indices of the three most probable 'Issue' states given the evidence, most probable first.
# argsort() is ascending, so reverse it before taking the first three entries.
prob_issue_multiple['Issue'].values.argsort()[::-1][:3]

array([ 8,  7, 14])

In [0]:
# Inspect the instance attributes of the returned factor (cardinality, values, ...).
vars(prob_issue_multiple['Issue'])

{'cardinality': array([15]),
 'state_names': None,
 'values': array([7.68181905e-07, 7.68181905e-07, 7.68181905e-07, 7.68181905e-07,
        7.68181905e-07, 7.68181905e-07, 7.68181905e-07, 4.99995007e-01,
        4.99995007e-01, 7.68181905e-07, 7.68181905e-07, 7.68181905e-07,
        7.68181905e-07, 7.68181905e-07, 7.68181905e-07]),
 'variables': ['Issue']}

In [0]:
# RETRIEVING ISSUES BASED ON THE CITY,SUBMITTED VIA,COMPANY RESPONSE

def _get_issues_(city,submitted_via,company_response,top_n=3):
  """Return the labels of the most probable issues for the given evidence.

  Args:
    city: City label; must be a key of ``city_dict``.
    submitted_via: Submission-channel label; must be a key of ``submitted_via_dict``.
    company_response: Company-response label; must be a key of ``company_response_dict``.
    top_n: Number of issues to return. Defaults to 3, the previously hard-coded value.

  Returns:
    List of issue labels ordered from most to least probable.

  Raises:
    KeyError: If any of the three labels is missing from its lookup dict.
  """
  evidence = {
    'City': city_dict[city],
    'Submitted via': submitted_via_dict[submitted_via],
    'Company response to consumer': company_response_dict[company_response],
  }
  prob_issue_multiple = issue_inference.query(variables=['Issue'], evidence=evidence)
  prob_values = prob_issue_multiple['Issue'].values
  # argsort() is ascending, so reverse it before taking the first top_n indices.
  max_value_indices = prob_values.argsort()[::-1][:top_n]
  return _get_max_value_keys(max_value_indices, issue_dict)
  

In [0]:
# RETRIEVING ISSUES BASED ON THE CITY,SUBMITTED VIA,COMPANY RESPONSE

def _get_issues_latest_(city,submitted_via,company_response,top_n=3):
  """Return the most probable issues for the given evidence, with their probabilities.

  Args:
    city: City label; must be a key of ``city_dict``.
    submitted_via: Submission-channel label; must be a key of ``submitted_via_dict``.
    company_response: Company-response label; must be a key of ``company_response_dict``.
    top_n: Number of issues to return. Defaults to 3, the previously hard-coded value.

  Returns:
    A two-element list ``[labels, probabilities]``: the issue labels ordered from
    most to least probable, and the matching entries of the queried factor's values.

  Raises:
    KeyError: If any of the three labels is missing from its lookup dict.
  """
  evidence = {
    'City': city_dict[city],
    'Submitted via': submitted_via_dict[submitted_via],
    'Company response to consumer': company_response_dict[company_response],
  }
  prob_issue_multiple = issue_inference.query(variables=['Issue'], evidence=evidence)
  prob_values = prob_issue_multiple['Issue'].values
  # argsort() is ascending, so reverse it before taking the first top_n indices.
  max_value_indices = prob_values.argsort()[::-1][:top_n]
  labels = _get_max_value_keys(max_value_indices, issue_dict)
  probabilities = [prob_values[index] for index in max_value_indices]
  return [labels, probabilities]
  

In [0]:
# Show the evidence mapping used in the query cell: each label replaced by its integer index.
{
  'City': city_dict['Miami'],
  'Submitted via': submitted_via_dict['Phone'],
  'Company response to consumer': company_response_dict['Closed without relief'],
}

{'City': 2520, 'Company response to consumer': 5, 'Submitted via': 2}

In [0]:
# Example: top issue labels for a phone complaint from Miami that was closed without relief.
_get_issues_('Miami','Phone','Closed without relief')

['Loan servicing, payments, escrow account',
 'Loan modification,collection,foreclosure',
 'Unable to get your credit report or credit score']

In [0]:
# Example: top issue labels and their probabilities for a referral complaint from Miami
# that was closed without relief.
_get_issues_latest_('Miami','Referral','Closed without relief')

[['Loan modification,collection,foreclosure',
  'Loan servicing, payments, escrow account',
  'Unable to get your credit report or credit score'],
 [0.6666620575575305, 0.3333312848403838, 5.121232373358323e-07]]