In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import numpy as np
%matplotlib inline

In [None]:
# Load data/Crisis_2020_redacted.csv. The file is decoded as ISO-8859-1, and
# low_memory=False makes pandas parse it in one pass instead of in chunks,
# which avoids mixed-dtype inference between chunks.
# NOTE(review): the name `csv` would shadow the standard-library `csv` module
# if that module is ever imported in this notebook.
csv = pd.read_csv('data/Crisis_2020_redacted.csv', encoding='ISO-8859-1', low_memory=False)

In [None]:
# Preview the first five rows of the raw frame.
csv.head()

In [None]:
# Read the repeat-caller table and discard its 'Unnamed: 0' column in a single
# chained expression.
repeat_callers = (
    pd.read_csv('data/repeat_callers.csv')
    .drop(columns=['Unnamed: 0'])
)
# Show the first 25 rows.
repeat_callers.head(25)

In [None]:
# Flag the columns in which every value is null. `.all()` asks that question
# directly, so the check no longer depends on a hard-coded row count
# (previously 22326) and keeps working if the CSV gains or loses rows.
allnull = csv.isnull().all()
coldict = allnull.to_dict()
cols = list(csv.columns.values)
# Names of the entirely-null columns, in column order.
null_list = [value for value in cols if coldict[value]]
null_list

In [None]:
# Display the frame with the all-null columns removed.
# NOTE(review): the result is not assigned, so `csv` itself still contains
# these columns; later cells select columns from `csv` by integer position.
csv.drop(columns=null_list)

In [None]:
# Show a caption followed by every column name of the raw frame.
column_headers = list(csv.columns.values)
display("The column headers :", column_headers)



In [None]:
# Boolean mask over the raw frame's columns: True where the column name
# contains 'crisis', ignoring case.
crisis_cols = csv.columns.str.contains('crisis', case=False)

In [None]:
# Integer positions of the columns flagged in crisis_cols.
crisis_indices = [position for position, is_crisis in enumerate(crisis_cols) if is_crisis]
# Keep column 6 plus every flagged column.
# NOTE(review): column 6 is presumably 'CallerNum' (that column is read from
# csv_crisis in a later cell) — confirm against the CSV header.
selected_positions = np.r_[6, crisis_indices]
csv_crisis = csv.iloc[:, selected_positions]
csv_crisis.head()

In [None]:
# (rows, columns) of the crisis-column subset.
csv_crisis.shape

In [None]:
# Number of rows per distinct CallerNum value, most frequent first.
csv_crisis['CallerNum'].value_counts()

In [None]:
# Mask of columns whose name contains 'crisis issues' (case-insensitive).
crisis_issues_cols = csv.columns.str.contains('crisis issues', case=False)

# Integer positions of the matching columns.
crisis_issues_indices = [i for i, col in enumerate(crisis_issues_cols) if col]
# Keep the first 16 columns plus the crisis-issues columns. The explicit
# .copy() makes this an independent frame, so adding columns to it in a later
# cell does not trigger SettingWithCopyWarning or depend on pandas'
# view-versus-copy behaviour.
csv_crisis_issues = csv.iloc[:, np.r_[0:16, crisis_issues_indices]].copy()
csv_crisis_issues.head()

In [None]:
# Display 'CallerNum' cast to int.
# NOTE(review): the cast is not assigned back, so the column's stored dtype is
# unchanged by this cell.
csv_crisis_issues['CallerNum'].astype(int)

In [None]:
# Inspect the dtype of each selected column.
csv_crisis_issues.dtypes

In [None]:
# Display the crisis-issues frame.
# NOTE(review): csv_crisis_issues is already the result of DataFrame.iloc, so
# the pd.DataFrame(...) wrapper looks redundant, and nothing is assigned —
# confirm intent.
pd.DataFrame(csv_crisis_issues)

In [None]:
# All rows whose CallerNum equals 2270935.
# NOTE(review): the variable name suggests this is the most frequent caller —
# confirm against the value counts.
is_top_caller = csv_crisis_issues['CallerNum'] == 2270935
top_caller = csv_crisis_issues.loc[is_top_caller]
top_caller.head()

In [None]:
# Split 'CallDateAndTimeStart' at the first space into two new columns:
# 'Date' (the text before the space) and 'Time' (everything after it).
csv_crisis_issues[['Date','Time']] = csv_crisis_issues['CallDateAndTimeStart'].str.split(" ",n=1, expand=True)
csv_crisis_issues.head()


In [None]:
# Sub-frame holding only the columns whose name contains 'CRISIS Issues'
# (case-sensitive match).
issue_column_names = [name for name in csv_crisis_issues.columns if 'CRISIS Issues' in name]
issues = csv_crisis_issues[issue_column_names]

In [None]:
# Pull out the emotional-state column as a Series.
emotional_state = issues['CRISIS Issues - Emotional State']

In [None]:
# Split each entry on '; ' so every non-missing row holds a list of
# individual values.
emotional_state_list = emotional_state.str.split('; ')


In [None]:
# Distinct emotional-state values across all calls. explode() flattens the
# per-row lists in one pass; summing the lists instead re-copies the
# accumulated list for every row and returns 0 (not a list) when there are no
# rows to add up.
list(set(emotional_state_list.dropna().explode()))

In [None]:
def get_unique_issues(col_name, df, sep = '; '):
    """Return the distinct values found in a delimiter-separated text column.

    Parameters
    ----------
    col_name : label
        Column of ``df`` to read.
    df : pandas.DataFrame
        Frame containing ``col_name``.
    sep : str, default '; '
        Delimiter between the values stored inside a single cell.

    Returns
    -------
    list
        Each distinct value once, in order of first appearance. Missing cells
        are skipped; a column with no non-missing cells yields ``[]``.
    """
    column = df[col_name]
    column_list = column.dropna().astype(str).str.split(sep)
    # explode() flattens the per-row lists in linear time. The previous
    # Series.sum() approach concatenated lists pairwise (quadratic) and returned
    # the integer 0 for an empty column, which made set(...) raise TypeError.
    return column_list.explode().unique().tolist()


In [None]:
# Try the helper on the emotional-state column.
get_unique_issues('CRISIS Issues - Emotional State', csv_crisis_issues)

In [None]:
# Exclude the call-problems column from the issue columns. errors='ignore'
# keeps this cell re-runnable: `issues` is reassigned here, so a second run
# would otherwise raise KeyError because the column is already gone.
issues = issues.drop(columns='CRISIS Issues - Call Problems (specify at least 2)', errors='ignore')

In [None]:
# Collect the unique values of every remaining issue column: one list per
# column, in column order, printing each column name as it is processed.
unique_cols_list = []
for c in issues.columns:
    print(c)
    unique_cols_list.append(get_unique_issues(c, issues))


In [None]:
# Display the per-column lists of unique values.
unique_cols_list

In [None]:
# Count how many times each emotional-state value occurs across all calls.
# explode() yields the individual values in row order, so keys are inserted in
# order of first appearance. It also avoids building one large list through
# repeated list addition, and it copes with a column that has no non-missing
# rows (where summing would return 0 and the loop would fail).
issues_dict = {}
for issue in emotional_state_list.dropna().explode():
    issues_dict[issue] = issues_dict.get(issue, 0) + 1

In [None]:
# The distinct emotional-state values that were counted.
issues_dict.keys()

In [None]:
# Rebuild the counts as a dict ordered from least to most frequent; ties keep
# their existing relative order because the sort is stable.
ascending_keys = sorted(issues_dict, key=issues_dict.get)
issues_dict_sort = {key: issues_dict[key] for key in ascending_keys}

In [None]:
# Check what type .keys() returns (a dict view rather than a list).
type(issues_dict_sort.keys())

In [None]:
# (value, count) pairs in the dict's insertion order.
list(issues_dict.items())

In [None]:
# Horizontal bar chart of emotional-state counts; because the dict is sorted
# ascending, the least frequent value is drawn at the bottom.
plt.figure(figsize = (15, 6))
# Materialise both dict views as lists so the labels and the bar widths are
# passed in the same concrete form instead of relying on matplotlib to coerce
# a dict_values view.
plt.barh(y = list(issues_dict_sort.keys()), width = list(issues_dict_sort.values()))
plt.xticks(rotation = 90);

In [None]:
# Flatten the per-column lists into one list (displayed only, not stored).
[issue for sublist in unique_cols_list for issue in sublist]


In [None]:
# Alternative approach to creating a flat list: grow one accumulator by
# appending the contents of each sublist in turn.
flat_list = []
for sublist in unique_cols_list:
    flat_list.extend(sublist)
flat_list

In [None]:
# Issue labels that this notebook treats as high risk. The same labels are used
# as column names when narrowing the crisis_risk frame further down.
# NOTE(review): 'Mobile Crisis ' carries a trailing space — presumably to match
# the CSV header exactly; confirm before normalising it.
high_risk = [
'Abuse/Neglect of Spouse/Partner',
'Child Sexual Abuse',
'Abuse/Neglect of Parent/Other Family',
'Child Abuse/Neglect',
'Other Physical Abuse/Neglect',
'Rape/Sexual Abuse',
'Pedophile',
'Cult/Ritualistic Abuse',
'Runaway/Missing',
'Mass Violence (Domestic/International)',
'Terrorism (Domestic/International)',
'CURRENT HOMICIDAL THOUGHTS/IDEATIONS',
'CURRENT HOMICIDAL INTENTION/PLAN/ATTEMPT IN PROGRESS',
'Non-Suicidal Self Injury',
'Third party concerned about anothers homicide risk',
'CURRENT THOUGHTS/IDEATIONS',
'CURRENT SUICIDAL INTENTION/PLAN/ATTEMPT IN PROGRESS',
'Third party concerned about anothers suicide risk',
'Mobile Crisis ',
'Emergency (Mental Health)',
'Emergency Medical Services',
'Poisoning',
'Overdose',
'Detoxification',
'Drug (street) Problem',
'Medication Concern',
'Under the Influence'
]

In [None]:
# Load data/Crisis_High_Risk.csv with pandas' default parsing options.
crisis_risk = pd.read_csv('data/Crisis_High_Risk.csv')

In [None]:
# Display the frame without its 'Unnamed: 0' column.
# NOTE(review): the result is not assigned, so crisis_risk still contains that
# column after this cell.
crisis_risk.drop(columns = 'Unnamed: 0')

In [None]:
# Narrow crisis_risk to 'CallerNum', 'CallLength', one column per high-risk
# issue, and the three summary columns. The issue columns are taken from the
# `high_risk` list instead of repeating all 27 labels, so the two can no longer
# drift apart when a label is added or corrected.
crisis_risk_issues_only = crisis_risk[
    ['CallerNum', 'CallLength', *high_risk,
     'Total Number of Issues', 'Number of Imminent Risk Issues', 'IR Call']
]

In [None]:
# Repeat callers: remove every row whose CallerNum is -1, then remove the
# 'IR Call' column, producing a new frame in one chained expression.
# NOTE(review): -1 appears to mark first-time callers (the complementary
# selection is named crisis_risk_first) — confirm.
first_time_rows = crisis_risk_issues_only.index[crisis_risk_issues_only['CallerNum'] == -1]
crisis_risk_repeaters = (
    crisis_risk_issues_only
    .drop(index=first_time_rows)
    .drop(columns='IR Call')
)

In [None]:
# Label each row "Yes" when its 'Number of Imminent Risk Issues' is greater
# than 0, otherwise "No".
result = [
    "Yes" if value > 0 else "No"
    for value in crisis_risk_repeaters["Number of Imminent Risk Issues"]
]

# Store the labels as a new column and preview the first 25 rows.
crisis_risk_repeaters["Risk Present"] = result
crisis_risk_repeaters.head(25)

In [None]:
# crisis_risk_repeaters['StateProvince'].astype(str)

In [None]:
# Display the imminent-risk counts cast to int.
# NOTE(review): the cast is not assigned back, so the stored column is unchanged.
crisis_risk_repeaters['Number of Imminent Risk Issues'].astype(int)

In [None]:
# (rows, columns) of the repeat-caller frame.
crisis_risk_repeaters.shape

In [None]:
# Horizontal bar chart of how many repeat-caller rows are labelled "Yes" / "No"
# for imminent risk, smallest bar first.
# fontsize is passed as a number (the documented type) instead of the string
# '16', and the axis label is set on the returned Axes; the trailing semicolon
# suppresses the Text(...) repr that would otherwise be printed under the plot.
ax = crisis_risk_repeaters['Risk Present'].value_counts().sort_values().plot(
    kind='barh', figsize=(10, 10), color='#a45c7c', fontsize=16,
    title='Presence of Imminent Risk in Repeat Callers')
ax.set_xlabel('Number of Calls', fontsize=16);

In [None]:
# Exact "Yes" / "No" counts behind the repeat-caller chart.
crisis_risk_repeaters['Risk Present'].value_counts()

In [None]:
# First-time callers: remove every row whose CallerNum is not -1, then remove
# the 'IR Call' column, producing a new frame in one chained expression.
repeat_rows = crisis_risk_issues_only.index[crisis_risk_issues_only['CallerNum'] != -1]
crisis_risk_first = (
    crisis_risk_issues_only
    .drop(index=repeat_rows)
    .drop(columns='IR Call')
)

In [None]:
# Label each row "Yes" when its 'Number of Imminent Risk Issues' is greater
# than 0, otherwise "No".
result = [
    "Yes" if value > 0 else "No"
    for value in crisis_risk_first["Number of Imminent Risk Issues"]
]

# Store the labels as a new column and preview the first 25 rows.
crisis_risk_first["Risk Present"] = result
crisis_risk_first.head(25)

In [None]:
# Horizontal bar chart of how many first-time-caller rows are labelled
# "Yes" / "No" for imminent risk, smallest bar first.
# fontsize is passed as a number (the documented type) instead of the string
# '16', and the axis label is set on the returned Axes; the trailing semicolon
# suppresses the Text(...) repr that would otherwise be printed under the plot.
ax = crisis_risk_first['Risk Present'].value_counts().sort_values().plot(
    kind='barh', figsize=(10, 10), color='#a45c7c', fontsize=16,
    title='Presence of Imminent Risk in First-Time Callers')
ax.set_xlabel('Number of Calls', fontsize=16);



In [None]:
# Exact "Yes" / "No" counts behind the first-time-caller chart.
crisis_risk_first['Risk Present'].value_counts()

In [None]:
# First-time-caller rows with risk present: remove the rows labelled 'No'.
no_risk_rows = crisis_risk_first.index[crisis_risk_first['Risk Present'] == 'No']
crisis_risk_first_high = crisis_risk_first.drop(index=no_risk_rows)
# Preview the first 50 remaining rows.
crisis_risk_first_high.head(50)

In [None]:
# Horizontal bar chart: for each distinct 'Number of Imminent Risk Issues'
# value among first-time-caller rows with risk present, how many rows have it.
# fontsize is passed as a number (the documented type) instead of the string
# '16', and both labels are set on the returned Axes; the trailing semicolon
# suppresses the Text(...) repr that would otherwise be printed under the plot.
ax = crisis_risk_first_high['Number of Imminent Risk Issues'].value_counts().sort_values().plot(
    kind='barh', figsize=(10, 10), color='#a45c7c', fontsize=16,
    title='Number of High Risk Issues in First-Time Callers')
ax.set_xlabel('Number of Calls', fontsize=16)
ax.set_ylabel('Number of High Risk Issues', fontsize=16);

In [None]:
# Sum of the CallLength column over first-time-caller rows with risk present.
crisis_risk_first_high.CallLength.sum()

In [None]:
# Repeat-caller rows with risk present: remove the rows labelled 'No'.
no_risk_rows = crisis_risk_repeaters.index[crisis_risk_repeaters['Risk Present'] == 'No']
crisis_risk_repeaters_high = crisis_risk_repeaters.drop(index=no_risk_rows)
# Preview the first 50 remaining rows.
crisis_risk_repeaters_high.head(50)

In [None]:
# Horizontal bar chart: for each distinct 'Number of Imminent Risk Issues'
# value among repeat-caller rows with risk present, how many rows have it.
# fontsize is passed as a number (the documented type) instead of the string
# '16', and both labels are set on the returned Axes; the trailing semicolon
# suppresses the Text(...) repr that would otherwise be printed under the plot.
ax = crisis_risk_repeaters_high['Number of Imminent Risk Issues'].value_counts().sort_values().plot(
    kind='barh', figsize=(10, 10), color='#a45c7c', fontsize=16,
    title='Number of High Risk Issues in Repeat Callers')
ax.set_xlabel('Number of Calls', fontsize=16)
ax.set_ylabel('Number of High Risk Issues', fontsize=16);

In [None]:
# Sum of the CallLength column over repeat-caller rows with risk present.
crisis_risk_repeaters_high.CallLength.sum()

In [None]:
# Number of distinct CallerNum values among the repeat-caller rows.
crisis_risk_repeaters['CallerNum'].nunique()

In [None]:
# Row count of the first-time-caller frame. Every row here has CallerNum == -1
# (that is how the frame was selected), so the number of rows is used rather
# than the number of distinct CallerNum values.
crisis_risk_first['CallerNum'].shape