<a href="https://colab.research.google.com/github/mitrarokni2019/PythonPractices/blob/master/AssociationRulesMining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


This project delves into the NYC 311 Service Requests data from 2014, a comprehensive dataset that encapsulates a wide array of non-emergency requests and complaints logged by New York City's diverse populace. Utilizing data analysis and association rule mining techniques, the code aims to uncover patterns and correlations within the service request types, agencies involved, and other categorical variables present in the dataset. The findings from this analysis could potentially inform city management and policy decisions by highlighting prevalent issues and service request trends.


In [None]:
# Packages that you need: pandas, numpy, mlxtend
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori,association_rules

# Load the service-request data from the working directory.
df = pd.read_csv('service5.csv')

# Quick data-quality checks. With IPython's default settings only the last
# expression of a cell is rendered, so run these two lines in their own cells
# if you want to see their results.
df.describe(include="all").iloc[:,:3]
# Per-column count of missing values. Calling the DataFrame method directly
# (instead of np.sum(df.isna())) avoids numpy forwarding axis=None, whose
# meaning for DataFrame.sum is deprecated in pandas 2.x.
df.isna().sum()

# Keep only the rows that have no missing value in any column.
D=df.dropna()

# If apriori raises a memory error, mine a smaller dataset instead: drawing a
# random sample of the rows produces a smaller frame that fits in memory, e.g.
# D = D.sample(frac=0.3, random_state=0)

# One-hot encode the frame (pandas' default column selection), then mine
# itemsets of at most 3 items with support >= 5% and derive the rules whose
# confidence is at least 0.2.
A=pd.get_dummies(D)
frequent_itemsets = apriori(A, min_support=0.05, use_colnames=True,max_len=3)
rules = association_rules(frequent_itemsets, metric="confidence",min_threshold=0.2)

  and should_run_async(code)


In [None]:
# Time-related rules: for each candidate column, collect the rules whose
# antecedent is exactly that single item.
# Positional slice -- presumably the time/month dummy columns; verify against A.columns.
Month = A.columns[0:6]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Monthconseq = [
    [item, rules.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules['antecedents'] == {col}) for col in Month.tolist())
    if hits.any()
]
# Inspection of the first two entries. With IPython's default settings only the
# final expression of the cell is rendered; the earlier ones are evaluated and
# discarded.
Monthconseq[0][1].iloc[:, :2]
Monthconseq[1][0]
Monthconseq[1][1].iloc[:, :2]
Monthconseq[1][1].iloc[:, 2:4]

  and should_run_async(code)


Unnamed: 0,confidence,lift
2,0.489967,1.128975
3,0.489967,1.128975
4,0.320079,0.989172
82,0.489967,1.128975


In [None]:
# Agency-related rules: for each candidate column, collect the rules whose
# antecedent is exactly that single item.
# Positional slice -- presumably the agency dummy columns; verify against A.columns.
Agency = A.columns[6:23]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Agencyconseq = [
    [item, rules.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules['antecedents'] == {col}) for col in Agency.tolist())
    if hits.any()
]
# Inspection of the first two entries. With IPython's default settings only the
# final expression of the cell (the conviction column) is rendered.
Agencyconseq[0][0]
Agencyconseq[0][1].iloc[:, 0:5]
Agencyconseq[0][1].iloc[:, 5]
Agencyconseq[1][0]
Agencyconseq[1][1].iloc[:, :5]
Agencyconseq[1][1].iloc[:, 5]

  and should_run_async(code)


27    2.231255
Name: conviction, dtype: float64

In [None]:
# Borough-related rules: for each candidate column, collect the rules whose
# antecedent is exactly that single item.
# Positional slice -- presumably the borough dummy columns; verify against A.columns.
Borough = A.columns[1091:]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Boroughconseq = [
    [item, rules.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules['antecedents'] == {col}) for col in Borough.tolist())
    if hits.any()
]


  and should_run_async(code)


In [None]:
# Complaint-type-related rules: for each candidate column, collect the rules
# whose antecedent is exactly that single item.
# Positional slice -- presumably the complaint-type dummy columns; verify against A.columns.
Complaint = A.columns[23:162]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Complaintconseq = [
    [item, rules.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules['antecedents'] == {col}) for col in Complaint.tolist())
    if hits.any()
]

# Inspection of the third entry. With IPython's default settings only the final
# expression of the cell (confidence .. conviction) is rendered.
Complaintconseq[2][1].iloc[:, :2]
Complaintconseq[2][1].iloc[:, 2:6]


  and should_run_async(code)


Unnamed: 0,confidence,lift,leverage,conviction
30,1.0,2.304188,0.028358,inf
65,1.0,2.304188,0.028358,inf
132,1.0,2.304188,0.028358,inf


In [None]:
# Hypothesis test on the support of the mined rules
from scipy.stats import norm

# Mean support over all rules, used as the sample proportion.
phatbar = np.mean(rules['support'])
# Standard error of a proportion, sqrt(p * (1 - p) / n), with n = number of rules.
phatsd = np.sqrt(phatbar * (1 - phatbar) / len(rules['support']))
# Test statistic. NOTE(review): 0.045 is presumably the difference between the
# observed and the hypothesised proportion -- confirm where it comes from.
z = 0.045 / phatsd
# Standard-normal value whose upper-tail probability is 1 - 0.975; this is the
# only value the cell renders.
norm.isf(1 - 0.975)

  and should_run_async(code)


1.959963984540054

In [None]:
# Re-run the mining with the NYPD and HPD requests removed.
D2 = D[~D['Agency'].isin(['NYPD', 'HPD'])]
# Same encoding and thresholds as the full run, except that itemsets are
# limited to 2 items here.
A2 = pd.get_dummies(D2)
frequent_itemsets2 = apriori(A2, min_support=0.05, use_colnames=True, max_len=2)
rules2 = association_rules(frequent_itemsets2, metric="confidence", min_threshold=0.2)

  and should_run_async(code)


In [None]:
# Same statistic as the earlier hypothesis test, for the rules mined without NYPD/HPD.
# Mean support over all rules, used as the sample proportion.
phatbar2 = np.mean(rules2['support'])
# Standard error of a proportion, sqrt(p * (1 - p) / n), with n = number of rules.
phatsd2 = np.sqrt(phatbar2 * (1 - phatbar2) / len(rules2['support']))
# NOTE(review): 0.065 is presumably the difference between the observed and the
# hypothesised proportion -- confirm. This rebinds the notebook-level name `z`.
z = 0.065 / phatsd2

  and should_run_async(code)


In [None]:
# Time-related rules in the reduced run: for each candidate column, collect the
# rules whose antecedent is exactly that single item.
# Positional slice -- presumably the time/month dummy columns; verify against A2.columns.
Month2 = A2.columns[0:6]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Monthconseq2 = [
    [item, rules2.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules2['antecedents'] == {col}) for col in Month2.tolist())
    if hits.any()
]
# Inspection of the fourth and sixth entries: consequents first, then the
# metric columns. With IPython's default settings only the final expression of
# the cell is rendered.
Monthconseq2[3][1].iloc[:, 0]
Monthconseq2[3][1].iloc[:, 1:]

Monthconseq2[5][1].iloc[:, 0]
Monthconseq2[5][1].iloc[:, 1:]

  and should_run_async(code)


Unnamed: 0,support,confidence,lift,leverage,conviction
11,0.050363,0.263533,1.534891,0.017551,1.124701
13,0.050316,0.263286,0.784955,-0.013784,0.902094
14,0.097723,0.511349,1.182222,0.015063,1.161294
16,0.061877,0.323779,1.065325,0.003794,1.02936
18,0.053987,0.282493,0.99086,-0.000498,0.996368


In [None]:
# Agency-related rules in the reduced run: for each candidate column, collect
# the rules whose antecedent is exactly that single item.
# Positional slice -- presumably the agency dummy columns; verify against A2.columns.
Agency2 = A2.columns[6:21]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Agencyconseq2 = [
    [item, rules2.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules2['antecedents'] == {col}) for col in Agency2.tolist())
    if hits.any()
]
# Metric columns (everything after 'consequents') of the first entry.
Agencyconseq2[0][1].iloc[:, 1:]


  and should_run_async(code)


Unnamed: 0,support,confidence,lift,leverage,conviction
19,0.050657,0.441299,8.711584,0.044842,1.699197


In [None]:
# Borough-related rules in the reduced run: for each candidate column, collect
# the rules whose antecedent is exactly that single item.
# Positional slice -- presumably the borough dummy columns; verify against A2.columns.
Borough2 = A2.columns[910:]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Boroughconseq2 = [
    [item, rules2.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules2['antecedents'] == {col}) for col in Borough2.tolist())
    if hits.any()
]
# Inspection of the third entry: consequents first, then the metric columns.
# With IPython's default settings only the final expression is rendered.
Boroughconseq2[2][1].iloc[:, 0]
Boroughconseq2[2][1].iloc[:, 1:]

  and should_run_async(code)


Unnamed: 0,support,confidence,lift,leverage,conviction
35,0.06025,0.211329,1.230839,0.0113,1.050254
49,0.098395,0.345125,1.028949,0.002768,1.014827
58,0.060406,0.211876,1.03545,0.002068,1.009204
63,0.129922,0.455708,1.053583,0.006608,1.042581


In [None]:
# Complaint-type-related rules in the reduced run: for each candidate column,
# collect the rules whose antecedent is exactly that single item.
# Positional slice -- presumably the complaint-type dummy columns; verify against A2.columns.
Complaint2 = A2.columns[21:125]
# Each entry is [column name, matching rules restricted to the metric columns];
# columns with no matching rule are skipped.
Complaintconseq2 = [
    [item, rules2.loc[hits, ['consequents','support','confidence','lift','leverage','conviction']]]
    for item, hits in ((col, rules2['antecedents'] == {col}) for col in Complaint2.tolist())
    if hits.any()
]
# Inspection of the first entry: consequents first, then the metric columns.
# With IPython's default settings only the final expression is rendered.
Complaintconseq2[0][1].iloc[:, 0]
Complaintconseq2[0][1].iloc[:, 1:]

  and should_run_async(code)


Unnamed: 0,support,confidence,lift,leverage,conviction
21,0.064937,1.0,4.531024,0.050606,inf
50,0.063655,0.980259,2.266328,0.035568,28.746127
