In [40]:
import boto3
import botocore
import sagemaker
import sys


bucket = 'sciforma-performance-data'   # <--- specify a bucket you have access to
prefix = 'sagemaker/rcf-benchmarks'
execution_role = sagemaker.get_execution_role()


# Probe the bucket with a HEAD request up front so the common failure modes
# produce a readable hint instead of a traceback further down the notebook.
try:
    boto3.Session().client('s3').head_bucket(Bucket=bucket)
except botocore.exceptions.ParamValidationError:
    # The bucket name was rejected by botocore's parameter validation.
    print('Hey! You either forgot to specify your S3 bucket'
          ' or you gave your bucket an invalid name!')
except botocore.exceptions.ClientError as err:
    # Only the "missing" and "forbidden" codes get a friendly message;
    # every other service error is re-raised untouched.
    if err.response['Error']['Code'] == '404':
        print("Hey! Your bucket, {}, doesn't exist!".format(bucket))
    elif err.response['Error']['Code'] == '403':
        print("Hey! You don't have permission to access the bucket, {}.".format(bucket))
    else:
        raise
else:
    # Reached only when head_bucket succeeded.
    print('Training input/output will be stored in: s3://{}/{}'.format(bucket, prefix))


import pandas as pd
from sagemaker import get_execution_role

# The raw event log is addressed at the top level of the bucket
# (the key is joined to `bucket` directly, not to `prefix`).
data_key = 'performanceDataWithRole_Loc_mag_team_allocation.csv'
data_location = 's3://{}/{}'.format(bucket, data_key)

role = get_execution_role()

# Read the event log straight from the s3:// URL
# (presumably this relies on s3fs being installed in the kernel; confirm).
teamAllocData = pd.read_csv(data_location)

# The anomaly table is read from a relative path, i.e. the kernel's
# current working directory.
anomaliesTeamAlloc = pd.read_csv('anomalies_team_allocation.csv')

# Preview the first rows of the raw event log.
teamAllocData.head()

Training input/output will be stored in: s3://sciforma-performance-data/sagemaker/rcf-benchmarks


Unnamed: 0.1,Unnamed: 0,STARTDATE,ELAPSED,USERID,EVENTID,EVENT_DESC,WORKSPACEID,WS_DESC,TRANSACTIONTYPE,CORE_ID,HR_ORGANIZATION,LOCATION,USER_ROLE,MAG_CODE
0,0,2019-03-05 11:40:51,1488,59786,x84sljJzv,Team Allocation,,Exception,0,B33046,HPMS.AMP,IN-NDA02,_PM+RM,RNG
1,1,2019-03-05 11:57:57,505,3265232,x84sljJzv,Team Allocation,,Exception,1,NXP87683,HPMS.STI,DE-MUC02,_Resource Manager,RC9
2,2,2019-03-05 12:08:48,1553,8713928,x84sljJzv,Team Allocation,,Exception,1,NXP72191,T&O.TO-FO,NL-NYM01-s1,_Resource Manager,R86
3,3,2019-03-05 12:10:52,1683,8713928,x84sljJzv,Team Allocation,,Exception,1,NXP72191,T&O.TO-FO,NL-NYM01-s1,_Resource Manager,R86
4,4,2019-03-05 12:11:40,4397,8678907,x84sljJzv,Team Allocation,,Exception,1,ATP02132,HPMS.STI,AT-GRK01-s1,_PM+RM,RC9


In [42]:
# Summary statistics (count, mean, std, quartiles) for the anomaly table.
anomaliesTeamAlloc.describe()

Unnamed: 0.1,Unnamed: 0,ELAPSED,USERID,TRANSACTIONTYPE,ELAPSED_normalized,score
count,536.0,536.0,536.0,536.0,536.0,536.0
mean,14991.914179,7.403013,19267170.0,0.787313,0.035954,3.455048
std,6705.592245,42.623117,52147090.0,0.40959,0.086919,0.960739
min,53.0,0.427,56581.0,0.0,0.007771,2.261823
25%,10968.75,1.52025,61944.0,1.0,0.009365,2.607621
50%,15742.5,2.2915,8353029.0,1.0,0.013685,3.219316
75%,20238.75,4.354,8928422.0,1.0,0.029531,4.251562
max,25585.0,952.667,289432400.0,1.0,1.0,5.866686


In [None]:
# Keep only the anomalies whose ELAPSED value is strictly above the median
# ELAPSED of the whole anomaly table.
highAnomalies = anomaliesTeamAlloc.loc[
    lambda frame: frame['ELAPSED'].gt(frame['ELAPSED'].median())
]

In [None]:
# Distribution of the ELAPSED column across all rows of the anomaly table.
anomaliesTeamAlloc['ELAPSED'].describe()

In [None]:
# Median ELAPSED value; the highAnomalies filter keeps rows strictly above it.
anomaliesTeamAlloc['ELAPSED'].median()

In [None]:
# Summary statistics for the above-median subset of the anomaly table.
highAnomalies.describe()

In [None]:

# Drop every row that has at least one missing value.
# Reassigning instead of dropna(inplace=True) avoids mutating a frame that was
# produced by filtering anomaliesTeamAlloc (which triggers pandas'
# SettingWithCopyWarning) and keeps this cell safe to re-run.
highAnomalies = highAnomalies.dropna()
highAnomalies.describe()

In [None]:

# Reduce each row to its LOCATION / USER_ROLE values and convert the result
# into a list of per-row lists ("transactions") for the Apriori encoder.
# `columns=` replaces the positional axis argument (`drop(labels, 1)`), which
# pandas deprecated in 1.3 and rejects with a TypeError from 2.0 onwards.
datasetApriori = highAnomalies.drop(
    columns=highAnomalies.columns.difference(['LOCATION', 'USER_ROLE'])
).values.tolist()
# Preview only: the full list holds one entry per row of highAnomalies.
datasetApriori[:5]

In [None]:
# Apriori analysis
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# Learn the set of distinct items from the transactions, then encode every
# transaction as one row of an item-indicator array.
te = TransactionEncoder()
te.fit(datasetApriori)
te_ary = te.transform(datasetApriori)

# Wrap the encoded array in a DataFrame whose columns are the item labels
# the encoder learned during fit.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df


In [None]:
from mlxtend.frequent_patterns import apriori

# First pass: frequent itemsets over the encoded transactions with a
# minimum support of 0.2.
apriori(df=df, min_support=0.2)

In [None]:
!pip install mlxtend

In [None]:
# Re-run Apriori with a minimum support of 0.05, reporting itemsets by item
# name (use_colnames=True).
frequent_itemsets = apriori(df, min_support=0.05, use_colnames=True)

# Record how many items each itemset contains.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets

In [None]:
from mlxtend.frequent_patterns import association_rules

# Derive association rules from the frequent itemsets, using confidence as
# the evaluation metric with a minimum threshold of 0.5.
association_rules(
    frequent_itemsets,
    min_threshold=0.5,
    metric="confidence",
)