In [23]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import json

# #显示所有列
# pd.set_option('display.max_columns', None)
# #显示所有行
# pd.set_option('display.max_rows', None)

In [31]:
class Permision(object):
    def __init__(self):
        f =  open('android_all_permisions.json', 'r')
        self.all_permisions_dict = json.load(f)
        self.all_permisions = set(self.all_permisions_dict.keys())
        self.key2index()
        self.index2key()
    
    def key2index(self):
        index = 0
        self.key2index_dict = {}
        for key in self.all_permisions_dict.keys():
            self.key2index_dict[key] = index
            index += 1
    
    def index2key(self):
        index = 0
        self.index2key_dict = {}
        for key in self.all_permisions_dict.keys():
            self.index2key_dict[index] = key
            index += 1

class ApkPermision(Permision):
    def __init__(self,permision_file):
        super(ApkPermision,self).__init__()
        self.name = permision_file.split('\\')[-1].split('.')[0][:-3]
        self.permisions = pd.read_csv(permision_file,header=None,index_col=None)[0].values[:-1]
        
    def permision2array(self):
        res = np.zeros(len(self.all_permisions))
        for permision in self.permisions:
            if permision in self.all_permisions:
#                 print(self.key2index_dict[permision])
                res[self.key2index_dict[permision]] = 1
        return res

class PermisionAnalysisUtil(object):
    '''权限分析工具类'''
    
    def __init__(self,path):
        self.permision = Permision()
        self.permisions_df = self.get_apks_permisions(path)
    
    def get_apks_permisions(self,path):
        '''得到所有APK的所有权限'''
        import glob
        apks = glob.glob(f'{path}/*.txt')
        permision = self.permision.all_permisions
        permisions_df = pd.DataFrame(index=permision)
        for apk in apks:
            apk = ApkPermision(apk)
            permisions_df[apk.name] = apk.permision2array()
        permisions_df = permisions_df.astype(np.int16)
        return permisions_df
    
    def get_permision_num(self):
        '''得到每一个APK的权限数量'''
        return self.permisions_df.sum()
    
    def get_permision_list(self,apk_names=None):
        '''得到APK的权限列表'''
        from collections import Iterable
        res = {}
        if apk_names is None:
            for apk_name in self.permisions_df.columns.values:
                res[apk_name] = set(self.permisions_df[self.permisions_df[apk_name] == 1.0].index.values)
        else:
            if isinstance(apk_names,tuple) or isinstance(apk_names,list):
                for apk_name in apk_names:
                    res[apk_name] = set(self.permisions_df[self.permisions_df[apk_name] == 1.0].index.values)
            else:
                print('apk_names should be a list or a tuple of apk names')
        return res
    
    def get_top_permision(self,top=10):
        '''得到需求最多的权限'''
        if top < 1 or top > len(self.permisions_df):
            print(f'please type in a num in [1,{len(self.permisions_df)}]')
            return None
        else:
            return self.permisions_df.sum(axis=1).sort_values(ascending=False)[:top]
    
    def get_permisions_all_have(self):
        '''得到所有apk共有的权限'''
        top100_permision_df = self.get_top_permision(100)
        return top100_permision_df[top100_permision_df[top100_permision_df.index]==len(self.permisions_df.columns)]
    
    def get_permisions_description(self,permisions=None,is_index=False):
        '''
            功能：得到权限的说明
            参数：
                permisions 权限名称或者列表
                is_index   是通过下标还是权限名称访问，每个权限有一个唯一标识[0-134](共有135个权限)  
        ''' 
        res = {}
        indexes = permisions
        if is_index:
            permisions = []
            if isinstance(indexes,int):
                permisions.append(self.permision.index2key_dict[indexes])
            elif isinstance(indexes,tuple) or isinstance(indexes,list):
                for index in indexes:
                    permisions.append(self.permision.index2key_dict[index])
            else:
                pass

        if isinstance(permisions,str):
            res = {permisions:self.permision.all_permisions_dict[permisions] }
        elif isinstance(permisions,tuple) or isinstance(permisions,list):
            for permision in permisions:
                res[permision] = self.permision.all_permisions_dict[permision]
        elif permisions is None:
            res = self.permision.all_permisions_dict
        return res
    
    
    def get_cosine_similarity(self,vectors):
        '''计算余弦相似度'''
        from sklearn.metrics.pairwise import cosine_similarity,pairwise_distances
        cos_sim = cosine_similarity(vectors)
        pair_dis = pairwise_distances(vectors,metric="cosine")
        return {'cos_sim':cos_sim,'pair_dis':pair_dis}

In [32]:
util = PermisionAnalysisUtil('apk_permisions')
print('所有APK的所有权限*******************\n',util.permisions_df)

所有APK的所有权限*******************
                                 huopaozhibo  ruanruan
GLOBAL_SEARCH                             0         0
MOUNT_UNMOUNT_FILESYSTEMS                 1         1
BROADCAST_WAP_PUSH                        1         1
GET_ACCOUNTS                              0         1
BIND_CARRIER_MESSAGING_SERVICE            1         1
...                                     ...       ...
READ_CONTACTS                             0         0
CHANGE_WIFI_STATE                         0         0
CHANGE_COMPONENT_ENABLED_STATE            1         1
BLUETOOTH                                 0         0
WRITE_CALL_LOG                            0         0

[135 rows x 2 columns]


In [33]:
print('\n权限列表*******************\n',util.get_permision_list(['ruanruan']))


权限列表*******************
 {'ruanruan': {'BIND_PRINT_SERVICE', 'GET_ACCOUNTS_PRIVILEGED', 'MOUNT_UNMOUNT_FILESYSTEMS', 'BROADCAST_WAP_PUSH', 'FLASHLIGHT', 'TRANSMIT_IR', 'ACCESS_FINE_LOCATION', 'GET_ACCOUNTS', 'BIND_CARRIER_MESSAGING_SERVICE', 'DELETE_PACKAGES', 'RECORD_AUDIO', 'CHANGE_WIFI_MULTICAST_STATE', 'SET_PROCESS_LIMIT', 'RESTART_PACKAGES', 'SET_DEBUG_APP', 'READ_FRAME_BUFFER', 'CONTROL_LOCATION_UPDATES', 'REQUEST_IGNORE_BATTERY_OPTIMIZATIONS', 'BIND_DREAM_SERVICE', 'MANAGE_DOCUMENTS', 'BIND_MIDI_DEVICE_SERVICE', 'SET_TIME', 'WRITE_SYNC_SETTINGS', 'CHANGE_COMPONENT_ENABLED_STATE'}}


In [34]:
print('\n每一个APK的权限数量*******************\n',util.get_permision_num())


每一个APK的权限数量*******************
 huopaozhibo    29
ruanruan       24
dtype: int64


In [35]:
print('\n需求最多的权限*******************\n',util.get_top_permision(top=15))


需求最多的权限*******************
 FLASHLIGHT                              2
BIND_DREAM_SERVICE                      2
REQUEST_IGNORE_BATTERY_OPTIMIZATIONS    2
ACCESS_FINE_LOCATION                    2
CHANGE_WIFI_MULTICAST_STATE             2
DELETE_PACKAGES                         2
SET_PROCESS_LIMIT                       2
SET_DEBUG_APP                           2
MANAGE_DOCUMENTS                        2
RESTART_PACKAGES                        2
SET_TIME                                2
BIND_MIDI_DEVICE_SERVICE                2
RECORD_AUDIO                            2
BIND_CARRIER_MESSAGING_SERVICE          2
WRITE_SYNC_SETTINGS                     2
dtype: int64


In [36]:
print('\n每个APK都有的权限*******************\n',util.get_permisions_all_have())


每个APK都有的权限*******************
 FLASHLIGHT                              2
BIND_DREAM_SERVICE                      2
REQUEST_IGNORE_BATTERY_OPTIMIZATIONS    2
ACCESS_FINE_LOCATION                    2
CHANGE_WIFI_MULTICAST_STATE             2
DELETE_PACKAGES                         2
SET_PROCESS_LIMIT                       2
SET_DEBUG_APP                           2
MANAGE_DOCUMENTS                        2
RESTART_PACKAGES                        2
SET_TIME                                2
BIND_MIDI_DEVICE_SERVICE                2
RECORD_AUDIO                            2
BIND_CARRIER_MESSAGING_SERVICE          2
WRITE_SYNC_SETTINGS                     2
BROADCAST_WAP_PUSH                      2
CHANGE_COMPONENT_ENABLED_STATE          2
MOUNT_UNMOUNT_FILESYSTEMS               2
dtype: int64


In [38]:
print('\n权限的解释说明*******************\n',util.get_permisions_description([i for i in range(2)],is_index=True))
print('\n权限的解释说明*******************\n',util.get_permisions_description(['ACCESS_CHECKIN_PROPERTIES','ACCESS_NETWORK_STATE'],is_index=False))


权限的解释说明*******************
 {'ACCESS_CHECKIN_PROPERTIES': '允许读写访问”properties”表在checkin数据库中，改值可以修改上传', 'ACCESS_COARSE_LOCATION': '允许一个程序访问CellID或 WiFi热点来获取粗略的位置'}

权限的解释说明*******************
 {'ACCESS_CHECKIN_PROPERTIES': '允许读写访问”properties”表在checkin数据库中，改值可以修改上传', 'ACCESS_NETWORK_STATE': '允许程序获取网络信息状态，如当前的网络连接是否有效'}


In [39]:
print('APK权限的余弦相似度*******************\n',util.get_cosine_similarity(np.array(util.permisions_df).transpose()))

权限的余弦相似度*******************
 {'cos_sim': array([[1.        , 0.68228824],
       [0.68228824, 1.        ]]), 'pair_dis': array([[0.        , 0.31771176],
       [0.31771176, 0.        ]])}


In [21]:
# !python analysis.py