In [1]:
!pip install androguard==3.3.5
# !pip install tensorflow==2.13.0
# !pip install keras==2.13.1

In [2]:
import sys
from androguard.core.bytecodes import apk, dvm
from androguard.core.analysis import analysis
from androguard.core.bytecodes.apk import APK
import os
import re
sys.path.append("."+os.sep+'Log')
import logging as log

In [3]:
import androguard
print(androguard.__version__)

3.3.5


In [4]:
import keras
keras.__version__

'2.13.1'

In [5]:
import tensorflow as tf
print(tf.__version__)

2.13.0


In [6]:
def getPermissions(Path, fileName,dataPath):
		"""
		get the API Calls
		:param Path: the APK folder path
		:param fileNmae: the APK Name
		:param dataPath: the folder path where the permission files save
		:return: permission files
		"""
		filePath = Path + os.sep + fileName
		print("Extract" + fileName + "Permissions")
		app = APK(filePath)
		permissions = app.get_permissions()
		name = fileName.replace(".apk", "")
		with open(dataPath+os.sep+name+"_Permission.txt",'w') as f:
			print("Saving" + fileName + "Permissions")
			for i in range(permissions.__len__()):
				f.write(permissions[i]+"\n")

In [7]:
def getAPICalls(Path, fileName, dataPath):
		"""
		get the API Calls
		:param Path: the APK folder path
		:param fileNmae: the APK Nmae
		:param dataPath: the folder path where the api files save
		:return: api files
		"""
		filePath = Path + os.sep + fileName
		# self.logger.info("Extract" + fileName + "API calls")
		print("Extract" + fileName + "API calls")
		app = APK(filePath)
		app_dex = dvm.DalvikVMFormat(app.get_dex())
		app_x = analysis.Analysis(app_dex)
		APIs = list()

		classes = [cc.get_name() for cc in app_dex.get_classes()]
		for method in app_dex.get_methods():
			methodBlock = app_x.get_method(method)
			if method.get_code() == None:
				continue
			for i in methodBlock.get_basic_blocks().get():
				for ins in i.get_instructions():
					output = ins.get_output()
					match = re.search(r'(L[^;]*;)->([^\(]*)', output)
					if match and match.group(1) not in classes:
						# print "API: "+match.group()+"	 "+match.group(1)
						if match.group(2) == "<init>":
							continue
						api = match.group()
						if api in APIs:
							continue
						else:
							APIs.append(api)

		name = fileName.replace(".apk", "")
		with open(dataPath+os.sep+name+"_API.txt",'w') as f:
			print("Saving " + fileName + " APIs")
			# self.logger.info("Saving " + fileName + " APIs")
			for i in range(APIs.__len__()):
					f.write(APIs[i]+"\n")

In [8]:
import numpy as np
import os
import logging
import sys
sys.path.append("."+os.sep+'GetFeature')
#import GetFeature
sys.path.append("."+os.sep+'Log')
#"from log import Log
import pandas as pd
import threading
import random

In [9]:

APIFile = "C:/Users/SRMAP/Desktop/FDP Code/Features/API.txt"
permissionFile = "C:/Users/SRMAP/Desktop/FDP Code/Features/permission.txt"
featurelistPath = "C:/Users/SRMAP/Desktop/FDP Code/Features/featureList.csv"
featurelist = list()

In [10]:
def featureFromFile(filePath):
		"""
		Get the features to be extracted from one feature file
		:param filePath:  file path
		:return: features list
		"""
		if not os.path.exists(filePath):
			#self.logger.error(filePath+"does not exist")
			print(filePath+"does not exist")
			return
		ls=list()
		with open(filePath,'r')as f:
			while(True):
				line=f.readline()
				if not line:break
				s=line.replace("\n","")
				s=s.replace("\r","")
				ls.append(s)
		return ls


In [11]:
def generateFeatureList():
		"""
		Get the features to be extracted from all features
		:param :
		:return: features list from all feature files
		"""
		ls1 = featureFromFile(APIFile)
		ls2 = featureFromFile(permissionFile)      
		featurelist = ls1+ls2

		frame = pd.DataFrame(featurelist)
		frame.to_csv(featurelistPath,header=False,index=False)

		return featurelist

## Malware Matrix.py

In [12]:
featureList = generateFeatureList()


In [13]:
print(featureList)

['Ljava/net/URL;->openConnection', 'Ljava/net/URL;->openStream', 'Ljava/net/URL;->getContent', 'Landroid/telephony/TelephonyManager;->getCallState', 'Landroid/telephony/TelephonyManager;->getCellLocation', 'Landroid/telephony/TelephonyManager;->getDeviceId', 'Landroid/telephony/TelephonyManager;->getDeviceSoftwareVersion', 'Landroid/telephony/TelephonyManager;->getNeighboringCellInfo', 'Landroid/telephony/TelephonyManager;->getNetworkCountryIso', 'Landroid/telephony/TelephonyManager;->getNetworkOperator', 'Landroid/telephony/TelephonyManager;->getNetworkOperatorName', 'Landroid/telephony/TelephonyManager;->getNetworkType', 'Landroid/telephony/TelephonyManager;->getPhoneType', 'Landroid/telephony/TelephonyManager;->getSimCountryIso', 'Landroid/telephony/TelephonyManager;->getSimOperator', 'Landroid/telephony/TelephonyManager;->getSimOperatorName', 'Landroid/telephony/TelephonyManager;->getSimSerialNumber', 'Landroid/telephony/TelephonyManager;->getSimState', 'Landroid/telephony/Telephon

In [14]:
APKlist = list()
Matrix1 = np.zeros((0, len(featureList)), dtype="int")
label1 = list()

In [15]:
def featureFromFile(filePath):
		"""
		get features from file
		:param filePath: the feature files path
		:return: feature list
		"""
		if not os.path.exists(filePath):
			print(filePath+" does not exist")
			return
		ls=list()
		with open(filePath,'r',encoding='UTF-8')as f:
			while(True):
				line=f.readline()
				if not line:break
				s=line.replace("\n","")
				s=s.replace("\r","")
				ls.append(s)
		return ls


In [16]:
def getFeaturefromAPK(dataPath):
		apkFeature = np.zeros((1,featureList.__len__()), dtype=int)
		try:
			perList = featureFromFile(dataPath)
			for p in perList:
				if p in featureList:
					i = featureList.index(p)
					apkFeature[0][i] = 1
		except Exception as e:
			print("feature matrix extraction errors.")
		return apkFeature

In [17]:
import os
path = "C:/Users/SRMAP/Desktop/FDP Code/MalwareAPK/MalwareRawFeature"
dir_list = os.listdir(path)

In [18]:
for fl in dir_list:
    fl_path = path +'/'+ fl
    features = getFeaturefromAPK(fl_path)
    Matrix1 = np.vstack((Matrix1,features))
    label1.append(1)

feature matrix extraction errors.


In [19]:
len(Matrix1)

389

In [20]:
len(label1)

389

## Benign Matrix

In [21]:
featureList = generateFeatureList()
APKlist = list()
Matrix2 = np.zeros((0, len(featureList)), dtype="int")
label2 = list()

In [22]:
def featureFromFile(filePath):
		"""
		get features from file
		:param filePath: the feature files path
		:return: feature list
		"""
		if not os.path.exists(filePath):
			print(filePath+" does not exist")
			return
		ls=list()
		with open(filePath,'r',encoding='UTF-8')as f:
			while(True):
				line=f.readline()
				if not line:break
				s=line.replace("\n","")
				s=s.replace("\r","")
				ls.append(s)
		return ls


In [1]:
def getFeaturefromAPK(dataPath):
		apkFeature = np.zeros((1,featureList.__len__()), dtype=int)
		try:
			perList = featureFromFile(dataPath)
			for p in perList:
				if p in featureList:
					i = featureList.index(p)
					apkFeature[0][i] = 1
		except Exception as e:
			print("feature matrix extraction errors.")
		return apkFeature

In [24]:
import os
path = "C:/Users/SRMAP/Desktop/FDP Code/BenignAPK/BenignRawFeature"
dir_list = os.listdir(path)


In [25]:
for fl in dir_list:
    fl_path = path +'/'+ fl
    features = getFeaturefromAPK(fl_path)
    Matrix2 = np.vstack((Matrix2,features))
    label2.append(0)

feature matrix extraction errors.


In [26]:
len(Matrix2)

598

## Combine Matrix

In [27]:
Matrix = np.vstack((Matrix1,Matrix2))

In [28]:
len(Matrix)

987

In [29]:
label = label1+label2

In [30]:
len(label)

987

In [31]:
Matrix.shape

(987, 158)

## Model Training

In [32]:
import joblib
from sklearn.model_selection import train_test_split 
import tensorflow as tf
import os
import sys
import pickle 
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation,Flatten,Multiply
from sklearn.preprocessing import OneHotEncoder
from keras.layers import Input, Dense, concatenate
from keras.models import Model
from keras.models import load_model
from keras import optimizers

In [33]:
# print(X_train.shape)

In [34]:
# len(y_train)

In [35]:
label = np.array(label)

In [36]:
dimemsion = 158
enc = OneHotEncoder()
label=label.reshape(-1, 1)
label=enc.fit_transform(label).toarray()

In [37]:
X_train,X_test,y_train,y_test=train_test_split(Matrix,label,shuffle=True,train_size=0.85)

In [38]:
def Att(att_dim,inputs,name):
		""" attention layer """	
		V = inputs
		QK = Dense(att_dim)(inputs)
		QK = Activation("softmax",name=name)(QK)
		MV = Multiply()([V, QK])
		return(MV)

In [39]:
inputs = Input(shape=(dimemsion,))
attention = Att(dimemsion,inputs,"attention")
output = Dense(2, activation='softmax')(attention)
model = Model(inputs=[inputs], outputs=output)


In [40]:
adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit([X_train], y_train, epochs=30, batch_size=16, shuffle=True, validation_split=0.1)



Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x254052a9b10>

In [41]:
def show_accuracy(x_train,y_hat,y_train,name):
		"""
		calculate the training results
		:param x_train: feature matrix of training set 
		:param y_hat: detection result of training set 
		:param y_train: label of training set 
		:param name: apk name
		:return: Accurary 
		"""	

		print(name)
		tp = 0				  
		fp = 0
		tn = 0
		fn = 0
		num = len(x_train)			 
		for i in range(num):		 
			if y_hat[i] == y_train[i]: 
				if y_hat[i] == 0:
					tp += 1
				else:
					tn += 1
			else:
				if y_hat[i] == 0:
					fp += 1
				else:
					fn += 1
		Recall = tp / float(tp + fn)
		Precision = tp / float(tp + fp)
		Accurary = (tp + tn)/float(tp + tn + fn + fp)
		fmeasure = (2*Precision*Recall)/float(Precision+Recall)
		FPR = fp/float(tn + fp)       
		FalseRate = 1 - Precision
		AllFalseRate = 1- Accurary
		print("Total num:",num)
		printlist = ['Accurary','Precision','Recall','F1-Score','TPR','FPR']
		datalist = [Accurary,Precision,Recall,fmeasure,Recall,FPR]
		print(printlist)
		print(datalist)


In [42]:
y_hat_proba = model.predict(X_train)
y_hat = [round(i[1]) for i in y_hat_proba]
show_accuracy(X_train,y_hat,y_train[:,1],"train set")

train set
Total num: 838
['Accurary', 'Precision', 'Recall', 'F1-Score', 'TPR', 'FPR']
[0.9534606205250596, 0.939922480620155, 0.9837728194726166, 0.9613478691774033, 0.9837728194726166, 0.08985507246376812]


In [43]:
y_hat_proba = model.predict(X_test)
y_hat = [round(i[1]) for i in y_hat_proba]
show_accuracy(X_test,y_hat,y_test[:,1],"test set")

test set
Total num: 149
['Accurary', 'Precision', 'Recall', 'F1-Score', 'TPR', 'FPR']
[0.9060402684563759, 0.9174311926605505, 0.9523809523809523, 0.9345794392523364, 0.9523809523809523, 0.20454545454545456]


## Testing 

In [49]:
def getFeatureMatric(checkApks):
		"""
		get feature files from CheckAPK file
		:param checkApks: the checkApks files path
		:return: Matrix 
		"""		
		Matrix = np.zeros((0, self.Featurelist.__len__()), dtype=int)
		GetFeatureMatrixClass = GetFeatureMatrix()
		for apk in checkApks:
			features = GetFeatureMatrixClass.getFeaturefromAPK(CheckData,apk)
			Matrix = np.vstack((Matrix,features))
		return Matrix

In [50]:
featureList = generateFeatureList()
APKlist_T = list()
Matrix_test = np.zeros((0, len(featureList)), dtype="int")

In [51]:
Matrix_test

array([], shape=(0, 158), dtype=int32)

In [52]:

# C:\Users\91812\Desktop\FDP Presentation\Mitre Code\Ransomware-APKs\Koler_raw
test_file = "C:/Users/SRMAP/Desktop/FDP Code/Ransomware-APKs/Koler_raw/00f6cb935df075494a1fd1ce5e918a7a_API_Permission.txt"
features = getFeaturefromAPK(test_file)

In [53]:
features

array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0,
        0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
        0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
        0, 1, 0, 1]])

In [54]:
Matrix_test = np.vstack((Matrix_test,features))

In [55]:
Matrix_test

array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0,
        0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0,
        0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
        0, 1, 0, 1]])

In [56]:
Matrix_test.shape

(1, 158)

In [57]:
model.save('model.h5')

  saving_api.save_model(


In [58]:
model = tf.keras.models.load_model('model.h5')
model.predict(Matrix_test)



array([[0.05962212, 0.9403779 ]], dtype=float32)