In [17]:
# import library
from androguard.core.bytecodes.apk import APK
import xml.etree.ElementTree as ET
import xml.dom.minidom as minidom
import pandas as pd
import os
import xmltodict
import json
import warnings

In [18]:
# Function
# Function: read the CSV and return the APK file names whose 'manifest' value is missing
def read_csv_and_filter(file_path):
    """Return the APK file names whose ``manifest`` cell is empty.

    Args:
        file_path: Path of a CSV file that has ``manifest`` and
            ``apkfile_name`` columns.

    Returns:
        list: ``apkfile_name`` values, in file order, of every row whose
        ``manifest`` value is NaN.
    """
    table = pd.read_csv(file_path)
    # Boolean mask selecting the rows that have no manifest recorded yet.
    manifest_missing = table['manifest'].isna()
    return table.loc[manifest_missing, 'apkfile_name'].tolist()
# Function export Manifest file
def exportManifestFile(apk_path, apkfile_name, manifest_directory=None):
    """Extract an APK's AndroidManifest.xml and save it as pretty-printed XML.

    The manifest element returned by androguard is serialised with
    ElementTree and re-parsed with minidom so it can be written indented.

    Args:
        apk_path: Directory that contains the APK file.
        apkfile_name: File name of the APK inside ``apk_path``.
        manifest_directory: Directory the XML file is written to. Defaults to
            a ``manifest`` folder under the current working directory.

    Side effects:
        Creates the output directory when it is missing, writes
        ``<apk name without extension>-AndroidManifest.xml`` into it and
        prints the path of the written file.
    """
    # Silence UserWarnings emitted from androguard's AXML module.
    warnings.filterwarnings("ignore", category=UserWarning, module="androguard.core.bytecodes.axml")

    if manifest_directory is None:
        # Built with os.path.join instead of a raw string with doubled
        # backslashes, so the printed/used path has a single separator.
        manifest_directory = os.path.join(".", "manifest")

    # Load the APK and fetch its decoded manifest element.
    apk = APK(os.path.join(apk_path, apkfile_name))
    manifest_xml = apk.get_android_manifest_xml()

    # Element -> UTF-8 text -> minidom document (needed for toprettyxml()).
    xml_string = ET.tostring(manifest_xml, encoding='utf-8', method='xml').decode('utf-8')
    xml_dom = minidom.parseString(xml_string)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(manifest_directory, exist_ok=True)

    # Export prettified XML to a file in the manifest directory.
    output_file = os.path.join(manifest_directory, f"{os.path.splitext(apkfile_name)[0]}-AndroidManifest.xml")
    with open(output_file, "w", encoding='utf-8') as file:
        file.write(xml_dom.toprettyxml())

    print(f"AndroidManifest.xml exported to {output_file}")
# Function convert XML to JSON
def convert_xml_to_json(xml_directory, apkfile_name):
    """Convert an exported manifest XML file to a JSON file next to it.

    Reads ``<apk name without extension>-AndroidManifest.xml`` from
    ``xml_directory``, parses it with xmltodict and writes the result to
    ``<apk name without extension>-AndroidManifest.json`` in the same
    directory.

    Args:
        xml_directory: Directory holding the XML file; the JSON file is
            written there too.
        apkfile_name: APK file name; its extension is stripped to build the
            XML/JSON file names.
    """
    base_name = str(os.path.splitext(apkfile_name)[0])
    # os.path.join picks the separator of the host OS instead of a fixed "\\".
    xml_file_path = os.path.join(xml_directory, base_name + "-AndroidManifest.xml")
    json_file_path = os.path.join(xml_directory, base_name + "-AndroidManifest.json")

    # Read the whole XML document, then release the input file before
    # the output file is opened.
    with open(xml_file_path, 'r', encoding='utf-8') as xml_file:
        xml_content = xml_file.read()

    # Plain dicts (rather than xmltodict's default mapping type) for json.dump.
    json_data = xmltodict.parse(xml_content, dict_constructor=dict)

    # ensure_ascii=False keeps non-ASCII characters readable; hence UTF-8 output.
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_data, json_file, ensure_ascii=False, indent=2)
# Function read JSON
def read_and_print_json(xml_directory, apkfile_name):
    """Load the JSON manifest that belongs to ``apkfile_name``.

    Despite its name the function does not print the data; it only prints a
    message when loading fails.

    Args:
        xml_directory: Directory containing the JSON file.
        apkfile_name: APK file name; its extension is stripped and
            ``-AndroidManifest.json`` is appended to build the file name.

    Returns:
        The decoded JSON content, or ``None`` if the file cannot be opened,
        decoded or parsed.
    """
    file_path = os.path.join(
        xml_directory,
        str(os.path.splitext(apkfile_name)[0]) + "-AndroidManifest.json",
    )
    try:
        # The file is written as UTF-8 with ensure_ascii=False, so it must be
        # read as UTF-8 rather than with the platform default encoding.
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        print(f"Error reading or parsing JSON file: {e}")
        return None
# Function get first-level key of dict
def get_first_level_keys(data):
    """Return the top-level keys of ``data`` as a list.

    Returns ``None`` when ``data`` is not a dict.
    """
    return list(data.keys()) if isinstance(data, dict) else None
# Function: keep the string items that contain a substring (case-insensitive)
def filter_by_substring(input_list, substring):
    """Return the string items of ``input_list`` that contain ``substring``.

    The comparison ignores case. Items that are not strings are skipped.
    Iterating a dict yields its keys, so a dict may be passed as well.
    """
    matches = []
    for candidate in input_list:
        if not isinstance(candidate, str):
            continue
        if substring.lower() in candidate.lower():
            matches.append(candidate)
    return matches
# Function: recursively collect the values stored under a given key (used for manifest permissions)
def extract_values_by_key(data, key_to_find):
    """Collect every value stored under ``key_to_find`` in nested dicts/lists.

    A value found under the key is appended as-is and is not searched any
    further. Inputs that are neither dict nor list yield an empty list.

    Returns:
        list: Matching values in depth-first traversal order.
    """
    if isinstance(data, dict):
        collected = []
        for name, child in data.items():
            if name == key_to_find:
                collected.append(child)
            elif isinstance(child, (dict, list)):
                collected += extract_values_by_key(child, key_to_find)
        return collected

    if isinstance(data, list):
        return [
            hit
            for element in data
            for hit in extract_values_by_key(element, key_to_find)
        ]

    return []
# Function convert list to string
def list_to_string_with_newline(input_list):
    """Join the strings in ``input_list`` into one string, one item per line."""
    separator = '\n'
    return separator.join(input_list)
# Function: update one column of the CSV for the rows matching a condition
def update_csv_column(csv_path, condition_column, condition_value, update_column, new_value):
    """Rewrite a CSV file with one column updated on the matching rows.

    Every row whose ``condition_column`` equals ``condition_value`` gets
    ``new_value`` in ``update_column``; the file at ``csv_path`` is then
    overwritten without the DataFrame index.
    """
    frame = pd.read_csv(csv_path)
    # Rows selected for the update.
    matching_rows = frame[condition_column] == condition_value
    frame.loc[matching_rows, update_column] = new_value
    frame.to_csv(csv_path, index=False)
# Function remove duplicate value in list
def keep_one_value(lst):
    """Return the items of ``lst`` without repeats, keeping first occurrences.

    Items must be hashable. The original order is preserved.
    """
    # dict keys are unique and keep insertion order, which gives an
    # order-preserving de-duplication.
    return list(dict.fromkeys(lst))
# Function find all mimeType
def findMimeType(input_dict):
    """Collect every value stored under the ``"@ns0:mimeType"`` key.

    The search walks nested dicts and the dict items of lists. A value found
    under the key is taken as-is and not searched further; lists nested
    directly inside lists are not entered.

    Args:
        input_dict: Dict to search (its ``items()`` method is used).

    Returns:
        list: The values found, in depth-first traversal order.
    """
    def walk(node):
        # Yield matches lazily while descending through the structure.
        for name, child in node.items():
            if name == "@ns0:mimeType":
                yield child
            elif isinstance(child, dict):
                yield from walk(child)
            elif isinstance(child, list):
                for element in child:
                    if isinstance(element, dict):
                        yield from walk(element)

    return list(walk(input_dict))
# Function: check whether a value exists in a list (returns 1 or 0)
def check_value_exist(value, my_list):
    """Return 1 if ``value`` is contained in ``my_list``, otherwise 0."""
    return 1 if value in my_list else 0
# Function find_values_with_keys
def find_values_with_keys(json_obj, target_keys):
    """Collect the values of all keys listed in ``target_keys``.

    Nested dicts and lists are searched recursively. A matching value is
    recorded and, if it is itself a dict or list, searched as well.

    Returns:
        list: Matching values in depth-first traversal order.
    """
    def walk(node):
        if isinstance(node, dict):
            for name, child in node.items():
                if name in target_keys:
                    yield child
                # Keep descending even when the key itself matched.
                if isinstance(child, (dict, list)):
                    yield from walk(child)
        elif isinstance(node, list):
            for element in node:
                yield from walk(element)

    return list(walk(json_obj))
# Function Read JSON to DICT
def read_json_to_dict(json_file_path):
    """Load a JSON file and return its decoded content.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's content.
    """
    # Read explicitly as UTF-8 so that non-ASCII content does not depend on
    # the platform's default text encoding.
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        json_data = json.load(json_file)
    return json_data
# Function find index by data
def find_indices_by_data(data_list, target_data):
    """Return the positions of the items whose ``'data'`` entry equals ``target_data``.

    Items that have no ``'data'`` key are ignored.
    """
    return [
        position
        for position, entry in enumerate(data_list)
        if 'data' in entry and entry['data'] == target_data
    ]
# Function Read XML to JSON
def convert_xml_to_json_2(xml_file_path, json_file_path):
    """Convert the XML file at ``xml_file_path`` to a JSON file.

    The document is parsed with ElementTree, re-serialised to text, turned
    into a mapping with xmltodict and written to ``json_file_path`` with a
    4-space indent.
    """
    document_root = ET.parse(xml_file_path).getroot()
    serialized_xml = ET.tostring(document_root, encoding='utf-8').decode('utf-8')

    # Build the complete JSON text before the output file is opened.
    parsed_mapping = xmltodict.parse(serialized_xml)
    json_text = json.dumps(parsed_mapping, indent=4)

    with open(json_file_path, 'w') as out_handle:
        out_handle.write(json_text)
# Function find_and_append_intents
def find_and_append_intents(xml_file_path, output_file_path):
    """Copy every ``<intent>`` descendant of an XML file into a new document.

    The elements are placed, in document order, under a fresh ``<root>``
    element, and the result is written to ``output_file_path`` as UTF-8 with
    an XML declaration.
    """
    source_root = ET.parse(xml_file_path).getroot()

    # New container element that receives all <intent> elements found.
    collected_root = ET.Element('root')
    collected_root.extend(source_root.findall('.//intent'))

    with open(output_file_path, 'wb') as sink:
        ET.ElementTree(collected_root).write(sink, encoding='utf-8', xml_declaration=True)

In [19]:
# 1. Read CSV data file
# Directory paths are built with os.path.join so the separator matches the
# host OS instead of being a hard-coded backslash.
csv_path = "paper_report.csv"
apk_path = os.path.join(".", "upload_apk")
xml_directory = os.path.join(".", "manifest")
# APK file names whose 'manifest' column is still empty in the CSV.
apk_file_name_arr = read_csv_and_filter(csv_path)
number_apk = len(apk_file_name_arr)

In [20]:
# 2. Export Manifest.xml file
# Only the first APK name in apk_file_name_arr is processed in this cell.
exportManifestFile(apk_path,apk_file_name_arr[0])
# 3. Convert XML to JSON
convert_xml_to_json(xml_directory,apk_file_name_arr[0])
# 4. Read JSON
# read_and_print_json returns None when the JSON file cannot be read or parsed;
# in that case the json_data["manifest"] lookup in step 7 raises a TypeError.
json_data = read_and_print_json(xml_directory,apk_file_name_arr[0])
# # 5. Get Manifest permission 
# permission_key_array = filter_by_substring(json_data["manifest"],"permission")
# list_permission = []
# for key in permission_key_array:
#     sub_list_permission = extract_values_by_key(json_data["manifest"][key],"@ns0:name")
#     list_permission = list_permission + sub_list_permission
# list_permission = keep_one_value(list_permission)
# string_permission = list_to_string_with_newline(list_permission)
# # 6. Append permission to CSV
# update_csv_column(csv_path, "apkfile_name", apk_file_name_arr[0] , "manifest", string_permission)
# 7. mimeType
# Collect every "@ns0:mimeType" value found under the "manifest" key and drop repeats.
listMimeType=keep_one_value(findMimeType(json_data["manifest"]))
print(listMimeType)
# 8. Check mime type exist
# checkResult is 1 when "image/*" is among the collected MIME types, otherwise 0.
checkResult = check_value_exist("image/*", listMimeType)
# 9. Find action
# (this cell contains no code for step 9)

Requested API level 33 is larger than maximum we have, returning API level 28 instead.


AndroidManifest.xml exported to .\\manifest\Gmail-2023.07.23.553967039.Release-AndroidManifest.xml


PermissionError: [Errno 13] Permission denied: '.\\manifest'

In [None]:
# import xml.etree.ElementTree as ET

# def find_and_append_intents(xml_file_path, output_file_path):
#     # Parse the XML file
#     tree = ET.parse(xml_file_path)
#     root = tree.getroot()

#     # Create a new root element for the output XML
#     new_root = ET.Element('root')  # Create a new root element

#     # Iterate through all <intent> tags
#     for intent in root.findall('.//intent'):
#         # Append the entire <intent> to the new root
#         new_root.append(intent)

#     # Create a new tree with the new root
#     new_tree = ET.ElementTree(new_root)

#     # Write the result to the output file
#     with open(output_file_path, 'wb') as output_file:
#         new_tree.write(output_file, encoding='utf-8', xml_declaration=True)

# # Example usage
# input_xml_file = r"C:\Users\ASUS\anaconda3\metaLeak-ml-manifest-mimeType\manifest\Gmail-2023.07.23.553967039.Release-AndroidManifest.xml"
# output_xml_file = 'output.xml'
# find_and_append_intents(input_xml_file, output_xml_file)

In [None]:
# import xml.etree.ElementTree as ET

# def find_and_append_intents2(xml_file_path, output_file_path):
#     # Parse the XML file
#     tree = ET.parse(xml_file_path)
#     root = tree.getroot()

#     # Create a new root element for the output XML
#     new_root = ET.Element('root')  # Create a new root element

#     # Iterate through all <intent> tags
#     for intent in root.findall('.//intent-filter'):
#         # Append the entire <intent> to the new root
#         new_root.append(intent)

#     # Create a new tree with the new root
#     new_tree = ET.ElementTree(new_root)

#     # Write the result to the output file
#     with open(output_file_path, 'wb') as output_file:
#         new_tree.write(output_file, encoding='utf-8', xml_declaration=True)

# # Example usage
# input_xml_file = r"C:\Users\ASUS\anaconda3\metaLeak-ml-manifest-mimeType\manifest\Gmail-2023.07.23.553967039.Release-AndroidManifest.xml"
# output_xml_file = 'output2.xml'
# find_and_append_intents2(input_xml_file, output_xml_file)

In [None]:
# import xml.etree.ElementTree as ET
# import json
# import xmltodict

# def convert_xml_to_json(xml_file_path, json_file_path):
#     # Parse the XML file
#     tree = ET.parse(xml_file_path)
#     root = tree.getroot()

#     # Convert XML to JSON using xmltodict
#     xml_string = ET.tostring(root, encoding='utf-8').decode('utf-8')
#     json_data = json.dumps(xmltodict.parse(xml_string), indent=4)

#     # Write the JSON data to the output file
#     with open(json_file_path, 'w') as json_file:
#         json_file.write(json_data)

# # Example usage
# input_xml_file = 'output2.xml'
# output_json_file = 'output2.json'
# convert_xml_to_json(input_xml_file, output_json_file)

In [None]:
# # Read JSON to DICT
# def read_json_to_dict(json_file_path):
#     with open(json_file_path, 'r') as json_file:
#         json_data = json.load(json_file)
#     return json_data
# json_data = read_json_to_dict("output2.json")
# print(json_data["root"]["intent-filter"])
# print(type(json_data["root"]["intent-filter"]))

In [None]:
# def find_indices_by_data(data_list, target_data):
#     index_array = []
#     for index, item in enumerate(data_list):
#         if 'data' in item and item['data'] == target_data:
#             index_array.append(index)
#     return index_array
# target_data = {'@ns0:mimeType': '*/*'}
# result_indices = find_indices_by_data(json_data["root"]["intent-filter"], target_data)

# print(f"Indices of data {target_data} in the list: {result_indices}")