In [2]:
# Colab-specific helper module used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Google Drive at /content/drive; the paths used later in this notebook
# live under /content/drive/MyDrive.
drive.mount('/content/drive')


Mounted at /content/drive


In [6]:
import os
import re
import openpyxl
from collections import Counter

def extract_comments_methods(file_path):
    """Collect simple comment / definition statistics for one Python source file.

    The analysis is purely regex-based: the file is read as text and is never
    imported or parsed.

    Args:
        file_path: Path of the ``.py`` file to analyse.

    Returns:
        A 6-tuple ``(comments, comment_lines, methods, class_names,
        comment_percentage, total_lines)`` where

        * ``comments`` is the list of whole-line comments (stripped lines that
          start with ``#``); trailing inline comments are not counted,
        * ``comment_lines`` is ``len(comments)``,
        * ``methods`` is a list of ``(name, parameter_text)`` tuples, one per
          ``def`` / ``async def`` statement,
        * ``class_names`` is the list of names taken from ``class`` statements
          (with or without a base-class list),
        * ``comment_percentage`` is ``comment_lines / total_lines * 100``, or
          ``0.0`` when the file has no non-blank lines,
        * ``total_lines`` is the number of non-blank lines.
    """
    # Explicit encoding so the result does not depend on the platform default.
    # 'utf-8-sig' also swallows a leading BOM (which would otherwise hide a
    # comment on the first line); undecodable bytes are replaced instead of
    # aborting the whole run.
    with open(file_path, 'r', encoding='utf-8-sig', errors='replace') as file:
        content = file.readlines()
    source = ''.join(content)

    # Both patterns are anchored at the start of a line so that the words
    # "def"/"class" inside comments or strings are not reported as definitions.
    # DOTALL lets a signature or base-class list span several lines.
    flags = re.MULTILINE | re.DOTALL
    # Optional "async", optional "-> annotation" before the colon.
    method_pattern = r'^[ \t]*(?:async[ \t]+)?def\s+(\w+)\s*\((.*?)\)\s*(?:->\s*[^:\n]+)?:'
    # Optional "(bases...)" between the class name and the colon.
    class_pattern = r'^[ \t]*class\s+(\w+)\s*(?:\(.*?\))?\s*:'

    # Non-blank lines, stripped once and reused for both counts below.
    stripped_lines = [line.strip() for line in content if line.strip()]

    # Whole-line comments only: the stripped line must begin with '#'.
    comments = [line for line in stripped_lines if line.startswith('#')]

    # (name, parameter_text) for every function / method definition.
    methods = re.findall(method_pattern, source, flags)

    # Names of all classes declared in the file.
    class_names = re.findall(class_pattern, source, flags)

    total_lines = len(stripped_lines)
    comment_lines = len(comments)
    # An empty (or whitespace-only) file has no lines to divide by.
    comment_percentage = (comment_lines / total_lines) * 100 if total_lines else 0.0

    return comments, comment_lines, methods, class_names, comment_percentage, total_lines

# Function to process each .py file and store the results in an Excel sheet
def process_py_files(folder_path, output_file_path='/content/drive/MyDrive/thesis/test/sheet_new.xlsx'):
    """Analyse every ``.py`` file in a folder and save the results to an Excel file.

    One header row is written, followed by one row per ``.py`` file found
    directly inside ``folder_path`` (sub-folders are not searched). Each row is
    built from the values returned by ``extract_comments_methods``.

    Args:
        folder_path: Directory whose ``.py`` files are analysed.
        output_file_path: Where the workbook is saved. Defaults to the
            location that was previously hard-coded, so existing calls keep
            writing to the same file.

    Side effects:
        Writes the ``.xlsx`` file and prints its path.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = 'Python File Results'
    ws.append(['File Name', 'Comments', 'Total Comments', 'Class Names', 'Method Names', 'Comment Percentage', 'Total Lines of Code', 'ATFD'])

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the sheet reproducible between runs.
    for file_name in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file_name)
        # Skip anything that is not a regular .py file (e.g. a directory whose
        # name happens to end in ".py" would make open() fail).
        if not (file_name.endswith('.py') and os.path.isfile(file_path)):
            continue

        comments, comment_lines, methods, class_names, comment_percentage, total_lines = extract_comments_methods(file_path)

        # Each entry of `methods` is (name, parameter_text); only names are reported.
        method_names = [method[0] for method in methods]

        # Method names that are defined more than once in this file.
        method_counter = Counter(method_names)
        repeated_methods = [method for method, count in method_counter.items() if count > 1]

        # NOTE(review): this value is written under the 'ATFD' header but is the
        # list of method names occurring more than once in the file — confirm
        # that the column label matches the intended metric.
        atfd = ', '.join([f"{method} ({method_counter[method]} times)" for method in repeated_methods])

        ws.append([file_name, '\n'.join(comments), comment_lines, ', '.join(class_names), ', '.join(method_names), comment_percentage, total_lines, atfd])

    wb.save(output_file_path)

    print("Excel file created:", output_file_path)

# Example usage: analyse the .py files in the thesis test folder on the mounted
# Drive; process_py_files saves the workbook and prints the path it wrote to.
folder_path = '/content/drive/MyDrive/thesis/test'
process_py_files(folder_path)


Excel file created: /content/drive/MyDrive/thesis/test/sheet_new.xlsx
