In [None]:
from os import system

# Install the required packages
system("pip install pydriller")
system("pip install ipywidgets")
system("pip install pandas")
system("pip install tqdm")
# Install madge
system("npm install -g madge")

In [2]:
import os
import json
import re
from os.path import exists
from os import system
import pandas as pd
import subprocess
import pydriller
from collections import defaultdict
from tqdm.notebook import tqdm
import logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')

## Setup

In [3]:
react_repo = "https://github.com/facebook/react"
clone_dir = os.path.join(os.getcwd(), "react")

if not exists(clone_dir):
    with tqdm(total=100, desc="Cloning React repo", unit="chunk") as progress_bar:
        process = subprocess.Popen(
            ['git', 'clone', '--progress', react_repo, clone_dir],
            stderr=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            text=True
        )
        for line in process.stderr:
            if "Receiving objects" in line:
                percentage = int(line.split("%")[0].split()[-1])
                progress_bar.n = percentage
                progress_bar.refresh()
            elif "Resolving deltas" in line:
                progress_bar.set_description("Resolving deltas")
                progress_bar.refresh()
        process.wait()

    if process.returncode == 0:
        logging.info("cloning completed successfully")
    else:
        logging.error("error during cloning")
else:
    logging.warning("repo already cloned")

Cloning React repo:   0%|          | 0/100 [00:00<?, ?chunk/s]

2024-11-21 01:21:19,928 | INFO | cloning completed successfully


# Task 1

### Listing All Components

In [4]:
os.chdir(clone_dir)

# Patterns to identify component types
class_component_pattern = r"class\s+\w+\s+extends\s+(React.Component|React.PureComponent)"
functional_component_pattern = r"function\s+\w+\s*\(.*\)\s*{[^}]*return\s*<[^>]+>"


components = {
    "class_components": [],
    "functional_components": []
}


for root, _, files in os.walk(clone_dir):
    for file in files:
        if file.endswith(".js"):
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
                if re.search(class_component_pattern, content):
                    components["class_components"].append(file)
                elif re.search(functional_component_pattern, content):
                    components["functional_components"].append(file)


print("Class Components:", components["class_components"])
print("Functional Components:", components["functional_components"])


Functional Components: ['useSyncExternalStoreShimServer-test.js', 'useSyncExternalStoreNative-test.js', 'ReactFlightTurbopackDOMNode-test.js', 'ReactFlightTurbopackDOMBrowser-test.js', 'ReactFlightTurbopackDOM-test.js', 'ReactFlightTurbopackDOMEdge-test.js', 'ReactHooksInspectionIntegration-test.js', 'ReactDevToolsHooksIntegration-test.js', 'ReactHooksInspection-test.js', 'ReactFreshRuntime.js', 'ReactFreshBabelPlugin-test.js', 'ReactDOMServerSelectiveHydration-test.internal.js', 'DOMPropertyOperations-test.js', 'ReactDOMRoot-test.js', 'ReactStartTransitionMultipleRenderers-test.js', 'ReactDOMFizzStatic-test.js', 'ReactWrongReturnPointer-test.js', 'ReactDOMServerSuspense-test.internal.js', 'ReactDOMFizzDeferredValue-test.js', 'ReactDOMServerIntegrationSpecialTypes-test.js', 'ReactDOMNativeEventHeuristic-test.js', 'ReactDOMComponentTree-test.js', 'ReactDOMTestSelectors-test.js', 'ReactDOMHydrationDiff-test.js', 'ReactDOMUseId-test.js', 'ReactDOMImageLoad-test.internal.js', 'DOMPluginEve

In [5]:
components_output_path = os.path.join(os.getcwd(), "components.json")
with open(components_output_path, 'w', encoding='utf-8') as json_file:
    json.dump(components, json_file, indent=4)

print(f"Components data saved to: {components_output_path}")

Components data saved to: /Users/shellmychakkaith/Desktop/UZH/3. Semester/Software System/SE_I/react/components.json


### Detect Dependencies

In [43]:
# Executing the Madge command
logging.info("Running Madge to generate dependencies.json")
system("madge --json . > ./dependencies.json")

logging.info(f"Directory: {os.getcwd()}\\")

if exists("./dependencies.json"):
    logging.info("Dependencies data has been saved to 'dependencies.json'")
else:
    logging.error("Error generating the dependencies data")
    raise FileNotFoundError("dependencies.json file not found")

# Load dependencies JSON file 
with open("./dependencies.json", 'r', encoding='utf-8') as f:
    dependencies = json.load(f)

# Calculate the number of dependencies for each file
dependency_counts = {file: len(dependencies[file]) for file in dependencies}

# Sort files by the number of dependencies
top_3_files = sorted(dependency_counts.items(), key=lambda x: x[1], reverse=True)[:3]

# Prepare the top 3 files data
top_dependencies_data = {}
for file, _ in top_3_files:
    top_dependencies_data[file] = dependencies[file]

# Save only the top 3 files to a new JSON file
top_dependencies_file = "./top_dependencies.json"
with open(top_dependencies_file, 'w', encoding='utf-8') as f:
    json.dump(top_dependencies_data, f, indent=4)

# Check if the 'top_dependencies.json' file has been generated
if exists(top_dependencies_file):
    logging.info(f"Top 3 files with the highest number of dependencies have been saved to '{top_dependencies_file}'")
else:
    logging.error(f"Error generating the top dependencies data in '{top_dependencies_file}'")
    raise FileNotFoundError(f"{top_dependencies_file} file not found")

2024-11-21 03:10:17,526 | INFO | Running Madge to generate dependencies.json
2024-11-21 03:10:20,954 | INFO | Directory: /Users/shellmychakkaith/Desktop/UZH/3. Semester/Software System/SE_I/react\
2024-11-21 03:10:20,954 | INFO | Dependencies data has been saved to 'dependencies.json'
2024-11-21 03:10:20,956 | INFO | Top 3 files with the highest number of dependencies have been saved to './top_dependencies.json'


## Changes between Versions

In [28]:
logging.info("Getting the list of commits between v17.0.1 and v17.0.2")
commit_hashes = subprocess.check_output(['git', 'log', 'v17.0.1..v17.0.2', '--pretty=format:%H'], text=True).splitlines()

if not commit_hashes:
    logging.error("No commits found between v17.0.1 and v17.0.2")
    raise ValueError("No commits found between v17.0.1 and v17.0.2")

# Data structure to hold commit information
commit_info_list = []

# Regex patterns for changes, insertions, and deletions
files_changed_pattern = re.compile(r'(\d+) file[s]? changed')
insertions_pattern = re.compile(r'(\d+) insertion[s]?\(\+\)')
deletions_pattern = re.compile(r'(\d+) deletion[s]?\(\-\)')

# Iterate through each commit hash to extract inf
for commit_hash in commit_hashes:
    logging.info(f"Processing commit {commit_hash}")
    commit_details = subprocess.check_output(['git', 'show', '--stat', '--pretty=format:', commit_hash], text=True)

    # Extract information 
    files_changed_match = files_changed_pattern.search(commit_details)
    insertions_match = insertions_pattern.search(commit_details)
    deletions_match = deletions_pattern.search(commit_details)

    # Extract values or default to 0 
    files_changed = int(files_changed_match.group(1)) if files_changed_match else 0
    insertions = int(insertions_match.group(1)) if insertions_match else 0
    deletions = int(deletions_match.group(1)) if deletions_match else 0


    commit_info_list.append({
        "commit_hash": commit_hash,
        "files_changed": files_changed,
        "insertions": insertions,
        "deletions": deletions
    })

# Most substantial change 
max_commit = max(commit_info_list, key=lambda x: x['files_changed'])
    
commit_hash_task3 = max_commit['commit_hash'] 

# Documentation 
logging.info("Commit with the most substantial change:")
logging.info(f"Commit Hash: {max_commit['commit_hash']}")
logging.info(f"Files Changed: {max_commit['files_changed']}")
logging.info(f"Insertions: {max_commit['insertions']}")
logging.info(f"Deletions: {max_commit['deletions']}")

# Save the commit information 
commit_info_path = "./commit_info.json"
with open(commit_info_path, 'w', encoding='utf-8') as f:
    json.dump(max_commit, f, indent=4)

if exists(commit_info_path):
    logging.info(f"Commit information saved to '{commit_info_path}'")
else:
    logging.error(f"Failed to save commit information to '{commit_info_path}'")


Commit with the most substantial change:
Commit Hash: 12adaffef7105e2714f82651ea51936c563fe15c
Files Changed: 4
Insertions: 15
Deletions: 123
Commit information saved to 'commit_info.json'


## Dependency changes

In [36]:
try:
    subprocess.run(["git", "checkout", commit_hash], check=True, text=True)
    print(f"Checked out to commit {commit_hash} successfully.")
except subprocess.CalledProcessError as e:
    print(f"Error checking out to commit {commit_hash}: {e}")

Checked out to commit 8cc6ff24880ac00fdb9d11bce480a0433456e82d successfully.


Note: switching to '8cc6ff24880ac00fdb9d11bce480a0433456e82d'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn off this advice by setting config variable advice.detachedHead to false

HEAD is now at 8cc6ff2488 fix: use SharedArrayBuffer only when cross-origin isolation is enabled (#20831)


In [40]:
result_commit = subprocess.run(f"madge --json ./ > 'dependencies_commit.json'", shell=True)


# Step 3: Load dependencies from dependencies.json
with open("dependencies_commit.json", 'r', encoding='utf-8') as f:
    try:
        dependencies = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        dependencies = {}

In [None]:
# Execute Madge command for v17.0.1
subprocess.run(["git", "checkout", "v17.0.1"], shell=True)
result_v17_0_1 = subprocess.run(f"madge --json ./ > 'dependencies_v17_0_1.json'", shell=True)

with open("dependencies_v17_0_1.json", 'r', encoding='utf-8') as f:
    try:
        dependencies = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        dependencies = {}


# Execute Madge command for v17.0.2
subprocess.run(["git", "checkout", "v17.0.2"], shell=True)
result_v17_0_2 = subprocess.run(f"madge --json ./ > 'dependencies_v17_0_2.json'", shell=True)

with open("dependencies_v17_0_2.json", 'r', encoding='utf-8') as f:
    try:
        dependencies = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        dependencies = {}


In [34]:
# Compare dependency changes in v17_0_1 and v17_0_2

if os.path.exists("dependencies_v17_0_1.json") and os.path.exists("dependencies_v17_0_2.json"):
    with open("dependencies_v17_0_1.json", 'r', encoding='utf-8') as f:
        try:
            dependencies_v17_0_1 = json.load(f)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for v17.0.1: {e}")
            dependencies_v17_0_1 = {}

    with open("dependencies_v17_0_2.json", 'r', encoding='utf-8') as f:
        try:
            dependencies_v17_0_2 = json.load(f)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for v17.0.2: {e}")
            dependencies_v17_0_2 = {}
else:
    dependencies_v17_0_1 = {}
    dependencies_v17_0_2 = {}

changes = {
    "new_dependencies": {},
    "removed_dependencies": {}
}

# Find new dependencies introduced in v17.0.2
for file, deps in dependencies_v17_0_2.items():
    if file not in dependencies_v17_0_1:
        changes["new_dependencies"][file] = deps
    else:
        new_deps = set(deps) - set(dependencies_v17_0_1[file])
        if new_deps:
            changes["new_dependencies"][file] = list(new_deps)

# Find dependencies that were removed in v17.0.2
for file, deps in dependencies_v17_0_1.items():
    if file not in dependencies_v17_0_2:
        changes["removed_dependencies"][file] = deps
    else:
        removed_deps = set(deps) - set(dependencies_v17_0_2[file])
        if removed_deps:
            changes["removed_dependencies"][file] = list(removed_deps)

# Save dependency changes to a new JSON file
if changes["new_dependencies"] or changes["removed_dependencies"]:
    with open("dependency_changes", 'w', encoding='utf-8') as f:
        json.dump(changes, f, indent=4)
    print(f"Dependency changes between v17.0.1 and v17.0.2 have been documented in dependency_changes")
else:
    print("No changes in dependencies detected between v17.0.1 and v17.0.2.")

No changes in dependencies detected between v17.0.1 and v17.0.2.
