In [None]:
from os import system
import subprocess
import sys

# Install the Python dependencies with the interpreter that runs this kernel.
# A bare "pip" resolved through the shell may belong to a different Python
# installation, in which case the imports in the next cell would still fail.
python_packages = ["pydriller", "ipywidgets", "pandas", "tqdm", "deepdiff"]
subprocess.run(
    [sys.executable, "-m", "pip", "install", *python_packages],
    check=True,  # stop here with a clear error instead of failing on import later
)

# madge is a Node.js command-line tool used later for the dependency analysis.
# shell=True lets the shell resolve the npm launcher on every platform.
subprocess.run("npm install -g madge", shell=True, check=True)

In [4]:
import os
import json
import re
from os.path import exists
from os import system
import pandas as pd
import subprocess
import pydriller
from collections import defaultdict
from tqdm.notebook import tqdm
import logging
from deepdiff import DeepDiff
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')

## Setup

In [5]:
react_repo = "https://github.com/facebook/react"
clone_dir = os.path.join(os.getcwd(), "react")

# Clone the React repository once; later runs reuse the existing checkout.
if not exists(clone_dir):
    # git writes its progress report to stderr, e.g. "Receiving objects:  45% (...)".
    receiving_pattern = re.compile(r"Receiving objects:\s+(\d+)%")
    # Keep the last few non-progress lines so a failure can be explained.
    stderr_tail = []

    with tqdm(total=100, desc="Cloning React repo", unit="chunk") as progress_bar:
        process = subprocess.Popen(
            ['git', 'clone', '--progress', react_repo, clone_dir],
            stderr=subprocess.PIPE,
            stdout=subprocess.DEVNULL,
            text=True
        )
        for line in process.stderr:
            receiving_match = receiving_pattern.search(line)
            if receiving_match:
                progress_bar.n = int(receiving_match.group(1))
                progress_bar.refresh()
            elif "Resolving deltas" in line:
                progress_bar.set_description("Resolving deltas")
                progress_bar.refresh()
            elif line.strip():
                stderr_tail.append(line.strip())
                del stderr_tail[:-10]  # bound memory: only the tail is useful
        process.wait()

    if process.returncode == 0:
        logging.info("cloning completed successfully")
    else:
        logging.error("error during cloning")
        # Every later cell needs the checkout, so fail loudly with git's own message.
        raise RuntimeError(
            f"git clone exited with code {process.returncode}: " + " | ".join(stderr_tail)
        )
else:
    logging.warning("repo already cloned")



# Task 1

### Listing All Components

In [6]:
# All later cells use paths relative to the repository root.
os.chdir(clone_dir)

# Patterns to identify component types.
# The dots are escaped so that only a literal "React.Component" /
# "React.PureComponent" matches (an unescaped "." matches any character).
class_component_pattern = r"class\s+\w+\s+extends\s+(React\.Component|React\.PureComponent)"
functional_component_pattern = r"function\s+\w+\s*\(.*\)\s*{[^}]*return\s*<[^>]+>"


components = {
    "class_components": [],
    "functional_components": []
}

# Directories that never contain the project's own source files.
skipped_directories = {".git", "node_modules"}

for root, dirs, files in os.walk(clone_dir):
    # Pruning in place stops os.walk from descending into these directories.
    dirs[:] = [d for d in dirs if d not in skipped_directories]
    for file in files:
        if not file.endswith(".js"):
            continue
        file_path = os.path.join(root, file)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
        except (UnicodeDecodeError, OSError) as e:
            # One unreadable file must not abort the whole scan.
            logging.warning(f"Skipping unreadable file {file_path}: {e}")
            continue
        # A file is recorded once, by base name; the class pattern takes
        # precedence when a file matches both patterns.
        if re.search(class_component_pattern, content):
            components["class_components"].append(file)
        elif re.search(functional_component_pattern, content):
            components["functional_components"].append(file)


print("Class Components:", components["class_components"])
print("Functional Components:", components["functional_components"])


Functional Components: ['useSyncExternalStoreShimServer-test.js', 'useSyncExternalStoreNative-test.js', 'ReactFlightTurbopackDOMNode-test.js', 'ReactFlightTurbopackDOMBrowser-test.js', 'ReactFlightTurbopackDOM-test.js', 'ReactFlightTurbopackDOMEdge-test.js', 'ReactHooksInspectionIntegration-test.js', 'ReactDevToolsHooksIntegration-test.js', 'ReactHooksInspection-test.js', 'ReactFreshRuntime.js', 'ReactFreshBabelPlugin-test.js', 'ReactDOMServerSelectiveHydration-test.internal.js', 'DOMPropertyOperations-test.js', 'ReactDOMRoot-test.js', 'ReactStartTransitionMultipleRenderers-test.js', 'ReactDOMFizzStatic-test.js', 'ReactWrongReturnPointer-test.js', 'ReactDOMServerSuspense-test.internal.js', 'ReactDOMFizzDeferredValue-test.js', 'ReactDOMServerIntegrationSpecialTypes-test.js', 'ReactDOMNativeEventHeuristic-test.js', 'ReactDOMComponentTree-test.js', 'ReactDOMTestSelectors-test.js', 'ReactDOMHydrationDiff-test.js', 'ReactDOMUseId-test.js', 'ReactDOMImageLoad-test.internal.js', 'DOMPluginEve

In [7]:
# Persist the component listing as pretty-printed JSON in the current
# working directory.
components_output_path = os.path.join(os.getcwd(), "components.json")
components_json = json.dumps(components, indent=4)

with open(components_output_path, 'w', encoding='utf-8') as json_file:
    json_file.write(components_json)

print(f"Components data saved to: {components_output_path}")

Components data saved to: /Users/shellmychakkaith/Desktop/UZH/3. Semester/Software System/SE_I/react/components.json


### Detect dependencies between React files

In [8]:
# Run Madge over the current directory and capture its JSON report.
# check=True raises CalledProcessError when madge fails, instead of leaving
# an empty or partial dependencies.json that only breaks at json.load below.
logging.info("Running Madge to generate dependencies.json")
subprocess.run("madge --json ./ > dependencies.json", shell=True, check=True)

logging.info(f"Directory: {os.getcwd()}")

with open("dependencies.json", 'r', encoding='utf-8') as f:
    dependencies = json.load(f)

# Number of direct dependencies reported for each file
dependency_counts = {file: len(deps) for file, deps in dependencies.items()}

# The three files with the most dependencies, highest first
top_3_files = sorted(dependency_counts.items(), key=lambda x: x[1], reverse=True)[:3]

top_dependencies_data = {file: dependencies[file] for file, _ in top_3_files}

# Save to a JSON file. open()/json.dump raise on failure, so reaching the
# log line below means the file was written.
top_dependencies_file = "top_dependencies.json"
with open(top_dependencies_file, 'w', encoding='utf-8') as f:
    json.dump(top_dependencies_data, f, indent=4)

logging.info(f"Top 3 files with the highest number of dependencies have been saved to '{top_dependencies_file}'")

2024-11-22 01:46:12,445 | INFO | Running Madge to generate dependencies.json
2024-11-22 01:46:18,629 | INFO | Directory: /Users/shellmychakkaith/Desktop/UZH/3. Semester/Software System/SE_I/react\
2024-11-22 01:46:18,629 | INFO | Dependencies data has been saved to 'dependencies.json'
2024-11-22 01:46:18,633 | INFO | Top 3 files with the highest number of dependencies have been saved to 'top_dependencies.json'


### Commit with the most changed files between v17.0.1 and v17.0.2

In [17]:
# Collect the hashes of every commit reachable from v17.0.2 but not from v17.0.1.
logging.info("Getting the list of commits between v17.0.1 and v17.0.2")
git_log_output = subprocess.check_output(
    ['git', 'log', 'v17.0.1..v17.0.2', '--pretty=format:%H'],
    text=True,
)
commit_hashes = git_log_output.splitlines()

if len(commit_hashes) == 0:
    logging.error("No commits found between v17.0.1 and v17.0.2")
    raise ValueError("No commits found between v17.0.1 and v17.0.2")

# Regex patterns for the summary line of `git show --stat`
files_changed_pattern = re.compile(r'(\d+) file[s]? changed')
insertions_pattern = re.compile(r'(\d+) insertion[s]?\(\+\)')
deletions_pattern = re.compile(r'(\d+) deletion[s]?\(\-\)')

# Output field name -> pattern that extracts its value
stat_patterns = {
    "files_changed": files_changed_pattern,
    "insertions": insertions_pattern,
    "deletions": deletions_pattern,
}

# One record per commit: hash plus the three counters (0 when not reported)
commit_info_list = []
for sha in commit_hashes:
    logging.info(f"Processing commit {sha}")
    commit_details = subprocess.check_output(
        ['git', 'show', '--stat', '--pretty=format:', sha],
        text=True,
    )

    record = {"commit_hash": sha}
    for field, pattern in stat_patterns.items():
        found = pattern.search(commit_details)
        record[field] = int(found.group(1)) if found else 0
    commit_info_list.append(record)

# The most substantial change is the commit touching the most files
# (the first such commit wins on ties).
max_commit = max(commit_info_list, key=lambda info: info['files_changed'])
commit_hash = max_commit['commit_hash']

# Save the commit information
commit_info_path = "./commit_info.json"
with open(commit_info_path, 'w', encoding='utf-8') as f:
    json.dump(max_commit, f, indent=4)

if not exists(commit_info_path):
    logging.error(f"Failed to save commit information to '{commit_info_path}'")
else:
    logging.info(f"Commit information saved to '{commit_info_path}'")


2024-11-22 01:59:07,459 | INFO | Getting the list of commits between v17.0.1 and v17.0.2
2024-11-22 01:59:07,497 | INFO | Processing commit 12adaffef7105e2714f82651ea51936c563fe15c
2024-11-22 01:59:07,524 | INFO | Processing commit b2bbee7ba31bb7d212a9ff2e682a695a32f8a87f
2024-11-22 01:59:07,547 | INFO | Processing commit 8cc6ff24880ac00fdb9d11bce480a0433456e82d
2024-11-22 01:59:07,561 | INFO | Commit information saved to './commit_info.json'


### Dependencies changed at commit

In [10]:
# Switch the working tree to the selected commit so that the following cell
# analyses the dependencies as they were at that revision.
try:
    subprocess.run(["git", "checkout", commit_hash], check=True, text=True)
except subprocess.CalledProcessError as e:
    print(f"Error checking out to commit {commit_hash}: {e}")
    # Re-raise: continuing would run the analysis on the wrong revision.
    raise
else:
    print(f"Checked out to commit {commit_hash} successfully.")

Checked out to commit 12adaffef7105e2714f82651ea51936c563fe15c successfully.


Note: switching to '12adaffef7105e2714f82651ea51936c563fe15c'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn off this advice by setting config variable advice.detachedHead to false

HEAD is now at 12adaffef7 Remove scheduler sampling profiler shared array buffer (#20840)


In [11]:
# Generate the Madge report for the currently checked-out revision;
# check=True raises if the command exits with a non-zero status.
result_commit = subprocess.run(
    "madge --json ./ > dependencies_at_commit.json",
    shell=True,
    check=True,
)

# Read the report back; an unparsable report falls back to an empty mapping.
with open("dependencies_at_commit.json", 'r', encoding='utf-8') as f:
    report_text = f.read()

try:
    dependencies = json.loads(report_text)
except json.JSONDecodeError as e:
    print(f"Error decoding JSON: {e}")
    dependencies = {}

In [19]:
# Check out back to latest version.
# Failures are logged and re-raised rather than calling exit(1): inside a
# notebook exit() hides the reason for the failure and does not behave like
# a script exit.
try:
    result_checkout_latest = subprocess.run(["git", "checkout", "main"], check=True, text=True)
    print("Checked out back to the latest version")
except subprocess.CalledProcessError as e:
    logging.error(f"Failed to check out 'main': {e}")
    raise

# Generate dependency analysis for the latest version
try:
    result_latest_after = subprocess.run("madge --json ./ > latest_dependencies.json", shell=True, check=True)
    print("Madge dependency analysis completed for the latest version.")
except subprocess.CalledProcessError as e:
    logging.error(f"Madge dependency analysis failed for the latest version: {e}")
    raise

Already on 'main'


Your branch is up to date with 'origin/main'.
Checked out back to the latest version
Madge dependency analysis completed for the latest version.


In [20]:
def _to_jsonable(obj):
    """Fallback for json.dumps: turn DeepDiff's set-like containers into lists.

    Anything that cannot be iterated is rendered with str().
    """
    try:
        return list(obj)
    except TypeError:
        return str(obj)


# Compare the dependencies between the latest version and the commit
try:
    with open("latest_dependencies.json", 'r') as f_latest, open("dependencies_at_commit.json", 'r') as f_commit:
        latest_dependencies = json.load(f_latest)
        commit_dependencies = json.load(f_commit)

    # Use DeepDiff to compare the two dependency files
    diff = DeepDiff(latest_dependencies, commit_dependencies, ignore_order=True)
except Exception as e:
    print(f"Error comparing dependencies: {e}")
    # Without a diff nothing below can run; surface the real error instead
    # of a NameError on the undefined `diff`.
    raise

if diff:
    print("Dependencies have changed between the latest version and at commit:")
    # default= handles the non-list containers DeepDiff uses for added/removed keys.
    print(json.dumps(diff, indent=4, default=_to_jsonable))
else:
    print("No changes in dependencies between the latest version and at commit.")

# Extract changes related to added or removed dependencies.
# NOTE(review): DeepDiff is called as (latest, commit), so "added" means
# present at the commit but not in the latest version — confirm this is the
# intended direction.
# list(...) makes the values JSON serialisable while keeping DeepDiff's order.
new_dependencies = list(diff.get('dictionary_item_added', []))
removed_dependencies = list(diff.get('dictionary_item_removed', []))

changes_dependencies = {
    "added_dependencies": new_dependencies,
    "removed_dependencies": removed_dependencies
}

# Save the differences to a JSON file
with open("dependency_changes", 'w') as f_diff:
    json.dump(changes_dependencies, f_diff, indent=4)



Dependencies have changed between the latest version and at commit:
{
    "values_changed": {
        "root": {
            "new_value": {
                ".eslintrc.js": [
                    "scripts/shared/pathsByLanguageVersion.js"
                ],
                ".prettierrc.js": [
                    "scripts/shared/pathsByLanguageVersion.js"
                ],
                "babel.config.js": [],
                "dangerfile.js": [
                    "scripts/rollup/stats.js"
                ],
                "fixtures/art/VectorWidget.js": [],
                "fixtures/art/app.js": [
                    "fixtures/art/VectorWidget.js"
                ],
                "fixtures/art/webpack.config.js": [],
                "fixtures/attribute-behavior/src/App.js": [
                    "fixtures/attribute-behavior/src/attributes.js"
                ],
                "fixtures/attribute-behavior/src/App.test.js": [
                    "fixtures/attribute-behavior/src/App.js